obsługa pozostałych elementów wyrażeń regularnych (opcjonalne wystąpienie, dokładne podanie liczby wystąpień w nawiasach klamrowych)

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@143 ff4e3ee1-f430-4e82-ade0-24591c43f1fd

obsługa pozostałych elementów wyrażeń regularnych (opcjonalne wystąpienie, dokładne podanie liczby wystąpień w nawiasach klamrowych)
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@143 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Michał Lenart
1 parent 3fa5c338
Showing 5 changed files with 453 additions and 41 deletions
fsabuilder/morfeuszbuilder/fsa/convertinput.py
fsabuilder/morfeuszbuilder/segrules/preprocessor.py
fsabuilder/morfeuszbuilder/segrules/rules.py
fsabuilder/morfeuszbuilder/segrules/rulesParser.py
input/segmenty1.dat
@@ -12,7 +12,7 @@ def _mergeEntries(inputLines, lowercase):
     prevInterps = None
     for key, interp in inputLines:
         key = key.lower() if lowercase else key
-#         print key
+#         print 'key=', key, 'interp=', interp
         assert key
         if prevKey and prevKey == key:
             prevInterps.append(interp)
@@ -95,7 +95,7 @@ class PolimorfConverter4Generator(object):
             line = line.decode(self.inputEncoding).strip('\n')
             orth, base, tag, name = _parseLine(line)
             if base:
-                if u':' in base and len(base) > 1:
+                if u':' in base and len(base) > 1 and base.split(u':', 1)[1].isalpha():
                     base, homonymId = base.split(u':', 1)
                 else:
                     homonymId = ''
@@ -130,8 +130,8 @@ class PolimorfConverter4Generator(object):
                 tagnum = int(tagnum)
                 namenum = int(namenum)
                 typenum = int(typenum)
-                yield (base, Interpretation4Generator(orth, base, tagnum, namenum, typenum, homonymId))
                 prevLine = line
+                yield (base, Interpretation4Generator(orth, base, tagnum, namenum, typenum, homonymId))
     def convert(self, inputLines):
         return _mergeEntries(self._reallyParseLines(self._sortLines(self._partiallyParseLines(inputLines))), lowercase=False)
@@ -9,7 +9,7 @@ from pyparsing import *
 from morfeuszbuilder.utils import exceptions
 from pyparseString import pyparseString
-identifier = Word(alphas, bodyChars=alphanums+u'_>*+')
+identifier = Word(alphas, bodyChars=alphanums+u'_>*+{},')
 define = Keyword('#define').suppress() + identifier + Optional(Suppress('(') + identifier + Suppress(')')) + restOfLine + LineEnd() + StringEnd()
 ifdef = Keyword('#ifdef').suppress() + identifier + LineEnd() + StringEnd()
 endif = Keyword('#endif').suppress() + LineEnd() + StringEnd()
@@ -24,6 +24,9 @@ class SegmentRule(object):
     def addToNFA(self, fsa):
         raise NotImplementedError()
+    def allowsEmptySequence(self):
+        raise NotImplementedError()
+    
     def _doAddToNFA(self, startStates, endState):
         raise NotImplementedError()
@@ -42,6 +45,9 @@ class TagRule(SegmentRule):
     def _doAddToNFA(self, startState, endState):
         startState.addTransition((self.segnum, self.shiftOrth), endState)
+    def allowsEmptySequence(self):
+        return False
+    
     def __str__(self):
         return u'%s(%d)' % (self.segtype, self.segnum)
@@ -77,6 +83,9 @@ class ConcatRule(ComplexRule):
         lastChild = self.children[-1]
         lastChild._doAddToNFA(currStartState, endState)
+    def allowsEmptySequence(self):
+        return all(map(lambda rule: rule.allowsEmptySequence(), self.children))
+    
     def __str__(self):
         return u' '.join(map(lambda c: str(c), self.children))
@@ -93,6 +102,9 @@ class OrRule(ComplexRule):
             child._doAddToNFA(intermStartState, intermEndState)
             intermEndState.addTransition(None, endState)
+    def allowsEmptySequence(self):
+        return any(map(lambda rule: rule.allowsEmptySequence(), self.children))
+    
     def __str__(self):
         return u'|'.join(map(lambda c: str(c), self.children))
@@ -115,5 +127,33 @@ class ZeroOrMoreRule(UnaryRule):
         intermEndState.addTransition(None, endState)
         endState.addTransition(None, intermStartState)
+    def allowsEmptySequence(self):
+        return True
+    
     def __str__(self):
         return u'(' + str(self.child) + ')*'
+
+class OptionalRule(UnaryRule):
+    
+    def __init__(self, child, linenum):
+        super(OptionalRule, self).__init__(child, linenum)
+        assert isinstance(child, SegmentRule)
+    
+    def addToNFA(self, fsa):
+        raise ValueError()
+    
+    def _doAddToNFA(self, startState, endState):
+        intermStartState = RulesNFAState(self)
+        intermEndState = RulesNFAState(self)
+        
+        startState.addTransition(None, intermStartState)
+        startState.addTransition(None, endState)
+        self.child._doAddToNFA(intermStartState, intermEndState)
+        intermEndState.addTransition(None, endState)
+    
+    def allowsEmptySequence(self):
+        return True
+    
+    def __str__(self):
+        return u'(' + str(self.child) + ')?'
+    
@@ -56,7 +56,11 @@ class RulesParser(object):
             combinationEnumeratedLines = segtypesConfigFile.enumerateLinesInSection(section, ignoreComments=False)
             combinationEnumeratedLines = list(preprocessor.preprocess(combinationEnumeratedLines, defs, filename))
             for rule in self._doParse(combinationEnumeratedLines, segtypesHelper, filename):
-#                 print rule
+                if rule.allowsEmptySequence():
+                    raise exceptions.ConfigFileException(
+                                                     filename, 
+                                                     rule.linenum, 
+                                                     'This rule allows empty segments sequence to be accepted')
                 rule.addToNFA(nfa)
 #                 nfa.debug()
             try:
@@ -86,6 +90,35 @@ class RulesParser(object):
 #             return rules.TagRule(segtype)
             return rules.TagRule(segtypesHelper.getSegnum4Segtype(segtype), shiftOrth, segtype, lineNum)
+    def _createQuantRule1(self, child, quantity, lineNum, line, segtypesHelper):
+        if quantity <= 0:
+            raise exceptions.ConfigFileException(segtypesHelper.filename, lineNum, u'%s - invalid quantity: %d' % (line, quantity))
+        else:
+            return rules.ConcatRule(quantity * [child], lineNum)
+    
+    def _createQuantRule2(self, child, leftN, rightN, lineNum, line, segtypesHelper):
+        if leftN > rightN or (leftN, rightN) == (0, 0):
+            raise exceptions.ConfigFileException(segtypesHelper.filename, lineNum, u'%s - invalid quantities: %d %d' % (line, leftN, rightN))
+        elif leftN == 0:
+            children = [rules.OptionalRule(child, lineNum)]
+            for n in range(2, rightN + 1):
+                children.append(self._createQuantRule1(child, n, lineNum, line, segtypesHelper))
+            return rules.OrRule(children, lineNum)
+        else:
+            children = [self._createQuantRule1(child, n, lineNum, line, segtypesHelper) for n in range(leftN, rightN + 1)]
+            return rules.OrRule(children, lineNum)
+    
+    def _createQuantRule3(self, child, quantity, lineNum, line, segtypesHelper):
+        if quantity <= 0:
+            raise exceptions.ConfigFileException(segtypesHelper.filename, lineNum, u'%s - invalid quantity: %d' % (line, quantity))
+        else:
+            return rules.ConcatRule(
+                                    [
+                                        rules.ConcatRule(quantity * [child], lineNum),
+                                        rules.ZeroOrMoreRule(child, lineNum)
+                                    ],
+                                    lineNum)
+    
     def _doParseOneLine(self, lineNum, line, segtypesHelper, filename):
         rule = Forward()
         tagRule = Word(alphanums+'_')
@@ -94,7 +127,11 @@ class RulesParser(object):
         atomicRule = tagRule ^ shiftOrthRule ^ parenRule
         zeroOrMoreRule = atomicRule + Suppress('*')
         oneOrMoreRule = atomicRule + Suppress('+')
-        unaryRule = atomicRule ^ zeroOrMoreRule ^ oneOrMoreRule
+        optionalRule = atomicRule + Suppress('?')
+        quantRule1 = atomicRule + Suppress('{') + Word(nums) + Suppress('}')
+        quantRule2 = atomicRule + Suppress('{') + Word(nums) + Suppress(',') + Word(nums) + Suppress('}')
+        quantRule3 = atomicRule + Suppress('{') + Word(nums) + Suppress(',') + Suppress('}')
+        unaryRule = atomicRule ^ zeroOrMoreRule ^ oneOrMoreRule ^ optionalRule ^ quantRule1 ^ quantRule2 ^ quantRule3
         oneOfRule = delimitedList(unaryRule, delim='|')
         complexRule = unaryRule ^ oneOfRule
         if self.rulesType == RulesParser.PARSE4ANALYZER:
@@ -107,6 +144,10 @@ class RulesParser(object):
         shiftOrthRule.setParseAction(lambda string, loc, toks: self._createNewTagRule(toks[0], True, lineNum, line, segtypesHelper))
 #         parenRule.setParseAction(lambda string, loc, toks: toks[0])
         zeroOrMoreRule.setParseAction(lambda string, loc, toks: rules.ZeroOrMoreRule(toks[0], lineNum))
+        quantRule1.setParseAction(lambda string, loc, toks: self._createQuantRule1(toks[0], int(toks[1], 10), lineNum, line, segtypesHelper))
+        quantRule2.setParseAction(lambda string, loc, toks: self._createQuantRule2(toks[0], int(toks[1], 10), int(toks[2], 10), lineNum, line, segtypesHelper))
+        quantRule3.setParseAction(lambda string, loc, toks: self._createQuantRule3(toks[0], int(toks[1], 10), lineNum, line, segtypesHelper))
+        optionalRule.setParseAction(lambda string, loc, toks: rules.OptionalRule(toks[0], lineNum))
         oneOrMoreRule.setParseAction(lambda string, loc, toks: rules.ConcatRule([toks[0], rules.ZeroOrMoreRule(toks[0], lineNum)], lineNum))
         oneOfRule.setParseAction(lambda string, loc, toks: rules.OrRule(toks, lineNum))
         concatRule.setParseAction(lambda string, loc, toks: toks[0] if len(toks) == 1 else rules.ConcatRule(toks, lineNum))
@@ -7,9 +7,245 @@ praet=split composite
 #define moze_interp(segmenty) wsz_interp segmenty wsz_interp
-dig>* dig
-(adja dywiz)+ adj
-naj> adj_sup
+# Segmenty występujące samodzielnie:
+#
+# domyślny typ segmentu samodzielnego:
+moze_interp(samodz)
+
+# segment samotny, który nie dopuszcza nawet znaku interpunkcyjnego po
+# sobie
+samotny
+
+# przeszlik pojedynczy w formie nieaglutynacyjnej, np. „gniótł”:
+moze_interp(praet_sg_na)
+
+# przeszlik pojedynczy w formie niezróżnicowanej aglutynacyjnie, np. „moze”:
+moze_interp(praet_sg)
+
+# przeszlik mnogi, np. „czytali”:
+moze_interp(praet_pl)
+
+# partykuła „by”:
+moze_interp(by)
+
+# inne segmenty, które dopuszczają po sobie aglutynant,
+# np. „powinna”, „czyżby”:
+moze_interp(z_aglt)
+moze_interp(z_aglt_by)
+
+# forma przymiotnikowa (dopuszcza adja):
+moze_interp(adj)
+
+# dywiz (jako samodzielny segment jest tylko błędnym użyciem w funkcji
+# myślnika, ale trzeba to dopuścić):
+dywiz
+
+# pauza i półpauza w funkcji myślnika
+pauza
+polpauza
+
+#ifdef isolated
+adja
+#endif
+
+
+# Połączenia z aglutynantami:
+#
+#ifdef split
+# Czas przeszły:
+# np. „gniotł·am”
+moze_interp( praet_sg_agl aglsg )
+# np. „czytał·em”
+moze_interp(praet_sg aglsg)
+# np. „czytali·ście”
+moze_interp(praet_pl aglpl)
+
+# Tryb warunkowy:
+# np. „gniótł·by”
+moze_interp(praet_sg_na by)
+# np. „czytało·by”
+moze_interp(praet_sg by)
+# np. „gnietli·by”
+moze_interp(praet_pl by)
+# np. „gniótł·by·ś”
+moze_interp(praet_sg_na by aglsg)
+# np. „czytał·by·m”
+moze_interp(praet_sg by aglsg)
+# np. „gnietli·by·śmy”
+moze_interp(praet_pl by aglpl)
+#else
+# moze_interp(praetcond)
+#endif
+# np. „by·ś”
+moze_interp(by aglsg)
+# np. „by·ście”
+moze_interp(by aglpl)
+
+# np. „gdyby·m”
+moze_interp(z_aglt aglsg)
+moze_interp(z_aglt_by aglsg)
+# np. „gdyby·ście”
+moze_interp(z_aglt aglpl)
+moze_interp(z_aglt_by aglpl)
+# oraz wersje z by, np. chybabym
+moze_interp(z_aglt by aglsg)
+moze_interp(z_aglt by aglpl)
+
+# To jest dużo za dużo, ale tytułem eksperymentu:
+#ifdef permissive
+moze_interp(samodz aglsg)
+moze_interp(samodz aglpl)
+#endif
+
+# Złożone formy przymiotnikowe
+# np. „biało·-·czerwony”
+moze_interp( (adja dywiz)+ adj )
+# poniższe załatwione przez + powyżej:
+# # np. „niebiesko·-·biało·-·czerwona”
+# adja dywiz adja dywiz adj interp?
+# # itd. (zatrzymujemy się pragmatycznie na 5 członach)
+# adja dywiz adja dywiz adja dywiz adj interp?
+# adja dywiz adja dywiz adja dywiz adja dywiz adj interp?
+
+# Formy zanegowane stopnia wyższego przymiotników i przysłówków (WK)
+# np. „nie·grzeczniejszy”, „nie·grzeczniej”
+moze_interp( adj_com )
+moze_interp( nie> adj_com )
+
+# Formy „zanegowane” gerundiów i imiesłowów:
+# np. „nie·czytanie”, „nie·przeczytany”, „nie·czytający”:
+moze_interp( nie> negat )
+
+# Przyimki akceptujące krótką formę „-ń”
+moze_interp(z_on_agl)
+# np. „do·ń”
+moze_interp(z_on_agl on_agl)
+
+# Liczba zapisana jako ciąg cyfr:
+#moze_interp( dig>* dig )
+dig{8}
+dig dig{0,2}
+dig{5,6}
+dig{10,}
+
+# Liczba rzymska zapisana jako ciąg cyfr rzymskich:
+# (kiepskie, trzeba poprawić wyrażeniem regularnym)
+moze_interp( roman>* roman )
+
+# Formacje prefiksalne
+#### trzeba wydzielić odpowiednie samodze!
+# rzeczownikowe
+# np. „euro·sodoma”, „e-·papieros”
+moze_interp(nomina)
+moze_interp( prefs> nomina ) !weak
+# czasownikowe np. „po·nakapywać”
+moze_interp(inf_imperf|praet_imperf|imps_imperf|fin_imperf|impt_sg_imperf|impt_pl_imperf|impt_sg_perf|impt_pl_perf)
+moze_interp( prefv> (inf_imperf|praet_imperf|imps_imperf|fin_imperf|impt_sg_imperf|impt_pl_imperf) ) !weak
+# przymiotnikowe np. „do·żylny”, „euro·sodomski”, „bez·argumentowy”
+moze_interp(ppas|adv_pos|pact)
+# moze_interp(prefa> adj)
+moze_interp( prefa> ( adj|adv_pos|pact|ppas ) ) !weak 
+moze_interp( prefppas> ppas ) !weak
+
+# Apozycje z dywizem
+# np. „kobieta-prezydent”
+moze_interp( nomina (dywiz) nomina )
+
+# Zakresy liczbowe, daty, np. 1911-1939.
+moze_interp((dig>* dig) dywiz (dig>* dig))
+
+# poniższe do sprawdzenia, najwyraźniej obecne w tekstach, skoro wprowadziliśmy:
+# ?
+adj dywiz adj
+# ?
+adj dywiz samodz
+# ?
+samodz dywiz adj
+
+#### PONIŻEJ REGUŁY WK
+# Stopień najwyższy:
+# np. „naj·zieleńszy”, „naj·mądrzej”
+moze_interp( naj> adj_sup )
+# Cząstka li przy osobowych formach czasownika oddzielona dywizem: znasz-li ten kraj
+moze_interp( praet_sg dywiz li)
+moze_interp( praet_sg aglsg dywiz li)
+moze_interp( praet_pl dywiz li)
+moze_interp( praet_pl aglpl dywiz li)
+moze_interp( praet_sg_na dywiz li)
+moze_interp( fin_perf)
+moze_interp( fin_imperf)
+moze_interp( (fin_perf|fin_imperf) dywiz li)
+
+# i bez dywizu --- czy bez dywizu jest sens łapać?
+#moze_interp( praet_sg li)
+#moze_interp( praet_pl li)
+#moze_interp( praet_sg_na li)
+#moze_interp( (fin_perf|fin_imperf) li)
+
+# reguła z partykułą ‹+że› przy trybie rozkazującym
+# zakończonym na spółgłoskę
+moze_interp(impt_sg_imperf ze)
+moze_interp(impt_sg_perf ze)
+# dodatkowo reguła dla part+że (niemalże, omalże, nieomalże):
+# moze_interp(part_z_ze ze) --- już niepotrzebne, part. wpisane do słownika
+# potrzebna jeszcze reguła dla ‹onże›
+# i dla ‹+ż› przy zakończonym na samogłoskę
+moze_interp(impt_pl_imperf z)
+moze_interp(impt_pl_perf z)
+
+# aglutynant przy przymiotniku --- obsługiwane wyżej w wersji permissive
+# moze_interp( adj aglsg )
+moze_interp( adj aglpl )
+
+# forma złoż. liczebnika + przymiotnik, np. wieloaspektowy, pięciomasztowy
+# dwudziestopięcioipółletni
+moze_interp( i )
+moze_interp( pol_zloz > adj ) !weak
+moze_interp( num_zloz>+ adj ) !weak
+moze_interp( num_zloz>+ i> pol_zloz> adj ) !weak
+# moze_interp(num_zloz+ (i pol_zloz)? adj)
+
+# półdolarówka, półsiostra
+moze_interp( ( pol_zloz>|num_zloz> ) nomina ) !weak
+# nie analizuje: dwudziestopięcioipółlatek --- powinien?
+# moze_interp( num_zloz nomina )
+# czy dodać sufiksy: +latek m1, +latek m2, +latka f ? Chyba nie trzeba
+
+# wykluczenie ze złożeń przymiotników: ten, ów, ki, si.
+moze_interp(adj_anty_zloz)
+
+# złożenia adja+adj/adv bez dywizu, np. średniopienny, sierściowłosy
+moze_interp(adja>+ adj) !weak
+
+# złożenia konkretnych przyimków z formą -ń rzeczownika on:
+moze_interp(prep_n)
+moze_interp(prep_n n)
+
+# REGUŁY EKSPERYMENTALNE
+# formy złoż. rzeczowników i sufiksy
+moze_interp( substa> sufs ) !weak
+
+# złożenia imiesłowów przymiotnikowych czynnych;
+# z braku form złoż. korzysta się z imiesłowów przysłówkowych + o
+moze_interp(pcon)
+# moze_interp( (pcon> morphconj dywiz)+ pact )
+
+# liczba arabska formą przymiotnikową, przysłówkową lub rzeczownikową: 22-letni, 20-latek, 
+# 1-majowy, 3-krotnie
+moze_interp( dig>+ dywiz> (adj|adv_pos|nomina) )
+
+# liczba rzymska z formą przymiotnikową: XIX-wieczny
+moze_interp( roman>+ dywiz> adj )
+
+# formy złoż. z sufiksem +latek:
+moze_interp( num_zloz>+ latek ) !weak
+moze_interp(num_zloz>+ i> pol_zloz> latek ) !weak
+
+# złożenia liczbowe z sufiksem +latek, np. 20-latek:
+moze_interp( dig>+ dywiz> latek )
+
+# interpretacja znaków interpunkcyjnych
+# moze_interp(samodz interp)
 [generator combinations]
@@ -19,12 +255,35 @@ nie
 prefs
 prefv
 prefa
+prefppas
+ppas
+pcon
+morphconj
+li
+substa
+sufs
+latek
+dywiz
+pauza
+polpauza
+kropka
+adj_anty_zloz
+adj_com
+fin_perf
+fin_imperf
+nomina
+adv_pos
+pact
+inf_imperf
+praet_imperf
+imps_imperf
+ze
+z
 dig
+roman
 adja
 adj
 adj_sup
-adj_com
-fin
 negat
 on_agl
 z_on_agl
@@ -32,34 +291,45 @@ samotny
 interp
 aglsg
 aglpl
+z_aglt
+z_aglt_by
+by
 praetcond
 praet_sg_agl
 praet_sg_na
 praet_sg
 praet_pl
-z_aglt
-by
-li
-nomina
-adjectiva
-verba_imperf
-dywiz
-kropka
+impt_sg_perf
+impt_sg_imperf
+impt_pl_perf
+impt_pl_imperf
+pol_zloz
+num_zloz
+i
+n
+prep_n
+emoticon
+killfile
 samodz
+
 [tags]
 naj	naj
 nie	nie
 prefs	prefs
 prefv	prefv
 prefa	prefa
+prefppas	prefppas
 dig	dig
+roman	romandig
 adja	adja
+substa	substa
 adj	adj:%:pos
 adj_sup	adj:%:sup
 adj_sup	adv:sup
 adj_com	adj:%:com
 adj_com	adj:%:com
+pcon	pcon:imperf
 negat	ger:%:neg
 negat	pact:%:neg
 negat	ppas:%:neg
@@ -67,49 +337,110 @@ on_agl	ppron3:sg:gen.acc:m1.m2.m3:ter:nakc:praep
 z_on_agl	prep:%
 samotny	brev:pun
 samotny	brev:npun
-samotny	interj
+# samotny	interj
+samotny	emoticon
 interp	interp
 aglsg	aglt:sg:%
 aglpl	aglt:pl:%
-samodz		%
-praet_fin	praet:%
-praet_fin	fin:%
-li		li:qub:%
+praet_sg_agl	praet:sg:%:agl
+praet_sg_na	praet:sg:%:nagl
+praet_sg	praet:sg:%
+praet_pl	praet:pl:%
+praet_sg	winien:sg:%
+praet_pl	winien:pl:%
+fin_perf	fin:%:perf
+fin_imperf	fin:%:imperf
 nomina		subst:%
 nomina		ger:%
 nomina		depr:%
-adjectiva	adj:%
-adjectiva	adv:%
-adjectiva	ppas:%
-adjectiva	pact:%
-verba_imperf	praet:%:imperf
-verba_imperf	fin:%:imperf
-verba_imperf	inf:imperf
-verba_imperf	imps:imperf
-verba_imperf	impt:imperf
-
+adv_pos		adv:pos
+ppas		ppas:%
+pact		pact:%
+praet_imperf	praet:%:imperf
+inf_imperf	inf:imperf
+imps_imperf	imps:imperf
+impt_sg_imperf	impt:sg:%:imperf
+impt_sg_perf	impt:sg:%:perf
+impt_pl_imperf	impt:pl:%:imperf
+impt_pl_perf	impt:pl:%:perf
+num_zloz	num:comp
+prep_n		prep:%:wok
+samodz		%
 [lexemes]
+pol_zloz	pół:num:comp
+i	i:conj
+by	by:qub
+li	li:qub
+ze	+że:qub
+z	+ż:qub
+killfile	+ć:qub
 z_aglt	aby:comp
 z_aglt	bowiem:comp
-by	by:qub
 z_aglt	by:comp
 z_aglt	cóż:subst
 z_aglt	czemu:adv
-z_aglt	czyżby:qub
-z_aglt	choćby:comp
-z_aglt	chociażby:comp
+z_aglt_by	czyżby:qub
+z_aglt_by	choćby:comp
+z_aglt_by	chociażby:comp
 z_aglt	dlaczego:adv
 z_aglt	dopóki:comp
 z_aglt	dopóty:conj
-z_aglt	gdyby:comp
+z_aglt_by	gdyby:comp
 z_aglt	gdzie:qub
 z_aglt	gdzie:adv
-z_aglt	jakby:comp
-z_aglt	jakoby:comp
+z_aglt_by	jakby:comp
+z_aglt_by	jakoby:comp
 z_aglt	kiedy:adv
 z_aglt	kiedy:comp
 z_aglt	tylko:qub
 z_aglt	żeby:comp
+z_aglt	to:conj
+z_aglt	chyba:qub
+z_aglt	że:qub
+z_aglt	czy:conj
+z_aglt_by	oby:qub
+z_aglt_by	bodajby:qub
+z_aglt	co:comp
 dywiz	-:interp
+pauza	—:interp
+polpauza	–:interp
 kropka	.:interp
+n	on:ppron3:sg:gen.acc:m1.m2.m3:ter:nakc:praep
+adj_anty_zloz	ten:adj:%
+adj_anty_zloz	tenże:adj:%
+adj_anty_zloz	ck:adj:%
+adj_anty_zloz	c.k.:adj:%
+adj_anty_zloz	ki:adj:%
+adj_anty_zloz	si:adj:%
+adj_anty_zloz	ow:adj:%
+adj_anty_zloz	ów:adj:%
+adj_anty_zloz	ówże:adj:%
+adj_anty_zloz	mój:adj:%
+adj_anty_zloz	a-z:adj:%
+adj_anty_zloz	a-ż:adj:%
+adj_anty_zloz	kiż:adj:%
+adj_anty_zloz	be:adj:%
+adj_anty_zloz	caca:adj:%
+adj_anty_zloz	czyj:adj:%
+adj_anty_zloz	oboj:adj:%
+adj_anty_zloz	on:adj:%
+adj_anty_zloz	tyli:adj:%
+prep_n		do:prep:%
+prep_n		dla:prep:%
+prep_n		koło:prep:%
+prep_n		na:prep:%
+prep_n		o:prep:%
+prep_n		po:prep:%
+prep_n		poza:prep:%
+prep_n		spoza:prep:%
+prep_n		za:prep:%
+prep_n		zza:prep:%
+morphconj	+o+:morphconj
+# adj_anty_zloz	pop:adj:%
+sufs	+znawca:subst:%
+sufs	+dawca:subst:%
+sufs	+biorca:subst:%
+sufs	+żerca:subst:%
+sufs	+maniak:subst:%
+latek	latek:subst:%