Commit 4ea040d0c359bd5f64b432695ae6799011e0fb72

Authored by Michał Lenart
1 parent 8d5a878e

- zrobiona konwersja NFA -> DFA dla automatów do zlepiania segmentów

- usunięcie "ignoreOrth"

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@87 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
fsabuilder/PoliMorfTest.cek 0 → 100644
  1 +bij ABć+impt:sg:sec:imperf+pospolita
  2 +bija AAć+fin:sg:ter:imperf+pospolita
  3 +bijacie ADć+fin:pl:sec:imperf+pospolita
  4 +bijaj ABć+impt:sg:sec:imperf+pospolita
  5 +bijajcie AEć+impt:pl:sec:imperf+pospolita
  6 +bijajmy ADć+impt:pl:pri:imperf+pospolita
  7 +bijają ACć+fin:pl:ter:imperf+pospolita
  8 +bijając ADć+pcon:imperf+pospolita
  9 +bijająca AEć+pact:sg:nom.voc:f:imperf:aff+pospolita
  10 +bijające AEć+pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff+pospolita|AEć+pact:sg:nom.acc.voc:n1.n2:imperf:aff+pospolita
  11 +bijającego AGć+pact:sg:acc:m1.m2:imperf:aff+pospolita|AGć+pact:sg:gen:m1.m2.m3.n1.n2:imperf:aff+pospolita
  12 +bijającej AFć+pact:sg:gen.dat.loc:f:imperf:aff+pospolita
  13 +bijającemu AGć+pact:sg:dat:m1.m2.m3.n1.n2:imperf:aff+pospolita
  14 +bijający AEć+pact:pl:nom.voc:m1.p1:imperf:aff+pospolita|AEć+pact:sg:acc:m3:imperf:aff+pospolita|AEć+pact:sg:nom.voc:m1.m2.m3:imperf:aff+pospolita
  15 +bijających AGć+pact:pl:acc:m1.p1:imperf:aff+pospolita|AGć+pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff+pospolita
  16 +bijającym AFć+pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff+pospolita|AFć+pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff+pospolita
  17 +bijającymi AGć+pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff+pospolita
  18 +bijającą AEć+pact:sg:acc.inst:f:imperf:aff+pospolita
  19 +bijak AA+subst:sg:acc:m3+pospolita|AA+subst:sg:nom:m3+pospolita
  20 +bijaka AB+subst:sg:gen:m3+pospolita
  21 +bijakach AD+subst:pl:loc:m3+pospolita
  22 +bijakami AD+subst:pl:inst:m3+pospolita
  23 +bijaki AB+subst:pl:acc:m3+pospolita|AB+subst:pl:nom:m3+pospolita|AB+subst:pl:voc:m3+pospolita
  24 +bijakiem AD+subst:sg:inst:m3+pospolita
  25 +bijakom AC+subst:pl:dat:m3+pospolita
  26 +bijakowa ABy+adj:sg:nom.voc:f:pos+pospolita
  27 +bijakowe ABy+adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos+pospolita|ABy+adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos+pospolita|ABy+adj:sg:acc:n1.n2:pos+pospolita|ABy+adj:sg:nom.voc:n1.n2:pos+pospolita
  28 +bijakowego ADy+adj:sg:acc:m1.m2:pos+pospolita|ADy+adj:sg:gen:m1.m2.m3.n1.n2:pos+pospolita
  29 +bijakowej ACy+adj:sg:dat:f:pos+pospolita|ACy+adj:sg:gen:f:pos+pospolita|ACy+adj:sg:loc:f:pos+pospolita
  30 +bijakowemu ADy+adj:sg:dat:m1.m2.m3.n1.n2:pos+pospolita
  31 +bijakowi ABy+adj:pl:nom.voc:m1.p1:pos+pospolita|AD+subst:sg:dat:m3+pospolita
  32 +bijakowo ABy+adja+pospolita
  33 +bijakowości ACć+subst:pl:acc:f+pospolita|ACć+subst:pl:gen:f+pospolita|ACć+subst:pl:nom:f+pospolita|ACć+subst:pl:voc:f+pospolita|ACć+subst:sg:dat:f+pospolita|ACć+subst:sg:gen:f+pospolita|ACć+subst:sg:loc:f+pospolita|ACć+subst:sg:voc:f+pospolita
  34 +bijakowościach AFć+subst:pl:loc:f+pospolita
  35 +bijakowościami AFć+subst:pl:inst:f+pospolita
  36 +bijakowościom AEć+subst:pl:dat:f+pospolita
  37 +bijakowością ADć+subst:sg:inst:f+pospolita
  38 +bijakowość AA+subst:sg:acc:f+pospolita|AA+subst:sg:nom:f+pospolita
  39 +bijakowy AA+adj:sg:acc:m3:pos+pospolita|AA+adj:sg:nom.voc:m1.m2.m3:pos+pospolita
  40 +bijakowych AC+adj:pl:acc:m1.p1:pos+pospolita|AC+adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos+pospolita|AC+adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos+pospolita
  41 +bijakowym AB+adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos+pospolita|AB+adj:sg:inst:m1.m2.m3.n1.n2:pos+pospolita|AB+adj:sg:loc:m1.m2.m3.n1.n2:pos+pospolita
  42 +bijakowymi AC+adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos+pospolita
  43 +bijakową ABy+adj:sg:acc:f:pos+pospolita|ABy+adj:sg:inst:f:pos+pospolita
  44 +bijaku AB+subst:sg:loc:m3+pospolita|AB+subst:sg:voc:m3+pospolita
  45 +bijaków AC+subst:pl:gen:m3+pospolita
  46 +bijali ACć+praet:pl:m1.p1:imperf+pospolita
  47 +bijam ABć+fin:sg:pri:imperf+pospolita
  48 +bijamy ACć+fin:pl:pri:imperf+pospolita
  49 +bijana ACć+ppas:sg:nom.voc:f:imperf:aff+pospolita
  50 +bijance ACka+subst:sg:dat:f+pospolita|ACka+subst:sg:loc:f+pospolita
  51 +bijane ACć+ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff+pospolita|ACć+ppas:sg:nom.acc.voc:n1.n2:imperf:aff+pospolita
  52 +bijanego AEć+ppas:sg:acc:m1.m2:imperf:aff+pospolita|AEć+ppas:sg:gen:m1.m2.m3.n1.n2:imperf:aff+pospolita
  53 +bijanej ADć+ppas:sg:gen.dat.loc:f:imperf:aff+pospolita
  54 +bijanek ACka+subst:pl:gen:f+pospolita
  55 +bijanemu AEć+ppas:sg:dat:m1.m2.m3.n1.n2:imperf:aff+pospolita
  56 +bijani ACć+ppas:pl:nom.voc:m1.p1:imperf:aff+pospolita
  57 +bijania ADć+ger:sg:gen:n2:imperf:aff+pospolita
  58 +bijanie ADć+ger:sg:nom.acc:n2:imperf:aff+pospolita
  59 +bijaniem AEć+ger:sg:inst:n2:imperf:aff+pospolita
  60 +bijaniu ADć+ger:sg:dat.loc:n2:imperf:aff+pospolita
  61 +bijanka AA+subst:sg:nom:f+pospolita
  62 +bijankach AC+subst:pl:loc:f+pospolita
  63 +bijankami AC+subst:pl:inst:f+pospolita
  64 +bijanki ABa+subst:pl:acc:f+pospolita|ABa+subst:pl:nom:f+pospolita|ABa+subst:pl:voc:f+pospolita|ABa+subst:sg:gen:f+pospolita
  65 +bijanko ABa+subst:sg:voc:f+pospolita
  66 +bijankom ACa+subst:pl:dat:f+pospolita
  67 +bijanką ABa+subst:sg:inst:f+pospolita
  68 +bijankę ABa+subst:sg:acc:f+pospolita
  69 +bijano ACć+imps:imperf+pospolita
  70 +bijany ACć+ppas:sg:acc:m3:imperf:aff+pospolita|ACć+ppas:sg:nom.voc:m1.m2.m3:imperf:aff+pospolita
  71 +bijanych AEć+ppas:pl:acc:m1.p1:imperf:aff+pospolita|AEć+ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff+pospolita
  72 +bijanym ADć+ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff+pospolita|ADć+ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff+pospolita
  73 +bijanymi AEć+ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff+pospolita
  74 +bijaną ACć+ppas:sg:acc.inst:f:imperf:aff+pospolita
  75 +bijasz ACć+fin:sg:sec:imperf+pospolita
  76 +bijatyce ACka+subst:sg:dat:f+pospolita|ACka+subst:sg:loc:f+pospolita
  77 +bijatyk AAa+subst:pl:gen:f+pospolita
  78 +bijatyka AA+subst:sg:nom:f+pospolita
  79 +bijatykach AC+subst:pl:loc:f+pospolita
  80 +bijatykami AC+subst:pl:inst:f+pospolita
  81 +bijatyki ABa+subst:pl:acc:f+pospolita|ABa+subst:pl:nom:f+pospolita|ABa+subst:pl:voc:f+pospolita|ABa+subst:sg:gen:f+pospolita
  82 +bijatyko ABa+subst:sg:voc:f+pospolita
  83 +bijatykom ACa+subst:pl:dat:f+pospolita
  84 +bijatyką ABa+subst:sg:inst:f+pospolita
  85 +bijatykę ABa+subst:sg:acc:f+pospolita
  86 +bijać AA+inf:imperf+pospolita
  87 +bijał ABć+praet:sg:m1.m2.m3:imperf+pospolita
  88 +bijała ACć+praet:sg:f:imperf+pospolita
  89 +bijało ACć+praet:sg:n1.n2:imperf+pospolita
  90 +bijały ACć+praet:pl:m2.m3.f.n1.n2.p2.p3:imperf+pospolita
  91 +bijcie AEć+impt:pl:sec:imperf+pospolita
  92 +bije ACć+fin:sg:ter:imperf+pospolita
  93 +bijecie AFć+fin:pl:sec:imperf+pospolita
  94 +bijekcja AA+subst:sg:nom:f+pospolita
  95 +bijekcjach AC+subst:pl:loc:f+pospolita
  96 +bijekcjami AC+subst:pl:inst:f+pospolita
  97 +bijekcje ABa+subst:pl:acc:f+pospolita|ABa+subst:pl:nom:f+pospolita|ABa+subst:pl:voc:f+pospolita
  98 +bijekcji ABa+subst:pl:gen:f+pospolita|ABa+subst:sg:dat:f+pospolita|ABa+subst:sg:gen:f+pospolita|ABa+subst:sg:loc:f+pospolita
  99 +bijekcjo ABa+subst:sg:voc:f+pospolita
  100 +bijekcjom ACa+subst:pl:dat:f+pospolita
  101 +bijekcją ABa+subst:sg:inst:f+pospolita
  102 +bijekcję ABa+subst:sg:acc:f+pospolita
  103 +bijekcyj ACja+subst:pl:gen:f+pospolita
  104 +bijemy AEć+fin:pl:pri:imperf+pospolita
  105 +bijesz AEć+fin:sg:sec:imperf+pospolita
  106 +bijmy ADć+impt:pl:pri:imperf+pospolita
  107 +bijnik AA+subst:sg:acc:m3+pospolita|AA+subst:sg:nom:m3+pospolita
  108 +bijnika AB+subst:sg:gen:m3+pospolita
  109 +bijnikach AD+subst:pl:loc:m3+pospolita
  110 +bijnikami AD+subst:pl:inst:m3+pospolita
  111 +bijniki AB+subst:pl:acc:m3+pospolita|AB+subst:pl:nom:m3+pospolita|AB+subst:pl:voc:m3+pospolita
  112 +bijnikiem AD+subst:sg:inst:m3+pospolita
  113 +bijnikom AC+subst:pl:dat:m3+pospolita
  114 +bijnikowi AD+subst:sg:dat:m3+pospolita
  115 +bijniku AB+subst:sg:loc:m3+pospolita|AB+subst:sg:voc:m3+pospolita
  116 +bijników AC+subst:pl:gen:m3+pospolita
  117 +biją ACć+fin:pl:ter:imperf+pospolita
  118 +bijąc ADć+pcon:imperf+pospolita
  119 +bijąca AEć+pact:sg:nom.voc:f:imperf:aff+pospolita
  120 +bijące AEć+pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff+pospolita|AEć+pact:sg:nom.acc.voc:n1.n2:imperf:aff+pospolita
  121 +bijącego AGć+pact:sg:acc:m1.m2:imperf:aff+pospolita|AGć+pact:sg:gen:m1.m2.m3.n1.n2:imperf:aff+pospolita
  122 +bijącej AFć+pact:sg:gen.dat.loc:f:imperf:aff+pospolita
  123 +bijącemu AGć+pact:sg:dat:m1.m2.m3.n1.n2:imperf:aff+pospolita
  124 +bijący AEć+pact:pl:nom.voc:m1.p1:imperf:aff+pospolita|AEć+pact:sg:acc:m3:imperf:aff+pospolita|AEć+pact:sg:nom.voc:m1.m2.m3:imperf:aff+pospolita
  125 +bijących AGć+pact:pl:acc:m1.p1:imperf:aff+pospolita|AGć+pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff+pospolita
  126 +bijącym AFć+pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff+pospolita|AFć+pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff+pospolita
  127 +bijącymi AGć+pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff+pospolita
  128 +bijącą AEć+pact:sg:acc.inst:f:imperf:aff+pospolita
fsabuilder/buildfsa.py
@@ -10,9 +10,10 @@ import logging @@ -10,9 +10,10 @@ import logging
10 import codecs 10 import codecs
11 from morfeuszbuilder.fsa import encode 11 from morfeuszbuilder.fsa import encode
12 from morfeuszbuilder.fsa import convertinput 12 from morfeuszbuilder.fsa import convertinput
13 -from morfeuszbuilder.fsa import common  
14 from morfeuszbuilder.fsa.fsa import FSA 13 from morfeuszbuilder.fsa.fsa import FSA
15 from morfeuszbuilder.fsa.serializer import VLengthSerializer1, VLengthSerializer2, SimpleSerializer 14 from morfeuszbuilder.fsa.serializer import VLengthSerializer1, VLengthSerializer2, SimpleSerializer
  15 +from morfeuszbuilder.tagset.tagset import Tagset
  16 +from morfeuszbuilder.segrules import rulesParser
16 from optparse import OptionParser 17 from optparse import OptionParser
17 18
18 # class InputFormat(): 19 # class InputFormat():
@@ -50,6 +51,10 @@ def _parseOptions(): @@ -50,6 +51,10 @@ def _parseOptions():
50 dest='tagsetFile', 51 dest='tagsetFile',
51 metavar='FILE', 52 metavar='FILE',
52 help='path to the file with tagset') 53 help='path to the file with tagset')
  54 + parser.add_option('--segments-file',
  55 + dest='segmentsFile',
  56 + metavar='FILE',
  57 + help='path to the file with segment rules')
53 parser.add_option('-o', '--output-file', 58 parser.add_option('-o', '--output-file',
54 dest='outputFile', 59 dest='outputFile',
55 metavar='FILE', 60 metavar='FILE',
@@ -107,6 +112,8 @@ def _parseOptions(): @@ -107,6 +112,8 @@ def _parseOptions():
107 _checkOption(opts.serializationMethod, parser, "Serialization method file is missing") 112 _checkOption(opts.serializationMethod, parser, "Serialization method file is missing")
108 _checkExactlyOneOptionSet([opts.analyzer, opts.generator], 113 _checkExactlyOneOptionSet([opts.analyzer, opts.generator],
109 parser, 'Must set exactly one FSA type: --analyzer or --generator') 114 parser, 'Must set exactly one FSA type: --analyzer or --generator')
  115 + if opts.analyzer:
  116 + _checkOption(opts.segmentsFile, parser, "Segment rules file is missing")
110 117
111 if not opts.serializationMethod.upper() in [SerializationMethod.SIMPLE, SerializationMethod.V1, SerializationMethod.V2]: 118 if not opts.serializationMethod.upper() in [SerializationMethod.SIMPLE, SerializationMethod.V1, SerializationMethod.V2]:
112 print >> sys.stderr, '--serialization-method must be one of ('+str([SerializationMethod.SIMPLE, SerializationMethod.V1, SerializationMethod.V2])+')' 119 print >> sys.stderr, '--serialization-method must be one of ('+str([SerializationMethod.SIMPLE, SerializationMethod.V1, SerializationMethod.V2])+')'
@@ -147,9 +154,8 @@ def _printStats(fsa): @@ -147,9 +154,8 @@ def _printStats(fsa):
147 logging.info('sink states num: '+str(sinkNum)) 154 logging.info('sink states num: '+str(sinkNum))
148 logging.info('array states num: '+str(arrayNum)) 155 logging.info('array states num: '+str(arrayNum))
149 156
150 -def buildAnalyzerFromPoliMorf(inputFile, tagsetFile): 157 +def buildAnalyzerFromPoliMorf(inputFile, tagset):
151 encoder = encode.MorphEncoder() 158 encoder = encode.MorphEncoder()
152 - tagset = common.Tagset(tagsetFile)  
153 fsa = FSA(encoder, tagset) 159 fsa = FSA(encoder, tagset)
154 inputData = _readPolimorfInput4Analyzer(inputFile, tagset, encoder) 160 inputData = _readPolimorfInput4Analyzer(inputFile, tagset, encoder)
155 for word, data in inputData: 161 for word, data in inputData:
@@ -160,7 +166,7 @@ def buildAnalyzerFromPoliMorf(inputFile, tagsetFile): @@ -160,7 +166,7 @@ def buildAnalyzerFromPoliMorf(inputFile, tagsetFile):
160 166
161 def buildGeneratorFromPoliMorf(inputFile, tagsetFile): 167 def buildGeneratorFromPoliMorf(inputFile, tagsetFile):
162 encoder = encode.Encoder4Generator() 168 encoder = encode.Encoder4Generator()
163 - tagset = common.Tagset(tagsetFile) 169 + tagset = Tagset(tagsetFile)
164 fsa = FSA(encoder, tagset) 170 fsa = FSA(encoder, tagset)
165 inputData = _readPolimorfInput4Generator(inputFile, tagset, encoder) 171 inputData = _readPolimorfInput4Generator(inputFile, tagset, encoder)
166 for word, data in inputData: 172 for word, data in inputData:
@@ -175,10 +181,15 @@ def main(opts): @@ -175,10 +181,15 @@ def main(opts):
175 else: 181 else:
176 logging.basicConfig(level=logging.INFO) 182 logging.basicConfig(level=logging.INFO)
177 183
  184 + tagset = Tagset(opts.tagsetFile)
  185 +
178 if opts.analyzer: 186 if opts.analyzer:
179 - fsa = buildAnalyzerFromPoliMorf(opts.inputFile, opts.tagsetFile) 187 + fsa = buildAnalyzerFromPoliMorf(opts.inputFile, tagset)
  188 + segmentRulesManager = rulesParser.RulesParser(tagset).parse(opts.segmentsFile)
  189 + additionalData = segmentRulesManager.serialize()
180 else: 190 else:
181 - fsa = buildGeneratorFromPoliMorf(opts.inputFile, opts.tagsetFile) 191 + fsa = buildGeneratorFromPoliMorf(opts.inputFile, tagset)
  192 + additionalData = bytearray()
182 193
183 if opts.trainFile: 194 if opts.trainFile:
184 logging.info('training with '+opts.trainFile+' ...') 195 logging.info('training with '+opts.trainFile+' ...')
fsabuilder/morfeuszbuilder/fsa/fsa.py
@@ -119,4 +119,3 @@ class FSA(object): @@ -119,4 +119,3 @@ class FSA(object):
119 state.reverseOffset = currReverseOffset 119 state.reverseOffset = currReverseOffset
120 for state in self.initialState.dfs(set()): 120 for state in self.initialState.dfs(set()):
121 state.offset = currReverseOffset - state.reverseOffset 121 state.offset = currReverseOffset - state.reverseOffset
122 -  
123 \ No newline at end of file 122 \ No newline at end of file
fsabuilder/morfeuszbuilder/fsa/fsa.pyc
No preview for this file type
fsabuilder/morfeuszbuilder/fsa/serializer.py
@@ -45,16 +45,15 @@ class Serializer(object): @@ -45,16 +45,15 @@ class Serializer(object):
45 45
46 def serialize2BinaryFile(self, fname): 46 def serialize2BinaryFile(self, fname):
47 with open(fname, 'wb') as f: 47 with open(fname, 'wb') as f:
48 - f.write(self.fsa2bytearray()) 48 + f.write(self.fsa2bytearray(self.serializeTagset(self.fsa.tagset)))
49 49
50 def getStateSize(self, state): 50 def getStateSize(self, state):
51 raise NotImplementedError('Not implemented') 51 raise NotImplementedError('Not implemented')
52 52
53 - def fsa2bytearray(self): 53 + def fsa2bytearray(self, additionalData=bytearray()):
54 res = bytearray() 54 res = bytearray()
55 - res.extend(self.serializePrologue(self.serializeTagset(self.fsa.tagset))) 55 + res.extend(self.serializePrologue(additionalData))
56 self.fsa.calculateOffsets(sizeCounter=lambda state: self.getStateSize(state)) 56 self.fsa.calculateOffsets(sizeCounter=lambda state: self.getStateSize(state))
57 - logging.debug('SERIALIZE')  
58 for state in sorted(self.fsa.dfs(), key=lambda s: s.offset): 57 for state in sorted(self.fsa.dfs(), key=lambda s: s.offset):
59 res.extend(self.state2bytearray(state)) 58 res.extend(self.state2bytearray(state))
60 return res 59 return res
fsabuilder/morfeuszbuilder/fsa/serializer.pyc
No preview for this file type
fsabuilder/morfeuszbuilder/fsa/state.py
@@ -8,6 +8,8 @@ class State(object): @@ -8,6 +8,8 @@ class State(object):
8 ''' 8 '''
9 A state in an automaton 9 A state in an automaton
10 ''' 10 '''
  11 +
  12 + statesCounter = 0
11 13
12 def __init__(self, additionalData=None): 14 def __init__(self, additionalData=None):
13 self.transitionsMap = {} 15 self.transitionsMap = {}
@@ -18,6 +20,9 @@ class State(object): @@ -18,6 +20,9 @@ class State(object):
18 self.label2Freq = {} 20 self.label2Freq = {}
19 self.serializeAsArray = False 21 self.serializeAsArray = False
20 self.additionalData = additionalData 22 self.additionalData = additionalData
  23 +
  24 + self.idx = State.statesCounter
  25 + State.statesCounter += 1
21 26
22 @property 27 @property
23 def transitionsNum(self): 28 def transitionsNum(self):
@@ -51,10 +56,16 @@ class State(object): @@ -51,10 +56,16 @@ class State(object):
51 else: 56 else:
52 return self.encodedData 57 return self.encodedData
53 58
54 - def dfs(self, alreadyVisited=set(), sortKey=lambda (_, state): -state.freq): 59 + def dfs(self, alreadyVisited, sortKey=lambda (_, state): -state.freq):
55 if not self in alreadyVisited: 60 if not self in alreadyVisited:
  61 + alreadyVisited.add(self)
56 for _, state in sorted(self.transitionsMap.iteritems(), key=sortKey): 62 for _, state in sorted(self.transitionsMap.iteritems(), key=sortKey):
57 for state1 in state.dfs(alreadyVisited): 63 for state1 in state.dfs(alreadyVisited):
58 yield state1 64 yield state1
59 - alreadyVisited.add(self)  
60 yield self 65 yield self
  66 +
  67 + def debug(self):
  68 + print '----------------'
  69 + print 'STATE:', self.idx
  70 + for label, s in self.transitionsMap.iteritems():
  71 + print label, '-->', s.idx
fsabuilder/morfeuszbuilder/fsa/state.pyc
No preview for this file type
fsabuilder/morfeuszbuilder/fsa/visualizer.py
@@ -12,7 +12,7 @@ class Visualizer(object): @@ -12,7 +12,7 @@ class Visualizer(object):
12 def __init__(self): 12 def __init__(self):
13 pass 13 pass
14 14
15 - def visualize(self, fsa): 15 + def visualize(self, fsa, charLabels=True):
16 G = nx.DiGraph() 16 G = nx.DiGraph()
17 allStates = list(reversed(list(fsa.initialState.dfs(set())))) 17 allStates = list(reversed(list(fsa.initialState.dfs(set()))))
18 edgeLabelsMap = {} 18 edgeLabelsMap = {}
@@ -21,10 +21,12 @@ class Visualizer(object): @@ -21,10 +21,12 @@ class Visualizer(object):
21 G.add_node(idx, offset=state.offset) 21 G.add_node(idx, offset=state.offset)
22 for c, targetState in state.transitionsMap.iteritems(): 22 for c, targetState in state.transitionsMap.iteritems():
23 G.add_edge(idx, allStates.index(targetState)) 23 G.add_edge(idx, allStates.index(targetState))
24 - label = chr(c) if c <= 127 else '%' 24 + label = (chr(c) if c <= 127 else '%') if charLabels \
  25 + else c
25 edgeLabelsMap[(idx, allStates.index(targetState))] = label 26 edgeLabelsMap[(idx, allStates.index(targetState))] = label
26 nodeLabelsMap[idx] = state.offset if not state.isAccepting() else state.encodedData + '(' + str(state.offset) + ')' 27 nodeLabelsMap[idx] = state.offset if not state.isAccepting() else state.encodedData + '(' + str(state.offset) + ')'
27 pos=nx.shell_layout(G) 28 pos=nx.shell_layout(G)
  29 +# pos=nx.random_layout(G)
28 nx.draw_networkx_nodes(G, 30 nx.draw_networkx_nodes(G,
29 pos, 31 pos,
30 nodelist=list([allStates.index(s) for s in allStates if not s.isAccepting()]), 32 nodelist=list([allStates.index(s) for s in allStates if not s.isAccepting()]),
fsabuilder/morfeuszbuilder/fsa/visualizer.pyc
No preview for this file type
fsabuilder/morfeuszbuilder/segrules/preprocessor.py
@@ -6,8 +6,7 @@ Created on 23 sty 2014 @@ -6,8 +6,7 @@ Created on 23 sty 2014
6 import re 6 import re
7 from pyparsing import * 7 from pyparsing import *
8 8
9 -identifier = Word(alphas, bodyChars=alphanums+'_')  
10 -token = Word(alphas, bodyChars=alphanums+'_+>') 9 +identifier = Word(alphas, bodyChars=alphanums+'_>*+')
11 define = Keyword('#define').suppress() + identifier + Optional(Suppress('(') + identifier + Suppress(')')) + restOfLine + LineEnd() + StringEnd() 10 define = Keyword('#define').suppress() + identifier + Optional(Suppress('(') + identifier + Suppress(')')) + restOfLine + LineEnd() + StringEnd()
12 ifdef = Keyword('#ifdef').suppress() + identifier + LineEnd() + StringEnd() 11 ifdef = Keyword('#ifdef').suppress() + identifier + LineEnd() + StringEnd()
13 endif = Keyword('#endif').suppress() + LineEnd() + StringEnd() 12 endif = Keyword('#endif').suppress() + LineEnd() + StringEnd()
fsabuilder/morfeuszbuilder/segrules/rules.py
@@ -34,6 +34,9 @@ class TagRule(SegmentRule): @@ -34,6 +34,9 @@ class TagRule(SegmentRule):
34 34
35 def _doAddToNFA(self, startState, endState): 35 def _doAddToNFA(self, startState, endState):
36 startState.addTransition(self.segnum, endState) 36 startState.addTransition(self.segnum, endState)
  37 +
  38 + def __str__(self):
  39 + return u''+self.segnum
37 40
38 class UnaryRule(SegmentRule): 41 class UnaryRule(SegmentRule):
39 42
@@ -95,12 +98,3 @@ class ZeroOrMoreRule(UnaryRule): @@ -95,12 +98,3 @@ class ZeroOrMoreRule(UnaryRule):
95 self.child._doAddToNFA(intermStartState, intermEndState) 98 self.child._doAddToNFA(intermStartState, intermEndState)
96 intermEndState.addTransition(None, endState) 99 intermEndState.addTransition(None, endState)
97 endState.addTransition(None, intermStartState) 100 endState.addTransition(None, intermStartState)
98 -  
99 -class IgnoreOrthRule(UnaryRule):  
100 -  
101 - def __init__(self, child):  
102 - super(IgnoreOrthRule, self).__init__(child)  
103 -  
104 - def _doAddToNFA(self, startState, endState):  
105 - startState.addTransition(self.child.segnum, endState, ignoreOrth=True)  
106 -  
fsabuilder/morfeuszbuilder/segrules/rulesManager.py 0 → 100644
  1 +'''
  2 +Created on 20 lut 2014
  3 +
  4 +@author: mlenart
  5 +'''
  6 +
  7 +class RulesManager(object):
  8 +
  9 + def __init__(self):
  10 + self.options2DFA = {}
  11 +
  12 + def _options2Key(self, optionsMap):
  13 + return frozenset(optionsMap.items())
  14 +
  15 + def addDFA4Options(self, optionsMap, dfa):
  16 + self.options2DFA[self._options2Key(optionsMap)] = dfa
  17 +
  18 + def serialize(self):
  19 + pass
0 \ No newline at end of file 20 \ No newline at end of file
fsabuilder/morfeuszbuilder/segrules/rulesNFA.py
@@ -8,33 +8,98 @@ from morfeuszbuilder.fsa import fsa, state, encode @@ -8,33 +8,98 @@ from morfeuszbuilder.fsa import fsa, state, encode
8 8
9 class RulesNFAState(object): 9 class RulesNFAState(object):
10 10
11 - def __init__(self, initial=False, final=False): 11 + statesCounter = 0
  12 +
  13 + def __init__(self, initial=False, final=False, weak=False):
12 self.transitionsMap = {} 14 self.transitionsMap = {}
13 self.initial = initial 15 self.initial = initial
14 self.final = final 16 self.final = final
  17 + self.weak = weak
  18 + self.idx = RulesNFAState.statesCounter
  19 + RulesNFAState.statesCounter += 1
  20 +
  21 + def addTransition(self, label, targetState):
  22 + self.transitionsMap.setdefault(label, set())
  23 + self.transitionsMap[label].add(targetState)
  24 +
  25 + def getClosure(self, visited):
  26 + if self in visited:
  27 + return set()
  28 + else:
  29 + visited.add(self)
  30 + res = set()
  31 + res.add(self)
  32 + for nextState in self.transitionsMap.get(None, []):
  33 + if self.idx in [6,8,4]:
  34 + print nextState.idx
  35 + print self.transitionsMap
  36 + res |= nextState.getClosure(visited)
  37 + return res
15 38
16 - def addTransition(self, label, targetState, ignoreOrth=False):  
17 - assert not ignoreOrth or label is not None  
18 - self.transitionsMap.setdefault((label, ignoreOrth), set())  
19 - self.transitionsMap[(label, ignoreOrth)].add(targetState) 39 + def dfs(self, visitedStates=set()):
  40 + if not self in visitedStates:
  41 + visitedStates.add(self)
  42 + yield self
  43 + for _, nextStates in self.transitionsMap.iteritems():
  44 + for state in nextStates:
  45 + for state1 in state.dfs():
  46 + yield state1
  47 +
  48 + def debug(self):
  49 + print '----------------'
  50 + print 'STATE:', self.idx
  51 + for label, nextStates in self.transitionsMap.iteritems():
  52 + print label, '-->', [s.idx for s in sorted(nextStates, key=lambda s: s.idx)]
20 53
21 class RulesNFA(object): 54 class RulesNFA(object):
22 55
23 - def __init__(self, key2Def={}): 56 + def __init__(self):
24 self.initialState = RulesNFAState(initial=True) 57 self.initialState = RulesNFAState(initial=True)
25 58
26 - def _doConvertState(self, dfaState, nfaStates):  
27 - for label, (nextIgnoreOrth, nextNFAStates) in self._groupOutputByLabels(nfaStates).iteritems():  
28 - nextDFAState = state.State(additionalData=nextIgnoreOrth) 59 + def _groupOutputByLabels(self, nfaStates):
  60 + res = {}
  61 + for nfaState in nfaStates:
  62 + for label, nextStates in nfaState.transitionsMap.iteritems():
  63 + if label is not None:
  64 + res.setdefault(label, set())
  65 + for nextNFAState in nextStates:
  66 + res[label] |= nextNFAState.getClosure(set())
  67 +# print 'closure of', nextNFAState.idx, 'is', [s.idx for s in sorted(nextNFAState.getClosure(), key=lambda s: s.idx)]
  68 + return res
  69 +
  70 + def _doConvertState(self, dfaState, nfaStates, nfaSubset2DFAState):
  71 + assert all(map(lambda state: state.weak, nfaStates)) \
  72 + or not any(map(lambda state: state.weak, nfaStates))
  73 + weak = all(map(lambda state: state.weak, nfaStates))
  74 + final = any(map(lambda state: state.final, nfaStates))
  75 + assert not weak or not final
  76 + if final:
  77 + # dfaState should be final
  78 + # and contain info about weakness
  79 + dfaState.encodedData = bytearray([1 if weak else 0])
  80 + for label, nextNFAStates in self._groupOutputByLabels(nfaStates).iteritems():
  81 +# print '============'
  82 +# print 'states:', [s.idx for s in sorted(nfaStates, key=lambda s: s.idx)]
  83 +# print 'label:', label
  84 +# print 'nextStates:', [s.idx for s in sorted(nextNFAStates, key=lambda s: s.idx)]
  85 + key = frozenset(nextNFAStates)
  86 + if key in nfaSubset2DFAState:
  87 + nextDFAState = nfaSubset2DFAState[key]
  88 + else:
  89 + nextDFAState = state.State()
  90 + nfaSubset2DFAState[key] = nextDFAState
  91 + self._doConvertState(nextDFAState, nextNFAStates, nfaSubset2DFAState)
29 dfaState.setTransition(label, nextDFAState) 92 dfaState.setTransition(label, nextDFAState)
30 - dfaState.encodedData = bytearray()  
31 - self._doConvertState(nextDFAState, nextNFAStates)  
32 93
33 def convertToDFA(self): 94 def convertToDFA(self):
34 - dfa = fsa.FSA(encoder=None, encodeWords=False)  
35 - startStates = self.initialState.getClosure() 95 + dfa = fsa.FSA(encoder=None, encodeData=False, encodeWords=False)
  96 + startStates = self.initialState.getClosure(set())
36 assert not any(filter(lambda s: s.final, startStates)) 97 assert not any(filter(lambda s: s.final, startStates))
37 dfa.initialState = state.State(additionalData=False) 98 dfa.initialState = state.State(additionalData=False)
38 - self._doConvertState(dfa.initialState, startStates)  
39 - 99 + self._doConvertState(dfa.initialState, startStates, {frozenset(startStates): dfa.initialState})
  100 + return dfa
  101 +
  102 + def debug(self):
  103 + for state in self.initialState.dfs():
  104 + state.debug()
40 105
41 \ No newline at end of file 106 \ No newline at end of file
fsabuilder/morfeuszbuilder/segrules/rulesParser.py
@@ -3,7 +3,7 @@ from pyparsing import * @@ -3,7 +3,7 @@ from pyparsing import *
3 ParserElement.enablePackrat() 3 ParserElement.enablePackrat()
4 from morfeuszbuilder.tagset import segtypes 4 from morfeuszbuilder.tagset import segtypes
5 from morfeuszbuilder.utils import configFile, exceptions 5 from morfeuszbuilder.utils import configFile, exceptions
6 -from morfeuszbuilder.segrules import preprocessor, rules 6 +from morfeuszbuilder.segrules import preprocessor, rules, rulesManager
7 import codecs 7 import codecs
8 import re 8 import re
9 9
@@ -28,9 +28,9 @@ class RulesParser(object): @@ -28,9 +28,9 @@ class RulesParser(object):
28 return res 28 return res
29 29
30 def parse(self, filename): 30 def parse(self, filename):
31 - res = [] 31 + res = rulesManager.RulesManager()
32 32
33 - segtypesConfigFile = configFile.ConfigFile(filename, ['options', 'combinations', 'tags', 'lexemes']) 33 + segtypesConfigFile = configFile.ConfigFile(filename, ['options', 'combinations', 'tags', 'lexemes', 'segment types'])
34 key2Defs = self._getKey2Defs(segtypesConfigFile) 34 key2Defs = self._getKey2Defs(segtypesConfigFile)
35 segtypesHelper = segtypes.Segtypes(self.tagset, segtypesConfigFile) 35 segtypesHelper = segtypes.Segtypes(self.tagset, segtypesConfigFile)
36 36
@@ -39,14 +39,18 @@ class RulesParser(object): @@ -39,14 +39,18 @@ class RulesParser(object):
39 for define in defs: 39 for define in defs:
40 def2Key[define] = key 40 def2Key[define] = key
41 41
  42 + firstNFA = None
42 for defs in itertools.product(*key2Defs.values()): 43 for defs in itertools.product(*key2Defs.values()):
43 key2Def = dict([(def2Key[define], define) for define in defs]) 44 key2Def = dict([(def2Key[define], define) for define in defs])
44 - nfa = rulesNFA.RulesNFA(key2Def) 45 + nfa = rulesNFA.RulesNFA()
  46 + if not firstNFA:
  47 + firstNFA = nfa
45 combinationEnumeratedLines = segtypesConfigFile.enumerateLinesInSection('combinations') 48 combinationEnumeratedLines = segtypesConfigFile.enumerateLinesInSection('combinations')
46 combinationEnumeratedLines = list(preprocessor.preprocess(combinationEnumeratedLines, defs)) 49 combinationEnumeratedLines = list(preprocessor.preprocess(combinationEnumeratedLines, defs))
47 for rule in self._doParse(combinationEnumeratedLines, segtypesHelper): 50 for rule in self._doParse(combinationEnumeratedLines, segtypesHelper):
48 rule.addToNFA(nfa) 51 rule.addToNFA(nfa)
49 - res.append(nfa) 52 + dfa = nfa.convertToDFA()
  53 + res.addDFA4Options(key2Def, dfa)
50 return res 54 return res
51 55
52 def _doParse(self, combinationEnumeratedLines, segtypesHelper): 56 def _doParse(self, combinationEnumeratedLines, segtypesHelper):
@@ -58,14 +62,14 @@ class RulesParser(object): @@ -58,14 +62,14 @@ class RulesParser(object):
58 if not segtypesHelper.hasSegtype(segtype): 62 if not segtypesHelper.hasSegtype(segtype):
59 raise exceptions.ConfigFileException(segtypesHelper.filename, lineNum, u'%s - invalid segment type: %s' % (line, segtype)) 63 raise exceptions.ConfigFileException(segtypesHelper.filename, lineNum, u'%s - invalid segment type: %s' % (line, segtype))
60 else: 64 else:
  65 +# return rules.TagRule(segtype)
61 return rules.TagRule(segtypesHelper.getSegnum4Segtype(segtype)) 66 return rules.TagRule(segtypesHelper.getSegnum4Segtype(segtype))
62 67
63 def _doParseOneLine(self, lineNum, line, segtypesHelper): 68 def _doParseOneLine(self, lineNum, line, segtypesHelper):
64 rule = Forward() 69 rule = Forward()
65 - tagRule = Word(alphanums+'_')  
66 - ignoreOrthRule = tagRule + Suppress('>') 70 + tagRule = Word(alphanums+'_>')
67 parenRule = Suppress('(') + rule + Suppress(')') 71 parenRule = Suppress('(') + rule + Suppress(')')
68 - atomicRule = tagRule ^ ignoreOrthRule ^ parenRule 72 + atomicRule = tagRule ^ parenRule
69 zeroOrMoreRule = atomicRule + Suppress('*') 73 zeroOrMoreRule = atomicRule + Suppress('*')
70 oneOrMoreRule = atomicRule + Suppress('+') 74 oneOrMoreRule = atomicRule + Suppress('+')
71 unaryRule = atomicRule ^ zeroOrMoreRule ^ oneOrMoreRule 75 unaryRule = atomicRule ^ zeroOrMoreRule ^ oneOrMoreRule
@@ -75,19 +79,10 @@ class RulesParser(object): @@ -75,19 +79,10 @@ class RulesParser(object):
75 rule << concatRule 79 rule << concatRule
76 80
77 tagRule.setParseAction(lambda string, loc, toks: self._createNewTagRule(toks[0], lineNum, line, segtypesHelper)) 81 tagRule.setParseAction(lambda string, loc, toks: self._createNewTagRule(toks[0], lineNum, line, segtypesHelper))
78 - ignoreOrthRule.setParseAction(lambda string, loc, toks: rules.IgnoreOrthRule(toks[0]))  
79 # parenRule.setParseAction(lambda string, loc, toks: toks[0]) 82 # parenRule.setParseAction(lambda string, loc, toks: toks[0])
80 zeroOrMoreRule.setParseAction(lambda string, loc, toks: rules.ZeroOrMoreRule(toks[0])) 83 zeroOrMoreRule.setParseAction(lambda string, loc, toks: rules.ZeroOrMoreRule(toks[0]))
81 oneOrMoreRule.setParseAction(lambda string, loc, toks: rules.ConcatRule([toks[0], rules.ZeroOrMoreRule(toks[0])])) 84 oneOrMoreRule.setParseAction(lambda string, loc, toks: rules.ConcatRule([toks[0], rules.ZeroOrMoreRule(toks[0])]))
82 oneOfRule.setParseAction(lambda string, loc, toks: rules.OrRule(toks)) 85 oneOfRule.setParseAction(lambda string, loc, toks: rules.OrRule(toks))
83 concatRule.setParseAction(lambda string, loc, toks: toks[0] if len(toks) == 1 else rules.ConcatRule(toks)) 86 concatRule.setParseAction(lambda string, loc, toks: toks[0] if len(toks) == 1 else rules.ConcatRule(toks))
84 -  
85 -  
86 -# rule << tagRule ^ ignoreOrthRule ^ zeroOrMoreRule ^ oneOrMoreRule ^ orRule ^ concatRule ^ parenRule  
87 -  
88 -# tagRule.setParseAction(lambda s,l,toks: doprint(toks))  
89 -# print lineNum, line  
90 parsedRule = rule.parseString(line, parseAll=True)[0] 87 parsedRule = rule.parseString(line, parseAll=True)[0]
91 - print parsedRule  
92 return parsedRule 88 return parsedRule
93 -# print parsedLine  
fsabuilder/morfeuszbuilder/segrules/test/parserTest.py
@@ -7,12 +7,20 @@ import unittest @@ -7,12 +7,20 @@ import unittest
7 import os 7 import os
8 from morfeuszbuilder.segrules import rulesParser 8 from morfeuszbuilder.segrules import rulesParser
9 from morfeuszbuilder.tagset import tagset 9 from morfeuszbuilder.tagset import tagset
  10 +from morfeuszbuilder.fsa import visualizer, serializer
10 11
11 class Test(unittest.TestCase): 12 class Test(unittest.TestCase):
12 print 'do test' 13 print 'do test'
13 t = tagset.Tagset(os.path.join(os.path.dirname(__file__), 'polimorf.tagset')) 14 t = tagset.Tagset(os.path.join(os.path.dirname(__file__), 'polimorf.tagset'))
14 parser = rulesParser.RulesParser(t) 15 parser = rulesParser.RulesParser(t)
15 - parser.parse(os.path.join(os.path.dirname(__file__), 'segmenty.dat')) 16 + fsas = parser.parse(os.path.join(os.path.dirname(__file__), 'segmenty.dat'))
  17 + fsa = fsas[0]
  18 + for s in fsa.dfs():
  19 + s.debug()
  20 + print 'states:', len(list(fsa.dfs()))
  21 + print 'transitions:', fsa.getTransitionsNum()
  22 + visualizer.Visualizer().visualize(fsa, charLabels=False)
  23 + print 'size:', len(serializer.SimpleSerializer(fsa).fsa2bytearray(bytearray()))
16 print 'done' 24 print 'done'
17 25
18 if __name__ == "__main__": 26 if __name__ == "__main__":
fsabuilder/morfeuszbuilder/segrules/test/segmenty.dat
@@ -103,7 +103,7 @@ moze_interp( naj> adj_sup ) @@ -103,7 +103,7 @@ moze_interp( naj> adj_sup )
103 103
104 # Formy „zanegowane” gerundiów i imiesłowów: 104 # Formy „zanegowane” gerundiów i imiesłowów:
105 # np. „nie·czytanie”, „nie·przeczytany”, „nie·czytający”: 105 # np. „nie·czytanie”, „nie·przeczytany”, „nie·czytający”:
106 -moze_interp( nie > negat ) 106 +moze_interp( nie> negat )
107 107
108 # Przyimki akceptujące krótką formę „-ń” 108 # Przyimki akceptujące krótką formę „-ń”
109 moze_interp(z_on_agl) 109 moze_interp(z_on_agl)
@@ -111,7 +111,7 @@ moze_interp(z_on_agl) @@ -111,7 +111,7 @@ moze_interp(z_on_agl)
111 moze_interp(z_on_agl on_agl) 111 moze_interp(z_on_agl on_agl)
112 112
113 # Liczba zapisana jako ciąg cyfr: 113 # Liczba zapisana jako ciąg cyfr:
114 -moze_interp( dig>* dig ) 114 +#moze_interp( dig>* dig )
115 115
116 # Formacje prefiksalne 116 # Formacje prefiksalne
117 #### trzeba wydzielić odpowiednie samodze! 117 #### trzeba wydzielić odpowiednie samodze!
@@ -132,13 +132,35 @@ adj dywiz samodz @@ -132,13 +132,35 @@ adj dywiz samodz
132 # ? 132 # ?
133 samodz dywiz adj 133 samodz dywiz adj
134 134
  135 +[segment types]
  136 +naj>
  137 +nie>
  138 +prefs
  139 +prefv
  140 +dig>
  141 +adja
  142 +adj
  143 +adj_sup
  144 +negat
  145 +on_agl
  146 +z_on_agl
  147 +samotny
  148 +interp
  149 +aglsg
  150 +aglpl
  151 +praetcond
  152 +praet_sg_agl
  153 +praet_sg_na
  154 +praet_sg
  155 +praet_pl
  156 +samodz
135 157
136 [tags] 158 [tags]
137 -naj naj  
138 -nie nie 159 +naj> naj
  160 +nie> nie
139 prefs prefs 161 prefs prefs
140 prefv prefv 162 prefv prefv
141 -dig dig 163 +dig> dig
142 adja adja 164 adja adja
143 adj adj:%:pos 165 adj adj:%:pos
144 adj_sup adj:%:sup 166 adj_sup adj:%:sup
fsabuilder/morfeuszbuilder/segrules/test/segmenty1.dat 0 → 100644
  1 +[options]
  2 +aggl=permissive strict isolated
  3 +praet=split composite
  4 +
  5 +[combinations]
  6 +#define wsz_interp (interp|kropka|dywiz)*
  7 +
  8 +#define moze_interp(segmenty) wsz_interp segmenty wsz_interp
  9 +
  10 +moze_interp(samodz)
  11 +samotny
  12 +
  13 +
  14 +[segment types]
  15 +naj>
  16 +nie>
  17 +prefs
  18 +prefv
  19 +dig
  20 +adja
  21 +adj
  22 +adj_sup
  23 +negat
  24 +on_agl
  25 +z_on_agl
  26 +samotny
  27 +interp
  28 +aglsg
  29 +aglpl
  30 +praetcond
  31 +praet_sg_agl
  32 +praet_sg_na
  33 +praet_sg
  34 +praet_pl
  35 +samodz
  36 +
  37 +[tags]
  38 +naj naj
  39 +nie nie
  40 +prefs prefs
  41 +prefv prefv
  42 +dig dig
  43 +adja adja
  44 +adj adj:%:pos
  45 +adj_sup adj:%:sup
  46 +adj_sup adv:sup
  47 +negat ger:%:neg
  48 +negat pact:%:neg
  49 +negat ppas:%:neg
  50 +on_agl ppron3:sg:gen.acc:m1.m2.m3:ter:nakc:praep
  51 +z_on_agl prep:%
  52 +samotny brev:pun
  53 +samotny brev:npun
  54 +samotny intrj
  55 +interp interp
  56 +aglsg aglt:sg:%
  57 +aglpl aglt:pl:%
  58 +praetcond cond:%
  59 +praetcond praet:%:pri:%
  60 +praetcond praet:%:sec:%
  61 +praetcond praet:%:ter:%
  62 +praet_sg_agl praet:sg:%:agl
  63 +praet_sg_na praet:sg:%:nagl
  64 +praet_sg praet:sg:%
  65 +praet_pl praet:pl:%
  66 +praet_sg winien:sg:%
  67 +praet_pl winien:pl:%
  68 +samodz %
  69 +
  70 +[lexemes]
  71 +z_aglt aby:comp
  72 +z_aglt bowiem:comp
  73 +by by:qub
  74 +z_aglt by:comp
  75 +z_aglt cóż:subst
  76 +z_aglt czemu:adv
  77 +z_aglt czyżby:qub
  78 +z_aglt choćby:comp
  79 +z_aglt chociażby:comp
  80 +z_aglt dlaczego:adv
  81 +z_aglt dopóki:comp
  82 +z_aglt dopóty:conj
  83 +z_aglt gdyby:comp
  84 +z_aglt gdzie:qub
  85 +z_aglt gdzie:adv
  86 +z_aglt jakby:comp
  87 +z_aglt jakoby:comp
  88 +z_aglt kiedy:adv
  89 +z_aglt kiedy:comp
  90 +z_aglt tylko:qub
  91 +z_aglt żeby:comp
  92 +dywiz -:interp
  93 +kropka .:interp