Commit e05d60fbf11f94bc4d2eeedb0affff25d4b2cbe6 (1 parent: 1eff484c)
praca nad przechowywaniem słownika z uwzględnieniem tagsetu.
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@13 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing 10 changed files with 912 additions and 99 deletions
fsabuilder/fsa/buildfsa.py
... | ... | @@ -10,6 +10,7 @@ import logging |
10 | 10 | import codecs |
11 | 11 | import encode |
12 | 12 | import convertinput |
13 | +import common | |
13 | 14 | from fsa import FSA |
14 | 15 | from serializer import VLengthSerializer1, VLengthSerializer2, SimpleSerializer |
15 | 16 | from visualizer import Visualizer |
... | ... | @@ -36,7 +37,7 @@ class SerializationMethod(): |
36 | 37 | V1 = 'V1' |
37 | 38 | V2 = 'V2' |
38 | 39 | |
39 | -def parseOptions(): | |
40 | +def _parseOptions(): | |
40 | 41 | """ |
41 | 42 | Parses commandline args |
42 | 43 | """ |
... | ... | @@ -45,13 +46,17 @@ def parseOptions(): |
45 | 46 | dest='inputFile', |
46 | 47 | metavar='FILE', |
47 | 48 | help='path to input file') |
49 | + parser.add_option('--tagset-file', | |
50 | + dest='tagsetFile', | |
51 | + metavar='FILE', | |
52 | + help='path to the file with tagset') | |
48 | 53 | parser.add_option('-o', '--output-file', |
49 | 54 | dest='outputFile', |
50 | 55 | metavar='FILE', |
51 | 56 | help='path to output file') |
52 | - parser.add_option('-t', '--fsa-type', | |
53 | - dest='fsaType', | |
54 | - help='result FSA type - MORPH (for morphological analysis) or SPELL (for simple spell checker)') | |
57 | +# parser.add_option('-t', '--fsa-type', | |
58 | +# dest='fsaType', | |
59 | +# help='result FSA type - MORPH (for morphological analysis) or SPELL (for simple spell checker)') | |
55 | 60 | # parser.add_option('--input-format', |
56 | 61 | # dest='inputFormat', |
57 | 62 | # help='input format - ENCODED, POLIMORF or PLAIN') |
... | ... | @@ -90,7 +95,7 @@ def parseOptions(): |
90 | 95 | |
91 | 96 | opts, args = parser.parse_args() |
92 | 97 | |
93 | - if None in [opts.inputFile, opts.outputFile, opts.outputFormat, opts.fsaType, opts.serializationMethod]: | |
98 | + if None in [opts.inputFile, opts.outputFile, opts.outputFormat, opts.tagsetFile, opts.serializationMethod]: | |
94 | 99 | parser.print_help() |
95 | 100 | exit(1) |
96 | 101 | if not opts.outputFormat.upper() in [OutputFormat.BINARY, OutputFormat.CPP]: |
... | ... | @@ -101,10 +106,14 @@ def parseOptions(): |
101 | 106 | # logging.error('input format must be one of ('+str([InputFormat.ENCODED, InputFormat.POLIMORF, InputFormat.PLAIN])+')') |
102 | 107 | # parser.print_help() |
103 | 108 | # exit(1) |
104 | - if not opts.fsaType.upper() in [FSAType.MORPH, FSAType.SPELL]: | |
105 | - logging.error('--fsa-type must be one of ('+str([FSAType.MORPH, FSAType.SPELL])+')') | |
106 | - parser.print_help() | |
107 | - exit(1) | |
109 | +# if not opts.fsaType.upper() in [FSAType.MORPH, FSAType.SPELL]: | |
110 | +# logging.error('--fsa-type must be one of ('+str([FSAType.MORPH, FSAType.SPELL])+')') | |
111 | +# parser.print_help() | |
112 | +# exit(1) | |
113 | +# if opts.fsaType == FSAType.MORPH and opts.tagsetFile is None: | |
114 | +# logging.error('must provide tagset file') | |
115 | +# parser.print_help() | |
116 | +# exit(1) | |
108 | 117 | |
109 | 118 | if not opts.serializationMethod.upper() in [SerializationMethod.SIMPLE, SerializationMethod.V1, SerializationMethod.V2]: |
110 | 119 | logging.error('--serialization-method must be one of ('+str([SerializationMethod.SIMPLE, SerializationMethod.V1, SerializationMethod.V2])+')') |
... | ... | @@ -122,58 +131,93 @@ def parseOptions(): |
122 | 131 | # exit(1) |
123 | 132 | return opts |
124 | 133 | |
125 | -def readEncodedInput(inputFile): | |
126 | - with codecs.open(inputFile, 'r', 'utf8') as f: | |
127 | - for line in f: | |
128 | - word, interps = line.strip().split() | |
129 | - yield word, interps.split(u'|') | |
130 | - | |
131 | -def readPolimorfInput(inputFile, encoder): | |
134 | +def _readPolimorfInput(inputFile, tagsetFile, encoder): | |
135 | + tagset = common.Tagset(tagsetFile) | |
132 | 136 | with codecs.open(inputFile, 'r', 'utf8') as f: |
133 | - for entry in convertinput.convertPolimorf(f, lambda (word, interp): encoder.word2SortKey(word)): | |
137 | + for entry in convertinput.convertPolimorf(f, tagset, encoder): | |
134 | 138 | yield entry |
135 | 139 | |
136 | -def readPlainInput(inputFile, encoder): | |
140 | +def _readPlainInput(inputFile, encoder): | |
137 | 141 | with codecs.open(inputFile, 'r', 'utf8') as f: |
138 | 142 | for line in sorted(f, key=encoder.word2SortKey): |
139 | 143 | word = line.strip() |
140 | 144 | yield word, '' |
141 | 145 | |
142 | -def readTrainData(trainFile): | |
146 | +def _readTrainData(trainFile): | |
143 | 147 | with codecs.open(trainFile, 'r', 'utf8') as f: |
144 | 148 | for line in f: |
145 | 149 | yield line.strip() |
146 | 150 | |
151 | +def _printStats(fsa): | |
152 | + acceptingNum = 0 | |
153 | + sinkNum = 0 | |
154 | + arrayNum = 0 | |
155 | + for s in fsa.dfs(): | |
156 | + if s.isAccepting(): | |
157 | + acceptingNum += 1 | |
158 | + if s.transitionsNum == 0: | |
159 | + sinkNum += 1 | |
160 | + if s.serializeAsArray: | |
161 | + arrayNum += 1 | |
162 | + logging.info('states num: '+str(fsa.getStatesNum())) | |
163 | + logging.info('transitions num: '+str(fsa.getTransitionsNum())) | |
164 | + logging.info('accepting states num: '+str(acceptingNum)) | |
165 | + logging.info('sink states num: '+str(sinkNum)) | |
166 | + logging.info('array states num: '+str(arrayNum)) | |
167 | + | |
168 | +def buildFromPoliMorf(inputFile, tagsetFile): | |
169 | + encoder = encode.MorphEncoder() | |
170 | + fsa = FSA(encoder) | |
171 | + inputData = _readPolimorfInput(inputFile, tagsetFile, encoder) | |
172 | + fsa.feed(inputData) | |
173 | + _printStats(fsa) | |
174 | + return fsa | |
175 | + | |
176 | +def buildFromPlain(inputFile, tagsetFile): | |
177 | + pass | |
178 | + | |
147 | 179 | def main(opts): |
148 | 180 | if opts.debug: |
149 | 181 | logging.basicConfig(level=logging.DEBUG) |
150 | 182 | else: |
151 | 183 | logging.basicConfig(level=logging.INFO) |
152 | - encoder = encode.Encoder() | |
153 | - fsa = FSA(encoder) | |
154 | 184 | |
155 | - inputData = { | |
156 | - FSAType.MORPH: readPolimorfInput(opts.inputFile, encoder), | |
157 | - FSAType.SPELL: readPlainInput(opts.inputFile, encoder) | |
158 | - }[opts.fsaType] | |
185 | + fsa = buildFromPoliMorf(opts.inputFile, opts.tagsetFile) | |
186 | +# { | |
187 | +# FSAType.SPELL: buildFromPlain(opts.inputFile), | |
188 | +# FSAType.MORPH: buildFromPoliMorf(opts.inputFile, opts.tagsetFile) | |
189 | +# }[opts.fsaType] | |
159 | 190 | |
160 | - logging.info('feeding FSA with data ...') | |
161 | - fsa.feed(inputData) | |
162 | 191 | if opts.trainFile: |
163 | 192 | logging.info('training with '+opts.trainFile+' ...') |
164 | - fsa.train(readTrainData(opts.trainFile)) | |
193 | + fsa.train(_readTrainData(opts.trainFile)) | |
165 | 194 | logging.info('done training') |
195 | + | |
196 | +# encoder = { | |
197 | +# FSAType.SPELL: encode.SimpleEncoder(), | |
198 | +# FSAType.MORPH: encode.MorphEncoder() | |
199 | +# }[opts.fsaType] | |
200 | +# | |
201 | +# fsa = FSA(encoder) | |
202 | +# | |
203 | +# inputData = { | |
204 | +# FSAType.MORPH: _readPolimorfInput(opts.inputFile, opts.tagsetFile, encoder), | |
205 | +# FSAType.SPELL: _readPlainInput(opts.inputFile, encoder) | |
206 | +# }[opts.fsaType] | |
207 | + | |
208 | +# logging.info('feeding FSA with data ...') | |
209 | +# fsa.feed(inputData) | |
210 | +# if opts.trainFile: | |
211 | +# logging.info('training with '+opts.trainFile+' ...') | |
212 | +# fsa.train(readTrainData(opts.trainFile)) | |
213 | +# logging.info('done training') | |
166 | 214 | |
167 | 215 | serializer = { |
168 | 216 | SerializationMethod.SIMPLE: SimpleSerializer, |
169 | 217 | SerializationMethod.V1: VLengthSerializer1, |
170 | 218 | SerializationMethod.V2: VLengthSerializer2, |
171 | 219 | }[opts.serializationMethod](fsa) |
172 | - logging.info('states num: '+str(fsa.getStatesNum())) | |
173 | - logging.info('transitions num: '+str(fsa.getTransitionsNum())) | |
174 | - logging.info('accepting states num: '+str(len([s for s in fsa.dfs() if s.isAccepting()]))) | |
175 | - logging.info('sink states num: '+str(len([s for s in fsa.dfs() if len(s.transitionsMap.items()) == 0]))) | |
176 | - logging.info('array states num: '+str(len([s for s in fsa.dfs() if s.serializeAsArray]))) | |
220 | + | |
177 | 221 | { |
178 | 222 | OutputFormat.CPP: serializer.serialize2CppFile, |
179 | 223 | OutputFormat.BINARY: serializer.serialize2BinaryFile |
... | ... | @@ -184,7 +228,7 @@ def main(opts): |
184 | 228 | Visualizer().visualize(fsa) |
185 | 229 | |
186 | 230 | if __name__ == '__main__': |
187 | - opts = parseOptions() | |
231 | + opts = _parseOptions() | |
188 | 232 | if opts.profile: |
189 | 233 | with PyCallGraph(output=GraphvizOutput()): |
190 | 234 | main(opts) |
... | ... |
fsabuilder/fsa/common.py
0 → 100644
1 | +''' | |
2 | +Created on Nov 7, 2013 | |
3 | + | |
4 | +@author: mlenart | |
5 | +''' | |
6 | + | |
7 | +import codecs | |
8 | + | |
9 | +class Lemma(object): | |
10 | + | |
11 | + def __init__(self, cutLength, suffixToAdd): | |
12 | + self.cutLength = cutLength | |
13 | + self.suffixToAdd = suffixToAdd | |
14 | + | |
15 | +class Interpretation(object): | |
16 | + | |
17 | + def __init__(self, orth, base, tagnum, namenum, encoder): | |
18 | + assert type(orth) == unicode | |
19 | + assert type(base) == unicode | |
20 | + root = u'' | |
21 | + for o, b in zip(orth, base): | |
22 | + if o == b: | |
23 | + root += o | |
24 | + else: | |
25 | + break | |
26 | + cutLength = len(orth) - len(root) | |
27 | + self.lemma = Lemma( | |
28 | + cutLength=cutLength, | |
29 | + suffixToAdd=base[len(root):]) | |
30 | + self.tagnum = tagnum | |
31 | + self.namenum = namenum | |
32 | + | |
33 | + def getSortKey(self): | |
34 | + return (self.lemma.cutLength, self.lemma.suffixToAdd, self.tagnum, self.namenum) | |
35 | + | |
36 | + def __eq__(self, other): | |
37 | + if isinstance(other, Interpretation): | |
38 | + return self.getSortKey() == other.getSortKey() | |
39 | + else: | |
40 | + return False | |
41 | + | |
42 | + def __hash__(self): | |
43 | + return hash(self.getSortKey()) | |
44 | + | |
45 | +class Tagset(object): | |
46 | + | |
47 | + TAGS = 1 | |
48 | + NAMES = 2 | |
49 | + SEP = '\t' | |
50 | + | |
51 | + def __init__(self, filename, encoding='utf8'): | |
52 | + self.tag2tagnum = {} | |
53 | + self.name2namenum = {} | |
54 | + self._doInit(filename, encoding) | |
55 | + print self.tag2tagnum | |
56 | + print self.name2namenum | |
57 | + | |
58 | + def _doInit(self, filename, encoding): | |
59 | + addingTo = None | |
60 | + with codecs.open(filename, 'r', encoding) as f: | |
61 | + for line in f: | |
62 | + line = line.strip('\n') | |
63 | + if line == u'[TAGS]': | |
64 | + addingTo = Tagset.TAGS | |
65 | + elif line == u'[NAMES]': | |
66 | + addingTo = Tagset.NAMES | |
67 | + elif line and not line.startswith(u'#'): | |
68 | + assert addingTo in [Tagset.TAGS, Tagset.NAMES] | |
69 | + res = {Tagset.TAGS: self.tag2tagnum, | |
70 | + Tagset.NAMES: self.name2namenum}[addingTo] | |
71 | + tagNum = line.split(Tagset.SEP)[0] | |
72 | + tag = line.split(Tagset.SEP)[1] | |
73 | + assert tag not in res | |
74 | + res[tag] = int(tagNum) | |
... | ... |
fsabuilder/fsa/convertinput.py
... | ... | @@ -3,59 +3,42 @@ Created on Oct 23, 2013 |
3 | 3 | |
4 | 4 | @author: mlenart |
5 | 5 | ''' |
6 | -import sys | |
7 | -import fileinput | |
8 | 6 | import logging |
9 | -from encode import Encoder | |
7 | +from common import Interpretation | |
10 | 8 | |
11 | -def _encodeInterp(orth, base, tag, name): | |
12 | - removePrefix = 0 | |
13 | - root = u'' | |
14 | - for o, b in zip(orth, base): | |
15 | - if o == b: | |
16 | - root += o | |
17 | - else: | |
18 | - break | |
19 | - removeSuffixNum = len(orth) - len(root) | |
20 | - addSuffix = base[len(root):] | |
21 | - return u'+'.join([ | |
22 | - chr(ord('A')+removePrefix) + chr(ord('A')+removeSuffixNum) + addSuffix, | |
23 | - tag, | |
24 | - name]) | |
9 | +def _sortLines(inputLines, encoder): | |
10 | + logging.info('sorting input...') | |
11 | + lines = list(inputLines) | |
12 | + logging.info('done read data into list') | |
13 | + lines.sort(key=lambda line: encoder.word2SortKey(line.split('\t')[0])) | |
14 | + logging.info('done sorting') | |
15 | + return lines | |
25 | 16 | |
26 | -def _parsePolimorf(inputLines): | |
27 | - for line0 in inputLines: | |
28 | - line = line0.strip(u'\n') | |
17 | +def _parseLines(inputLines, tagset, encoder): | |
18 | + for line in inputLines: | |
19 | + line = line.strip(u'\n') | |
29 | 20 | if line: |
30 | 21 | # print line |
31 | 22 | orth, base, tag, name = line.split(u'\t') |
32 | - yield (orth, _encodeInterp(orth, base, tag, name)) | |
23 | + tagnum = tagset.tag2tagnum[tag] | |
24 | + namenum = tagset.name2namenum[name] | |
25 | + yield (orth, Interpretation(orth, base, tagnum, namenum, encoder)) | |
33 | 26 | |
34 | -def _sortAndMergeParsedInput(inputData, key=lambda k: k): | |
35 | - logging.info('sorting input...') | |
36 | - entries = list(inputData) | |
37 | - entries.sort(key=key) | |
38 | - logging.info('done sorting') | |
27 | +def _mergeEntries(inputLines): | |
39 | 28 | prevOrth = None |
40 | 29 | prevInterps = None |
41 | - for orth, interp in entries: | |
30 | + for orth, interp in inputLines: | |
31 | + orth = orth.lower() | |
32 | + assert orth | |
42 | 33 | if prevOrth and prevOrth == orth: |
43 | 34 | prevInterps.append(interp) |
44 | 35 | else: |
45 | 36 | if prevOrth: |
46 | - yield (prevOrth, sorted(set(prevInterps))) | |
37 | + yield (prevOrth, frozenset(prevInterps)) | |
47 | 38 | prevOrth = orth |
48 | 39 | prevInterps = [interp] |
40 | + yield (prevOrth, frozenset(prevInterps)) | |
49 | 41 | |
50 | -def convertPolimorf(inputLines, sortKey=lambda k: k): | |
51 | - for orth, interps in _sortAndMergeParsedInput(_parsePolimorf(inputLines), key=sortKey): | |
42 | +def convertPolimorf(inputLines, tagset, encoder): | |
43 | + for orth, interps in _mergeEntries(_parseLines(_sortLines(inputLines, encoder), tagset, encoder)): | |
52 | 44 | yield orth, interps |
53 | - | |
54 | -def _decodeInputLines(rawInputLines, encoding): | |
55 | - for line in rawInputLines: | |
56 | - yield line.decode(encoding) | |
57 | - | |
58 | -if __name__ == '__main__': | |
59 | - encoder = Encoder() | |
60 | - for orth, interps in convertPolimorf(_decodeInputLines(fileinput.input(), 'utf8'), lambda (orth, interp): encoder.word2SortKey(orth)): | |
61 | - print u'\t'.join([orth, u'|'.join(interps)]).encode('utf8') | |
... | ... |
fsabuilder/fsa/encode.py
1 | 1 | ''' |
2 | 2 | Created on Oct 23, 2013 |
3 | 3 | |
4 | -@author: lennyn | |
4 | +@author: mlenart | |
5 | 5 | ''' |
6 | 6 | |
7 | +import logging | |
8 | + | |
7 | 9 | class Encoder(object): |
8 | 10 | ''' |
9 | 11 | classdocs |
10 | 12 | ''' |
11 | 13 | |
12 | 14 | |
13 | - def __init__(self, encoding='utf8', appendZero=False): | |
15 | + def __init__(self, encoding='utf8'): | |
14 | 16 | ''' |
15 | 17 | Constructor |
16 | 18 | ''' |
17 | 19 | self.encoding = encoding |
18 | - self.appendZero = appendZero | |
19 | 20 | |
20 | - def encodeWord(self, word): | |
21 | + def encodeWord(self, word, lowercase=True): | |
21 | 22 | assert type(word) == unicode |
22 | - res = bytearray(word, self.encoding) | |
23 | - if self.appendZero: | |
24 | - res.append(0) | |
23 | + res = bytearray(word.lower() if lowercase else word, self.encoding) | |
25 | 24 | return res |
26 | 25 | |
27 | 26 | def encodeData(self, data): |
28 | - return bytearray(u'|'.join(data).encode(self.encoding)) + bytearray([0]) | |
27 | + raise NotImplementedError() | |
28 | +# return bytearray(u'|'.join(data).encode(self.encoding)) + bytearray([0]) | |
29 | 29 | |
30 | 30 | def decodeData(self, rawData): |
31 | + return NotImplementedError() | |
31 | 32 | # print unicode(str(rawData), self.encoding)[:-1] |
32 | 33 | # print unicode(str(rawData), self.encoding)[:-1].split(u'|') |
33 | - return unicode(str(rawData), self.encoding)[:-1].split(u'|') | |
34 | +# return unicode(str(rawData), self.encoding)[:-1].split(u'|') | |
34 | 35 | |
35 | 36 | def word2SortKey(self, word): |
36 | - return word.encode(self.encoding) | |
37 | + return word.lower().encode(self.encoding) | |
38 | + | |
39 | +class SimpleEncoder(Encoder): | |
40 | + | |
41 | + def __init__(self, encoding='utf8', appendZero=False): | |
42 | + super(SimpleEncoder, self).__init__(encoding, appendZero) | |
43 | + | |
44 | + def encodeData(self, data): | |
45 | + return bytearray(data, encoding=self.encoding) + bytearray([0]) | |
46 | + | |
47 | + def decodeData(self, rawData): | |
48 | + return unicode(str(rawData)[:-1], self.encoding) | |
49 | + | |
50 | +class MorphEncoder(Encoder): | |
51 | + | |
52 | + def __init__(self, encoding='utf8'): | |
53 | + super(MorphEncoder, self).__init__(encoding) | |
54 | + | |
55 | + def encodeData(self, interpsList): | |
56 | + res = bytearray() | |
57 | +# print interpsList | |
58 | + firstByte = len(interpsList) | |
59 | + assert firstByte < 256 | |
60 | + assert firstByte > 0 | |
61 | + res.append(firstByte) | |
62 | + assert type(interpsList) == frozenset | |
63 | + for interp in sorted(interpsList, key=lambda i: i.getSortKey()): | |
64 | + res.extend(self._encodeLemma(interp.lemma)) | |
65 | + res.extend(self._encodeTagNum(interp.tagnum)) | |
66 | + res.extend(self._encodeNameNum(interp.namenum)) | |
67 | + return res | |
68 | + | |
69 | + def _encodeLemma(self, lemma): | |
70 | + res = bytearray() | |
71 | + assert lemma.cutLength < 256 and lemma.cutLength >= 0 | |
72 | + res.append(lemma.cutLength) | |
73 | + res.extend(self.encodeWord(lemma.suffixToAdd, lowercase=False)) | |
74 | + res.append(0) | |
75 | + return res | |
76 | + | |
77 | + def _encodeTagNum(self, tagnum): | |
78 | + res = bytearray() | |
79 | +# logging.info((tagnum & 0xFF00) >> 8) | |
80 | + assert tagnum < 65536 and tagnum >= 0 | |
81 | + res.append((tagnum & 0xFF00) >> 8) | |
82 | + res.append(tagnum & 0x00FF) | |
83 | +# logging.info('%d %s %s' % (tagnum, hex(res[0]), hex(res[1]))) | |
84 | + return res | |
85 | + | |
86 | + def _encodeNameNum(self, namenum): | |
87 | + assert namenum < 256 and namenum >= 0 | |
88 | + return bytearray([namenum]) | |
89 | + | |
37 | 90 | \ No newline at end of file |
... | ... |
fsabuilder/fsa/fsa.py
... | ... | @@ -31,16 +31,15 @@ class FSA(object): |
31 | 31 | # allWords = [] |
32 | 32 | for n, (word, data) in enumerate(input, start=1): |
33 | 33 | assert data is not None |
34 | - if type(data) in [str, unicode]: | |
35 | - data = [data] | |
36 | 34 | encodedWord = self.encodeWord(word) |
37 | - assert encodedWord >= self.encodedPrevWord | |
35 | + assert encodedWord > self.encodedPrevWord | |
38 | 36 | if encodedWord > self.encodedPrevWord: |
39 | 37 | self._addSorted(encodedWord, self.encodeData(data)) |
40 | 38 | self.encodedPrevWord = encodedWord |
41 | 39 | # assert self.tryToRecognize(word) == data |
42 | 40 | if n % 10000 == 0: |
43 | 41 | logging.info(word) |
42 | + logging.info(str(self.register.getStatesNum())) | |
44 | 43 | # allWords.append(word) |
45 | 44 | for label in encodedWord: |
46 | 45 | self.label2Freq[label] = self.label2Freq.get(label, 0) + 1 |
... | ... |
fsabuilder/fsa/test/PoliMorfSmall.tab
0 → 100644
1 | +abdominalności abdominalność subst:pl:acc:f pospolita | |
2 | +abdominalności abdominalność subst:pl:gen:f pospolita | |
3 | +abdominalności abdominalność subst:pl:nom:f pospolita | |
4 | +abdominalności abdominalność subst:pl:voc:f pospolita | |
5 | +abdominalności abdominalność subst:sg:dat:f pospolita | |
6 | +abdominalności abdominalność subst:sg:gen:f pospolita | |
7 | +abdominalności abdominalność subst:sg:loc:f pospolita | |
8 | +abdominalności abdominalność subst:sg:voc:f pospolita | |
9 | +abdominalnościach abdominalność subst:pl:loc:f pospolita | |
10 | +abdominalnościami abdominalność subst:pl:inst:f pospolita | |
11 | +abdominalnością abdominalność subst:sg:inst:f pospolita | |
12 | +abdominalnościom abdominalność subst:pl:dat:f pospolita | |
13 | +abdominalność abdominalność subst:sg:acc:f pospolita | |
14 | +abdominalność abdominalność subst:sg:nom:f pospolita | |
15 | +abdominalna abdominalny adj:sg:nom.voc:f:pos pospolita | |
16 | +abdominalną abdominalny adj:sg:acc:f:pos pospolita | |
17 | +abdominalną abdominalny adj:sg:inst:f:pos pospolita | |
18 | +abdominalne abdominalny adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos pospolita | |
19 | +abdominalne abdominalny adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos pospolita | |
20 | +abdominalne abdominalny adj:sg:acc:n1.n2:pos pospolita | |
21 | +abdominalne abdominalny adj:sg:nom.voc:n1.n2:pos pospolita | |
22 | +abdominalnego abdominalny adj:sg:acc:m1.m2:pos pospolita | |
23 | +abdominalnego abdominalny adj:sg:gen:m1.m2.m3.n1.n2:pos pospolita | |
24 | +abdominalnej abdominalny adj:sg:dat:f:pos pospolita | |
25 | +abdominalnej abdominalny adj:sg:gen:f:pos pospolita | |
26 | +abdominalnej abdominalny adj:sg:loc:f:pos pospolita | |
27 | +abdominalnemu abdominalny adj:sg:dat:m1.m2.m3.n1.n2:pos pospolita | |
28 | +abdominalni abdominalny adj:pl:nom.voc:m1.p1:pos pospolita | |
29 | +abdominalno abdominalny adja pospolita | |
30 | +abdominalny abdominalny adj:sg:acc:m3:pos pospolita | |
31 | +abdominalny abdominalny adj:sg:nom.voc:m1.m2.m3:pos pospolita | |
32 | +abdominalnych abdominalny adj:pl:acc:m1.p1:pos pospolita | |
33 | +abdominalnych abdominalny adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos pospolita | |
34 | +abdominalnych abdominalny adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos pospolita | |
35 | +abdominalnym abdominalny adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos pospolita | |
36 | +abdominalnym abdominalny adj:sg:inst:m1.m2.m3.n1.n2:pos pospolita | |
37 | +abdominalnym abdominalny adj:sg:loc:m1.m2.m3.n1.n2:pos pospolita | |
38 | +abdominalnymi abdominalny adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos pospolita | |
39 | +abdominoplastyce abdominoplastyka subst:sg:dat:f pospolita | |
40 | +abdominoplastyce abdominoplastyka subst:sg:loc:f pospolita | |
41 | +abdominoplastyk abdominoplastyka subst:pl:gen:f pospolita | |
42 | +abdominoplastyka abdominoplastyka subst:sg:nom:f pospolita | |
43 | +abdominoplastykach abdominoplastyka subst:pl:loc:f pospolita | |
44 | +abdominoplastykami abdominoplastyka subst:pl:inst:f pospolita | |
45 | +abdominoplastyką abdominoplastyka subst:sg:inst:f pospolita | |
46 | +abdominoplastykę abdominoplastyka subst:sg:acc:f pospolita | |
47 | +abdominoplastyki abdominoplastyka subst:pl:acc:f pospolita | |
48 | +abdominoplastyki abdominoplastyka subst:pl:nom:f pospolita | |
49 | +abdominoplastyki abdominoplastyka subst:pl:voc:f pospolita | |
50 | +abdominoplastyki abdominoplastyka subst:sg:gen:f pospolita | |
51 | +abdominoplastyko abdominoplastyka subst:sg:voc:f pospolita | |
52 | +abdominoplastykom abdominoplastyka subst:pl:dat:f pospolita | |
0 | 53 | \ No newline at end of file |
... | ... |
fsabuilder/fsa/test/polimorf.tagset
0 → 100644
1 | +#!MORFEUSZ-TAGSET 0.1 | |
2 | + | |
3 | +[TAGS] | |
4 | + | |
5 | +0 adj:pl:acc:m1.p1:com | |
6 | +1 adj:pl:acc:m1.p1:pos | |
7 | +2 adj:pl:acc:m1.p1:sup | |
8 | +3 adj:pl:acc:m2.m3.f.n1.n2.p2.p3:com | |
9 | +4 adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos | |
10 | +5 adj:pl:acc:m2.m3.f.n1.n2.p2.p3:sup | |
11 | +6 adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:com | |
12 | +7 adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos | |
13 | +8 adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:sup | |
14 | +9 adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:com | |
15 | +10 adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos | |
16 | +11 adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:sup | |
17 | +12 adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:com | |
18 | +13 adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos | |
19 | +14 adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:sup | |
20 | +15 adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:com | |
21 | +16 adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos | |
22 | +17 adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:sup | |
23 | +18 adj:pl:nom.voc:m1.p1:com | |
24 | +19 adj:pl:nom.voc:m1.p1:pos | |
25 | +20 adj:pl:nom.voc:m1.p1:sup | |
26 | +21 adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:com | |
27 | +22 adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos | |
28 | +23 adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:sup | |
29 | +24 adj:pl:nom:m1.p1:pos | |
30 | +25 adj:pl:nom:m2.m3.f.n1.n2.p2.p3:pos | |
31 | +26 adj:sg:acc:f:com | |
32 | +27 adj:sg:acc:f:pos | |
33 | +28 adj:sg:acc:f:sup | |
34 | +29 adj:sg:acc:m1.m2:com | |
35 | +30 adj:sg:acc:m1.m2:pos | |
36 | +31 adj:sg:acc:m1.m2:sup | |
37 | +32 adj:sg:acc:m3:com | |
38 | +33 adj:sg:acc:m3:pos | |
39 | +34 adj:sg:acc:m3:sup | |
40 | +35 adj:sg:acc:n1.n2:com | |
41 | +36 adj:sg:acc:n1.n2:pos | |
42 | +37 adj:sg:acc:n1.n2:sup | |
43 | +38 adj:sg:dat:f:com | |
44 | +39 adj:sg:dat:f:pos | |
45 | +40 adj:sg:dat:f:sup | |
46 | +41 adj:sg:dat:m1.m2.m3.n1.n2:com | |
47 | +42 adj:sg:dat:m1.m2.m3.n1.n2:pos | |
48 | +43 adj:sg:dat:m1.m2.m3.n1.n2:sup | |
49 | +44 adj:sg:gen:f:com | |
50 | +45 adj:sg:gen:f:pos | |
51 | +46 adj:sg:gen:f:sup | |
52 | +47 adj:sg:gen:m1.m2.m3.n1.n2:com | |
53 | +48 adj:sg:gen:m1.m2.m3.n1.n2:pos | |
54 | +49 adj:sg:gen:m1.m2.m3.n1.n2:sup | |
55 | +50 adj:sg:inst:f:com | |
56 | +51 adj:sg:inst:f:pos | |
57 | +52 adj:sg:inst:f:sup | |
58 | +53 adj:sg:inst:m1.m2.m3.n1.n2:com | |
59 | +54 adj:sg:inst:m1.m2.m3.n1.n2:pos | |
60 | +55 adj:sg:inst:m1.m2.m3.n1.n2:sup | |
61 | +56 adj:sg:loc:f:com | |
62 | +57 adj:sg:loc:f:pos | |
63 | +58 adj:sg:loc:f:sup | |
64 | +59 adj:sg:loc:m1.m2.m3.n1.n2:com | |
65 | +60 adj:sg:loc:m1.m2.m3.n1.n2:pos | |
66 | +61 adj:sg:loc:m1.m2.m3.n1.n2:sup | |
67 | +62 adj:sg:nom.voc:f:com | |
68 | +63 adj:sg:nom.voc:f:pos | |
69 | +64 adj:sg:nom.voc:f:sup | |
70 | +65 adj:sg:nom.voc:m1.m2.m3:com | |
71 | +66 adj:sg:nom.voc:m1.m2.m3:pos | |
72 | +67 adj:sg:nom.voc:m1.m2.m3:sup | |
73 | +68 adj:sg:nom.voc:n1.n2:com | |
74 | +69 adj:sg:nom.voc:n1.n2:pos | |
75 | +70 adj:sg:nom.voc:n1.n2:sup | |
76 | +71 adj:sg:nom:f:pos | |
77 | +72 adj:sg:nom:m1.m2.m3:pos | |
78 | +73 adj:sg:nom:n1.n2:pos | |
79 | +74 adja | |
80 | +75 adjc | |
81 | +76 adjp | |
82 | +77 adv | |
83 | +78 adv:com | |
84 | +79 adv:pos | |
85 | +80 adv:sup | |
86 | +81 aglt:pl:pri:imperf:nwok | |
87 | +82 aglt:pl:pri:imperf:wok | |
88 | +83 aglt:pl:sec:imperf:nwok | |
89 | +84 aglt:pl:sec:imperf:wok | |
90 | +85 aglt:sg:pri:imperf:nwok | |
91 | +86 aglt:sg:pri:imperf:wok | |
92 | +87 aglt:sg:sec:imperf:nwok | |
93 | +88 aglt:sg:sec:imperf:wok | |
94 | +89 bedzie:pl:pri:imperf | |
95 | +90 bedzie:pl:sec:imperf | |
96 | +91 bedzie:pl:ter:imperf | |
97 | +92 bedzie:sg:pri:imperf | |
98 | +93 bedzie:sg:sec:imperf | |
99 | +94 bedzie:sg:ter:imperf | |
100 | +95 burk | |
101 | +96 comp | |
102 | +97 conj | |
103 | +98 depr:pl:nom:m2 | |
104 | +99 depr:pl:voc:m2 | |
105 | +100 fin:pl:pri:imperf | |
106 | +101 fin:pl:pri:imperf.perf | |
107 | +102 fin:pl:pri:perf | |
108 | +103 fin:pl:sec:imperf | |
109 | +104 fin:pl:sec:imperf.perf | |
110 | +105 fin:pl:sec:perf | |
111 | +106 fin:pl:ter:imperf | |
112 | +107 fin:pl:ter:imperf.perf | |
113 | +108 fin:pl:ter:perf | |
114 | +109 fin:sg:pri:imperf | |
115 | +110 fin:sg:pri:imperf.perf | |
116 | +111 fin:sg:pri:perf | |
117 | +112 fin:sg:sec:imperf | |
118 | +113 fin:sg:sec:imperf.perf | |
119 | +114 fin:sg:sec:perf | |
120 | +115 fin:sg:ter:imperf | |
121 | +116 fin:sg:ter:imperf.perf | |
122 | +117 fin:sg:ter:perf | |
123 | +118 ger:sg:dat.loc:n2:imperf.perf:aff | |
124 | +119 ger:sg:dat.loc:n2:imperf.perf:neg | |
125 | +120 ger:sg:dat.loc:n2:imperf:aff | |
126 | +121 ger:sg:dat.loc:n2:imperf:neg | |
127 | +122 ger:sg:dat.loc:n2:perf:aff | |
128 | +123 ger:sg:dat.loc:n2:perf:neg | |
129 | +124 ger:sg:gen:n2:imperf.perf:aff | |
130 | +125 ger:sg:gen:n2:imperf.perf:neg | |
131 | +126 ger:sg:gen:n2:imperf:aff | |
132 | +127 ger:sg:gen:n2:imperf:neg | |
133 | +128 ger:sg:gen:n2:perf:aff | |
134 | +129 ger:sg:gen:n2:perf:neg | |
135 | +130 ger:sg:inst:n2:imperf.perf:aff | |
136 | +131 ger:sg:inst:n2:imperf.perf:neg | |
137 | +132 ger:sg:inst:n2:imperf:aff | |
138 | +133 ger:sg:inst:n2:imperf:neg | |
139 | +134 ger:sg:inst:n2:perf:aff | |
140 | +135 ger:sg:inst:n2:perf:neg | |
141 | +136 ger:sg:nom.acc:n2:imperf.perf:aff | |
142 | +137 ger:sg:nom.acc:n2:imperf.perf:neg | |
143 | +138 ger:sg:nom.acc:n2:imperf:aff | |
144 | +139 ger:sg:nom.acc:n2:imperf:neg | |
145 | +140 ger:sg:nom.acc:n2:perf:aff | |
146 | +141 ger:sg:nom.acc:n2:perf:neg | |
147 | +142 imps:imperf | |
148 | +143 imps:imperf.perf | |
149 | +144 imps:perf | |
150 | +145 impt:pl:pri:imperf | |
151 | +146 impt:pl:pri:imperf.perf | |
152 | +147 impt:pl:pri:perf | |
153 | +148 impt:pl:sec:imperf | |
154 | +149 impt:pl:sec:imperf.perf | |
155 | +150 impt:pl:sec:perf | |
156 | +151 impt:sg:sec:imperf | |
157 | +152 impt:sg:sec:imperf.perf | |
158 | +153 impt:sg:sec:perf | |
159 | +154 inf:imperf | |
160 | +155 inf:imperf.perf | |
161 | +156 inf:perf | |
162 | +157 interj | |
163 | +158 num:comp | |
164 | +159 num:pl:acc:m1:rec | |
165 | +160 num:pl:dat.loc:n1.p1.p2:congr.rec | |
166 | +161 num:pl:dat:m1.m2.m3.n2.f:congr | |
167 | +162 num:pl:gen.dat.inst.loc:m1.m2.m3.f.n1.n2.p1.p2:congr | |
168 | +163 num:pl:gen.dat.inst.loc:m1.m2.m3.f.n2:congr | |
169 | +164 num:pl:gen.dat.loc:m1.m2.m3.n2.f:congr | |
170 | +165 num:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2:congr | |
171 | +166 num:pl:gen.loc:m1.m2.m3.n2.f:congr | |
172 | +167 num:pl:gen:n1.p1.p2:rec | |
173 | +168 num:pl:inst:f:congr | |
174 | +169 num:pl:inst:m1.m2.m3.f.n1.n2.p1.p2:congr | |
175 | +170 num:pl:inst:m1.m2.m3.f.n2:congr | |
176 | +171 num:pl:inst:m1.m2.m3.n2.f:congr | |
177 | +172 num:pl:inst:m1.m2.m3.n2:congr | |
178 | +173 num:pl:inst:n1.p1.p2:rec | |
179 | +174 num:pl:nom.acc.voc:f:congr | |
180 | +175 num:pl:nom.acc.voc:m1:rec | |
181 | +176 num:pl:nom.acc.voc:m2.m3.f.n1.n2.p1.p2:rec | |
182 | +177 num:pl:nom.acc.voc:m2.m3.f.n2:rec | |
183 | +178 num:pl:nom.acc.voc:m2.m3.n2.f:congr | |
184 | +179 num:pl:nom.acc.voc:m2.m3.n2:congr | |
185 | +180 num:pl:nom.acc.voc:n1.p1.p2:rec | |
186 | +181 num:pl:nom.acc:m1.m2.m3.f.n1.n2.p1.p2:rec | |
187 | +182 num:pl:nom.gen.dat.inst.acc.loc.voc:m1.m2.m3.f.n1.n2.p1.p2:rec | |
188 | +183 num:pl:nom.voc:m1:congr | |
189 | +184 num:pl:nom.voc:m1:rec | |
190 | +185 num:sg:nom.gen.dat.inst.acc.loc.voc:f:rec | |
191 | +186 num:sg:nom.gen.dat.inst.acc.loc.voc:m1.m2.m3.n1.n2:rec | |
192 | +187 pact:pl:acc:m1.p1:imperf.perf:aff | |
193 | +188 pact:pl:acc:m1.p1:imperf.perf:neg | |
194 | +189 pact:pl:acc:m1.p1:imperf:aff | |
195 | +190 pact:pl:acc:m1.p1:imperf:neg | |
196 | +191 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff | |
197 | +192 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg | |
198 | +193 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff | |
199 | +194 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg | |
200 | +195 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff | |
201 | +196 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg | |
202 | +197 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff | |
203 | +198 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg | |
204 | +199 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff | |
205 | +200 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg | |
206 | +201 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff | |
207 | +202 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg | |
208 | +203 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:aff | |
209 | +204 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:neg | |
210 | +205 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff | |
211 | +206 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:neg | |
212 | +207 pact:pl:nom.voc:m1.p1:imperf.perf:aff | |
213 | +208 pact:pl:nom.voc:m1.p1:imperf.perf:neg | |
214 | +209 pact:pl:nom.voc:m1.p1:imperf:aff | |
215 | +210 pact:pl:nom.voc:m1.p1:imperf:neg | |
216 | +211 pact:sg:acc.inst:f:imperf.perf:aff | |
217 | +212 pact:sg:acc.inst:f:imperf.perf:neg | |
218 | +213 pact:sg:acc.inst:f:imperf:aff | |
219 | +214 pact:sg:acc.inst:f:imperf:neg | |
220 | +215 pact:sg:acc:m1.m2:imperf.perf:aff | |
221 | +216 pact:sg:acc:m1.m2:imperf.perf:neg | |
222 | +217 pact:sg:acc:m1.m2:imperf:aff | |
223 | +218 pact:sg:acc:m1.m2:imperf:neg | |
224 | +219 pact:sg:acc:m3:imperf.perf:aff | |
225 | +220 pact:sg:acc:m3:imperf.perf:neg | |
226 | +221 pact:sg:acc:m3:imperf:aff | |
227 | +222 pact:sg:acc:m3:imperf:neg | |
228 | +223 pact:sg:dat:m1.m2.m3.n1.n2:imperf.perf:aff | |
229 | +224 pact:sg:dat:m1.m2.m3.n1.n2:imperf.perf:neg | |
230 | +225 pact:sg:dat:m1.m2.m3.n1.n2:imperf:aff | |
231 | +226 pact:sg:dat:m1.m2.m3.n1.n2:imperf:neg | |
232 | +227 pact:sg:gen.dat.loc:f:imperf.perf:aff | |
233 | +228 pact:sg:gen.dat.loc:f:imperf.perf:neg | |
234 | +229 pact:sg:gen.dat.loc:f:imperf:aff | |
235 | +230 pact:sg:gen.dat.loc:f:imperf:neg | |
236 | +231 pact:sg:gen:m1.m2.m3.n1.n2:imperf.perf:aff | |
237 | +232 pact:sg:gen:m1.m2.m3.n1.n2:imperf.perf:neg | |
238 | +233 pact:sg:gen:m1.m2.m3.n1.n2:imperf:aff | |
239 | +234 pact:sg:gen:m1.m2.m3.n1.n2:imperf:neg | |
240 | +235 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:aff | |
241 | +236 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:neg | |
242 | +237 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff | |
243 | +238 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf:neg | |
244 | +239 pact:sg:nom.acc.voc:n1.n2:imperf.perf:aff | |
245 | +240 pact:sg:nom.acc.voc:n1.n2:imperf.perf:neg | |
246 | +241 pact:sg:nom.acc.voc:n1.n2:imperf:aff | |
247 | +242 pact:sg:nom.acc.voc:n1.n2:imperf:neg | |
248 | +243 pact:sg:nom.voc:f:imperf.perf:aff | |
249 | +244 pact:sg:nom.voc:f:imperf.perf:neg | |
250 | +245 pact:sg:nom.voc:f:imperf:aff | |
251 | +246 pact:sg:nom.voc:f:imperf:neg | |
252 | +247 pact:sg:nom.voc:m1.m2.m3:imperf.perf:aff | |
253 | +248 pact:sg:nom.voc:m1.m2.m3:imperf.perf:neg | |
254 | +249 pact:sg:nom.voc:m1.m2.m3:imperf:aff | |
255 | +250 pact:sg:nom.voc:m1.m2.m3:imperf:neg | |
256 | +251 pant:perf | |
257 | +252 pcon:imperf | |
258 | +253 ppas:pl:acc:m1.p1:imperf.perf:aff | |
259 | +254 ppas:pl:acc:m1.p1:imperf.perf:neg | |
260 | +255 ppas:pl:acc:m1.p1:imperf:aff | |
261 | +256 ppas:pl:acc:m1.p1:imperf:neg | |
262 | +257 ppas:pl:acc:m1.p1:perf:aff | |
263 | +258 ppas:pl:acc:m1.p1:perf:neg | |
264 | +259 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff | |
265 | +260 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg | |
266 | +261 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff | |
267 | +262 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg | |
268 | +263 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff | |
269 | +264 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg | |
270 | +265 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff | |
271 | +266 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg | |
272 | +267 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff | |
273 | +268 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg | |
274 | +269 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff | |
275 | +270 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg | |
276 | +271 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff | |
277 | +272 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg | |
278 | +273 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff | |
279 | +274 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg | |
280 | +275 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff | |
281 | +276 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg | |
282 | +277 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:aff | |
283 | +278 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:neg | |
284 | +279 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff | |
285 | +280 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:neg | |
286 | +281 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:aff | |
287 | +282 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:neg | |
288 | +283 ppas:pl:nom.voc:m1.p1:imperf.perf:aff | |
289 | +284 ppas:pl:nom.voc:m1.p1:imperf.perf:neg | |
290 | +285 ppas:pl:nom.voc:m1.p1:imperf:aff | |
291 | +286 ppas:pl:nom.voc:m1.p1:imperf:neg | |
292 | +287 ppas:pl:nom.voc:m1.p1:perf:aff | |
293 | +288 ppas:pl:nom.voc:m1.p1:perf:neg | |
294 | +289 ppas:sg:acc.inst:f:imperf.perf:aff | |
295 | +290 ppas:sg:acc.inst:f:imperf.perf:neg | |
296 | +291 ppas:sg:acc.inst:f:imperf:aff | |
297 | +292 ppas:sg:acc.inst:f:imperf:neg | |
298 | +293 ppas:sg:acc.inst:f:perf:aff | |
299 | +294 ppas:sg:acc.inst:f:perf:neg | |
300 | +295 ppas:sg:acc:m1.m2:imperf.perf:aff | |
301 | +296 ppas:sg:acc:m1.m2:imperf.perf:neg | |
302 | +297 ppas:sg:acc:m1.m2:imperf:aff | |
303 | +298 ppas:sg:acc:m1.m2:imperf:neg | |
304 | +299 ppas:sg:acc:m1.m2:perf:aff | |
305 | +300 ppas:sg:acc:m1.m2:perf:neg | |
306 | +301 ppas:sg:acc:m3:imperf.perf:aff | |
307 | +302 ppas:sg:acc:m3:imperf.perf:neg | |
308 | +303 ppas:sg:acc:m3:imperf:aff | |
309 | +304 ppas:sg:acc:m3:imperf:neg | |
310 | +305 ppas:sg:acc:m3:perf:aff | |
311 | +306 ppas:sg:acc:m3:perf:neg | |
312 | +307 ppas:sg:dat:m1.m2.m3.n1.n2:imperf.perf:aff | |
313 | +308 ppas:sg:dat:m1.m2.m3.n1.n2:imperf.perf:neg | |
314 | +309 ppas:sg:dat:m1.m2.m3.n1.n2:imperf:aff | |
315 | +310 ppas:sg:dat:m1.m2.m3.n1.n2:imperf:neg | |
316 | +311 ppas:sg:dat:m1.m2.m3.n1.n2:perf:aff | |
317 | +312 ppas:sg:dat:m1.m2.m3.n1.n2:perf:neg | |
318 | +313 ppas:sg:gen.dat.loc:f:imperf.perf:aff | |
319 | +314 ppas:sg:gen.dat.loc:f:imperf.perf:neg | |
320 | +315 ppas:sg:gen.dat.loc:f:imperf:aff | |
321 | +316 ppas:sg:gen.dat.loc:f:imperf:neg | |
322 | +317 ppas:sg:gen.dat.loc:f:perf:aff | |
323 | +318 ppas:sg:gen.dat.loc:f:perf:neg | |
324 | +319 ppas:sg:gen:m1.m2.m3.n1.n2:imperf.perf:aff | |
325 | +320 ppas:sg:gen:m1.m2.m3.n1.n2:imperf.perf:neg | |
326 | +321 ppas:sg:gen:m1.m2.m3.n1.n2:imperf:aff | |
327 | +322 ppas:sg:gen:m1.m2.m3.n1.n2:imperf:neg | |
328 | +323 ppas:sg:gen:m1.m2.m3.n1.n2:perf:aff | |
329 | +324 ppas:sg:gen:m1.m2.m3.n1.n2:perf:neg | |
330 | +325 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:aff | |
331 | +326 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:neg | |
332 | +327 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff | |
333 | +328 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf:neg | |
334 | +329 ppas:sg:inst.loc:m1.m2.m3.n1.n2:perf:aff | |
335 | +330 ppas:sg:inst.loc:m1.m2.m3.n1.n2:perf:neg | |
336 | +331 ppas:sg:nom.acc.voc:n1.n2:imperf.perf:aff | |
337 | +332 ppas:sg:nom.acc.voc:n1.n2:imperf.perf:neg | |
338 | +333 ppas:sg:nom.acc.voc:n1.n2:imperf:aff | |
339 | +334 ppas:sg:nom.acc.voc:n1.n2:imperf:neg | |
340 | +335 ppas:sg:nom.acc.voc:n1.n2:perf:aff | |
341 | +336 ppas:sg:nom.acc.voc:n1.n2:perf:neg | |
342 | +337 ppas:sg:nom.voc:f:imperf.perf:aff | |
343 | +338 ppas:sg:nom.voc:f:imperf.perf:neg | |
344 | +339 ppas:sg:nom.voc:f:imperf:aff | |
345 | +340 ppas:sg:nom.voc:f:imperf:neg | |
346 | +341 ppas:sg:nom.voc:f:perf:aff | |
347 | +342 ppas:sg:nom.voc:f:perf:neg | |
348 | +343 ppas:sg:nom.voc:m1.m2.m3:imperf.perf:aff | |
349 | +344 ppas:sg:nom.voc:m1.m2.m3:imperf.perf:neg | |
350 | +345 ppas:sg:nom.voc:m1.m2.m3:imperf:aff | |
351 | +346 ppas:sg:nom.voc:m1.m2.m3:imperf:neg | |
352 | +347 ppas:sg:nom.voc:m1.m2.m3:perf:aff | |
353 | +348 ppas:sg:nom.voc:m1.m2.m3:perf:neg | |
354 | +349 ppron12:pl:acc:_:pri | |
355 | +350 ppron12:pl:acc:_:sec | |
356 | +351 ppron12:pl:dat:_:pri | |
357 | +352 ppron12:pl:dat:_:sec | |
358 | +353 ppron12:pl:gen:_:pri | |
359 | +354 ppron12:pl:gen:_:sec | |
360 | +355 ppron12:pl:inst:_:pri | |
361 | +356 ppron12:pl:inst:_:sec | |
362 | +357 ppron12:pl:loc:_:pri | |
363 | +358 ppron12:pl:loc:_:sec | |
364 | +359 ppron12:pl:nom:_:pri | |
365 | +360 ppron12:pl:nom:_:sec | |
366 | +361 ppron12:pl:voc:_:pri | |
367 | +362 ppron12:pl:voc:_:sec | |
368 | +363 ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:akc | |
369 | +364 ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:nakc | |
370 | +365 ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:akc | |
371 | +366 ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:nakc | |
372 | +367 ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:akc | |
373 | +368 ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:nakc | |
374 | +369 ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:akc | |
375 | +370 ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:nakc | |
376 | +371 ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:akc | |
377 | +372 ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:nakc | |
378 | +373 ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:akc | |
379 | +374 ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:nakc | |
380 | +375 ppron12:sg:inst:m1.m2.m3.f.n1.n2:pri | |
381 | +376 ppron12:sg:inst:m1.m2.m3.f.n1.n2:sec | |
382 | +377 ppron12:sg:loc:m1.m2.m3.f.n1.n2:pri | |
383 | +378 ppron12:sg:loc:m1.m2.m3.f.n1.n2:sec | |
384 | +379 ppron12:sg:nom:m1.m2.m3.f.n1.n2:pri | |
385 | +380 ppron12:sg:nom:m1.m2.m3.f.n1.n2:sec | |
386 | +381 ppron12:sg:voc:m1.m2.m3.f.n1.n2:pri | |
387 | +382 ppron12:sg:voc:m1.m2.m3.f.n1.n2:sec | |
388 | +383 ppron3:pl:acc:m1.p1:ter:_:npraep | |
389 | +384 ppron3:pl:acc:m1.p1:ter:_:praep | |
390 | +385 ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:_:npraep | |
391 | +386 ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:_:praep | |
392 | +387 ppron3:pl:dat:_:ter:_:npraep | |
393 | +388 ppron3:pl:dat:_:ter:_:praep | |
394 | +389 ppron3:pl:gen:_:ter:_:npraep | |
395 | +390 ppron3:pl:gen:_:ter:_:praep | |
396 | +391 ppron3:pl:inst:_:ter:_:_ | |
397 | +392 ppron3:pl:loc:_:ter:_:_ | |
398 | +393 ppron3:pl:nom:m1.p1:ter:_:_ | |
399 | +394 ppron3:pl:nom:m2.m3.f.n1.n2.p2.p3:ter:_:_ | |
400 | +395 ppron3:sg:acc:f:ter:_:npraep | |
401 | +396 ppron3:sg:acc:f:ter:_:praep | |
402 | +397 ppron3:sg:acc:m1.m2.m3:ter:akc:npraep | |
403 | +398 ppron3:sg:acc:m1.m2.m3:ter:akc:praep | |
404 | +399 ppron3:sg:acc:m1.m2.m3:ter:nakc:npraep | |
405 | +400 ppron3:sg:acc:m1.m2.m3:ter:nakc:praep | |
406 | +401 ppron3:sg:acc:n1.n2:ter:_:npraep | |
407 | +402 ppron3:sg:acc:n1.n2:ter:_:praep | |
408 | +403 ppron3:sg:dat:f:ter:_:npraep | |
409 | +404 ppron3:sg:dat:f:ter:_:praep | |
410 | +405 ppron3:sg:dat:m1.m2.m3:ter:_:praep | |
411 | +406 ppron3:sg:dat:m1.m2.m3:ter:akc:npraep | |
412 | +407 ppron3:sg:dat:m1.m2.m3:ter:nakc:npraep | |
413 | +408 ppron3:sg:dat:n1.n2:ter:_:praep | |
414 | +409 ppron3:sg:dat:n1.n2:ter:akc:npraep | |
415 | +410 ppron3:sg:dat:n1.n2:ter:nakc:npraep | |
416 | +411 ppron3:sg:gen:f:ter:_:npraep | |
417 | +412 ppron3:sg:gen:f:ter:_:praep | |
418 | +413 ppron3:sg:gen:m1.m2.m3:ter:akc:npraep | |
419 | +414 ppron3:sg:gen:m1.m2.m3:ter:akc:praep | |
420 | +415 ppron3:sg:gen:m1.m2.m3:ter:nakc:npraep | |
421 | +416 ppron3:sg:gen:m1.m2.m3:ter:nakc:praep | |
422 | +417 ppron3:sg:gen:n1.n2:ter:_:praep | |
423 | +418 ppron3:sg:gen:n1.n2:ter:akc:npraep | |
424 | +419 ppron3:sg:gen:n1.n2:ter:nakc:npraep | |
425 | +420 ppron3:sg:inst:f:ter:_:praep | |
426 | +421 ppron3:sg:inst:m1.m2.m3:ter:_:_ | |
427 | +422 ppron3:sg:inst:n1.n2:ter:_:_ | |
428 | +423 ppron3:sg:loc:f:ter:_:_ | |
429 | +424 ppron3:sg:loc:m1.m2.m3:ter:_:_ | |
430 | +425 ppron3:sg:loc:n1.n2:ter:_:_ | |
431 | +426 ppron3:sg:nom:f:ter:_:_ | |
432 | +427 ppron3:sg:nom:m1.m2.m3:ter:_:_ | |
433 | +428 ppron3:sg:nom:n1.n2:ter:_:_ | |
434 | +429 praet:pl:m1.p1:imperf | |
435 | +430 praet:pl:m1.p1:imperf.perf | |
436 | +431 praet:pl:m1.p1:perf | |
437 | +432 praet:pl:m2.m3.f.n1.n2.p2.p3:imperf | |
438 | +433 praet:pl:m2.m3.f.n1.n2.p2.p3:imperf.perf | |
439 | +434 praet:pl:m2.m3.f.n1.n2.p2.p3:perf | |
440 | +435 praet:sg:f:imperf | |
441 | +436 praet:sg:f:imperf.perf | |
442 | +437 praet:sg:f:perf | |
443 | +438 praet:sg:m1.m2.m3:imperf | |
444 | +439 praet:sg:m1.m2.m3:imperf.perf | |
445 | +440 praet:sg:m1.m2.m3:imperf:agl | |
446 | +441 praet:sg:m1.m2.m3:imperf:nagl | |
447 | +442 praet:sg:m1.m2.m3:perf | |
448 | +443 praet:sg:m1.m2.m3:perf:agl | |
449 | +444 praet:sg:m1.m2.m3:perf:nagl | |
450 | +445 praet:sg:n1.n2:imperf | |
451 | +446 praet:sg:n1.n2:imperf.perf | |
452 | +447 praet:sg:n1.n2:perf | |
453 | +448 pred | |
454 | +449 prep:acc | |
455 | +450 prep:acc:nwok | |
456 | +451 prep:acc:wok | |
457 | +452 prep:dat | |
458 | +453 prep:gen | |
459 | +454 prep:gen:nwok | |
460 | +455 prep:gen:wok | |
461 | +456 prep:inst | |
462 | +457 prep:inst:nwok | |
463 | +458 prep:inst:wok | |
464 | +459 prep:loc | |
465 | +460 prep:loc:nwok | |
466 | +461 prep:loc:wok | |
467 | +462 prep:nom | |
468 | +463 qub | |
469 | +464 subst:pl:acc:f | |
470 | +465 subst:pl:acc:m1 | |
471 | +466 subst:pl:acc:m2 | |
472 | +467 subst:pl:acc:m3 | |
473 | +468 subst:pl:acc:n1 | |
474 | +469 subst:pl:acc:n2 | |
475 | +470 subst:pl:acc:p1 | |
476 | +471 subst:pl:acc:p2 | |
477 | +472 subst:pl:acc:p3 | |
478 | +473 subst:pl:dat:f | |
479 | +474 subst:pl:dat:m1 | |
480 | +475 subst:pl:dat:m2 | |
481 | +476 subst:pl:dat:m3 | |
482 | +477 subst:pl:dat:n1 | |
483 | +478 subst:pl:dat:n2 | |
484 | +479 subst:pl:dat:p1 | |
485 | +480 subst:pl:dat:p2 | |
486 | +481 subst:pl:dat:p3 | |
487 | +482 subst:pl:gen:f | |
488 | +483 subst:pl:gen:m1 | |
489 | +484 subst:pl:gen:m2 | |
490 | +485 subst:pl:gen:m3 | |
491 | +486 subst:pl:gen:n1 | |
492 | +487 subst:pl:gen:n2 | |
493 | +488 subst:pl:gen:p1 | |
494 | +489 subst:pl:gen:p2 | |
495 | +490 subst:pl:gen:p3 | |
496 | +491 subst:pl:inst:f | |
497 | +492 subst:pl:inst:m1 | |
498 | +493 subst:pl:inst:m2 | |
499 | +494 subst:pl:inst:m3 | |
500 | +495 subst:pl:inst:n1 | |
501 | +496 subst:pl:inst:n2 | |
502 | +497 subst:pl:inst:p1 | |
503 | +498 subst:pl:inst:p2 | |
504 | +499 subst:pl:inst:p3 | |
505 | +500 subst:pl:loc:f | |
506 | +501 subst:pl:loc:m1 | |
507 | +502 subst:pl:loc:m2 | |
508 | +503 subst:pl:loc:m3 | |
509 | +504 subst:pl:loc:n1 | |
510 | +505 subst:pl:loc:n2 | |
511 | +506 subst:pl:loc:p1 | |
512 | +507 subst:pl:loc:p2 | |
513 | +508 subst:pl:loc:p3 | |
514 | +509 subst:pl:nom:f | |
515 | +510 subst:pl:nom:m1 | |
516 | +511 subst:pl:nom:m2 | |
517 | +512 subst:pl:nom:m3 | |
518 | +513 subst:pl:nom:n1 | |
519 | +514 subst:pl:nom:n2 | |
520 | +515 subst:pl:nom:p1 | |
521 | +516 subst:pl:nom:p2 | |
522 | +517 subst:pl:nom:p3 | |
523 | +518 subst:pl:voc:f | |
524 | +519 subst:pl:voc:m1 | |
525 | +520 subst:pl:voc:m2 | |
526 | +521 subst:pl:voc:m3 | |
527 | +522 subst:pl:voc:n1 | |
528 | +523 subst:pl:voc:n2 | |
529 | +524 subst:pl:voc:p1 | |
530 | +525 subst:pl:voc:p2 | |
531 | +526 subst:pl:voc:p3 | |
532 | +527 subst:sg:acc:f | |
533 | +528 subst:sg:acc:m1 | |
534 | +529 subst:sg:acc:m2 | |
535 | +530 subst:sg:acc:m3 | |
536 | +531 subst:sg:acc:n1 | |
537 | +532 subst:sg:acc:n2 | |
538 | +533 subst:sg:dat:f | |
539 | +534 subst:sg:dat:m1 | |
540 | +535 subst:sg:dat:m2 | |
541 | +536 subst:sg:dat:m3 | |
542 | +537 subst:sg:dat:n1 | |
543 | +538 subst:sg:dat:n2 | |
544 | +539 subst:sg:gen:f | |
545 | +540 subst:sg:gen:m1 | |
546 | +541 subst:sg:gen:m2 | |
547 | +542 subst:sg:gen:m3 | |
548 | +543 subst:sg:gen:n1 | |
549 | +544 subst:sg:gen:n2 | |
550 | +545 subst:sg:inst:f | |
551 | +546 subst:sg:inst:m1 | |
552 | +547 subst:sg:inst:m2 | |
553 | +548 subst:sg:inst:m3 | |
554 | +549 subst:sg:inst:n1 | |
555 | +550 subst:sg:inst:n2 | |
556 | +551 subst:sg:loc:f | |
557 | +552 subst:sg:loc:m1 | |
558 | +553 subst:sg:loc:m2 | |
559 | +554 subst:sg:loc:m3 | |
560 | +555 subst:sg:loc:n1 | |
561 | +556 subst:sg:loc:n2 | |
562 | +557 subst:sg:nom:f | |
563 | +558 subst:sg:nom:m1 | |
564 | +559 subst:sg:nom:m2 | |
565 | +560 subst:sg:nom:m3 | |
566 | +561 subst:sg:nom:n1 | |
567 | +562 subst:sg:nom:n2 | |
568 | +563 subst:sg:voc:f | |
569 | +564 subst:sg:voc:m1 | |
570 | +565 subst:sg:voc:m2 | |
571 | +566 subst:sg:voc:m3 | |
572 | +567 subst:sg:voc:n1 | |
573 | +568 subst:sg:voc:n2 | |
574 | +569 winien:pl:m1.p1:imperf | |
575 | +570 winien:pl:m2.m3.f.n1.n2.p2.p3:imperf | |
576 | +571 winien:sg:f:imperf | |
577 | +572 winien:sg:m1.m2.m3:imperf | |
578 | +573 winien:sg:n1.n2:imperf | |
579 | + | |
580 | +[NAMES] | |
581 | + | |
582 | +0 | |
583 | +1 etnonim | |
584 | +2 geograficzna | |
585 | +3 imię | |
586 | +4 nazwisko | |
587 | +5 określenie dodatkowe | |
588 | +6 organizacja | |
589 | +7 osoba | |
590 | +8 pospolita | |
591 | +9 własna | |
592 | +10 wydarzenie | |
593 | +11 wytwór | |
594 | + | |
... | ... |
fsabuilder/fsa/test/testConstruction.py
... | ... | @@ -2,17 +2,16 @@ |
2 | 2 | ''' |
3 | 3 | Created on Oct 8, 2013 |
4 | 4 | |
5 | -@author: lennyn | |
5 | +@author: mlenart | |
6 | 6 | ''' |
7 | 7 | import unittest |
8 | -from fsa import fsa, visualizer, encode | |
8 | +import os | |
9 | +from fsa import fsa, visualizer, encode, buildfsa | |
9 | 10 | |
10 | 11 | class Test(unittest.TestCase): |
11 | 12 | |
12 | - | |
13 | 13 | def testSimpleConstruction(self): |
14 | - print 'dupa' | |
15 | - a = fsa.FSA(encode.Encoder()) | |
14 | + a = fsa.FSA(encode.SimpleEncoder()) | |
16 | 15 | input = sorted([ |
17 | 16 | (u'bić', ''), |
18 | 17 | (u'bij', ''), |
... | ... | @@ -50,19 +49,17 @@ class Test(unittest.TestCase): |
50 | 49 | (u'biłyśmy', ''), |
51 | 50 | ], key=lambda w: bytearray(w[0], 'utf8')) |
52 | 51 | a.feed(input) |
53 | - print a.getStatesNum() | |
54 | -# print a.tryToRecognize(u'bi') | |
55 | -# print a.tryToRecognize(u'bić') | |
56 | -# print a.tryToRecognize(u'bili') | |
57 | 52 | for w, res in input: |
58 | - print w, res, a.tryToRecognize(w) | |
59 | 53 | recognized = a.tryToRecognize(w) |
60 | - if type(res) in [str, unicode]: | |
61 | - recognized = recognized[0] | |
62 | 54 | assert recognized == res |
63 | 55 | a.calculateOffsets(lambda state: 1 + 4 * len(state.transitionsMap.keys()) + (len(state.encodedData) if state.isAccepting() else 0)) |
64 | 56 | visualizer.Visualizer().visualize(a) |
65 | - print 'done' | |
57 | + | |
58 | + def testPolimorfConstruction(self): | |
59 | + inputFile = os.path.join(os.path.dirname(__file__), 'PoliMorfSmall.tab') | |
60 | + tagsetFile = os.path.join(os.path.dirname(__file__), 'polimorf.tagset') | |
61 | + fsa = buildfsa.buildFromPoliMorf(inputFile, tagsetFile) | |
62 | +# visualizer.Visualizer().visualize(fsa) | |
66 | 63 | |
67 | 64 | if __name__ == "__main__": |
68 | 65 | #import sys;sys.argv = ['', 'Test.testSimpleConstruction'] |
... | ... |
fsabuilder/utils/__init__.py
0 → 100644
fsabuilder/utils/extractTagset.py
0 → 100644
1 | +''' | |
2 | +Created on Nov 7, 2013 | |
3 | + | |
4 | +@author: mlenart | |
5 | +''' | |
6 | +import sys | |
7 | + | |
8 | +if __name__ == '__main__': | |
9 | + version = sys.argv[1] | |
10 | + res = set() | |
11 | + print '#morfeusz-tagset', version | |
12 | + for line in sys.stdin: | |
13 | + if line.strip(): | |
14 | + tag = line.split('\t')[2] | |
15 | + res.add(tag) | |
16 | + for idx, tag in enumerate(sorted(res)): | |
17 | + print str(idx) + '\t' + tag | |
... | ... |