Commit 1c1bf6777d2888a58f0faf084f903a5534c74a60
1 parent
28f11d57
- różne poprawki w parsowaniu tagsetu
- praca nad parsowaniem reguł zlepiania segmentów git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@85 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing
18 changed files
with
1231 additions
and
125 deletions
fsabuilder/.settings/org.eclipse.core.resources.prefs
fsabuilder/.settings/org.eclipse.ltk.core.refactoring.prefs
0 → 100644
fsabuilder/morfeuszbuilder/fsa/common.py
... | ... | @@ -77,34 +77,3 @@ class Interpretation4Generator(object): |
77 | 77 | |
78 | 78 | def __repr__(self): |
79 | 79 | return unicode(self) |
80 | - | |
81 | -class Tagset(object): | |
82 | - | |
83 | - TAGS = 1 | |
84 | - NAMES = 2 | |
85 | - SEP = '\t' | |
86 | - | |
87 | - def __init__(self, filename, encoding='utf8'): | |
88 | - self.tag2tagnum = {} | |
89 | - self.name2namenum = {} | |
90 | - self._doInit(filename, encoding) | |
91 | -# print self.tag2tagnum | |
92 | -# print self.name2namenum | |
93 | - | |
94 | - def _doInit(self, filename, encoding): | |
95 | - addingTo = None | |
96 | - with codecs.open(filename, 'r', encoding) as f: | |
97 | - for line in f: | |
98 | - line = line.strip('\n') | |
99 | - if line == u'[TAGS]': | |
100 | - addingTo = Tagset.TAGS | |
101 | - elif line == u'[NAMES]': | |
102 | - addingTo = Tagset.NAMES | |
103 | - elif line and not line.startswith(u'#'): | |
104 | - assert addingTo in [Tagset.TAGS, Tagset.NAMES] | |
105 | - res = {Tagset.TAGS: self.tag2tagnum, | |
106 | - Tagset.NAMES: self.name2namenum}[addingTo] | |
107 | - tagNum = line.split(Tagset.SEP)[0] | |
108 | - tag = line.split(Tagset.SEP)[1] | |
109 | - assert tag not in res | |
110 | - res[tag] = int(tagNum) | |
... | ... |
fsabuilder/morfeuszbuilder/fsa/test/testConstruction.py
... | ... | @@ -6,62 +6,62 @@ Created on Oct 8, 2013 |
6 | 6 | ''' |
7 | 7 | import unittest |
8 | 8 | import os |
9 | -from fsa import fsa, visualizer, encode, buildfsa | |
10 | -from fsa.serializer import SimpleSerializer | |
9 | +from morfeuszbuilder.fsa import fsa, visualizer, encode | |
10 | +from morfeuszbuilder.fsa.serializer import SimpleSerializer | |
11 | 11 | |
12 | 12 | class Test(unittest.TestCase): |
13 | - | |
14 | - def testSimpleConstruction(self): | |
15 | - a = fsa.FSA(encode.SimpleEncoder()) | |
16 | - input = sorted([ | |
17 | - (u'bić', ''), | |
18 | - (u'bij', ''), | |
19 | - (u'biją', ''), | |
20 | - (u'bijcie', ''), | |
21 | - (u'bije', ''), | |
22 | - (u'bijecie', ''), | |
23 | - (u'bijemy', ''), | |
24 | - (u'bijesz', ''), | |
25 | - (u'biję', ''), | |
26 | - (u'bijmy', ''), | |
27 | - (u'bili', 'asd'), | |
28 | - (u'biliby', ''), | |
29 | - (u'bilibyście', ''), | |
30 | - (u'bilibyśmy', ''), | |
31 | - (u'biliście', 'asdfas'), | |
32 | - (u'biliśmy', ''), | |
33 | - (u'bił', 'wersadfas'), | |
34 | - (u'biła', 'asdfasd'), | |
35 | - (u'biłaby', 'asdfa'), | |
36 | - (u'biłabym', ''), | |
37 | - (u'biłabyś', 'asdfa'), | |
38 | - (u'biłam', 'dfas'), | |
39 | - (u'biłaś', 'asdfas'), | |
40 | - (u'biłby', ''), | |
41 | - (u'biłbym', 'asdfa'), | |
42 | - (u'biłbyś', ''), | |
43 | - (u'biłem', ''), | |
44 | - (u'biłeś', 'sadfa'), | |
45 | - (u'biły', ''), | |
46 | - (u'biłyby', ''), | |
47 | - (u'biłybyście', ''), | |
48 | - (u'biłybyśmy', ''), | |
49 | - (u'biłyście', ''), | |
50 | - (u'biłyśmy', ''), | |
51 | - ], key=lambda w: bytearray(w[0], 'utf8')) | |
52 | - a.feed(input) | |
53 | - for w, res in input: | |
54 | - recognized = a.tryToRecognize(w) | |
55 | - assert recognized == res | |
56 | - a.calculateOffsets(lambda state: 1 + 4 * len(state.transitionsMap.keys()) + (len(state.encodedData) if state.isAccepting() else 0)) | |
57 | - visualizer.Visualizer().visualize(a) | |
58 | - | |
59 | - def testPolimorfConstruction(self): | |
60 | - inputFile = os.path.join(os.path.dirname(__file__), 'PoliMorfSmall.tab') | |
61 | - tagsetFile = os.path.join(os.path.dirname(__file__), 'polimorf.tagset') | |
62 | - fsa = buildfsa.buildFromPoliMorf(inputFile, tagsetFile) | |
63 | - serializer = SimpleSerializer(fsa) | |
64 | - serializer.serialize2BinaryFile('/tmp/test0.fsa') | |
13 | + pass | |
14 | +# def testSimpleConstruction(self): | |
15 | +# a = fsa.FSA(encode.SimpleEncoder()) | |
16 | +# input = sorted([ | |
17 | +# (u'bić', ''), | |
18 | +# (u'bij', ''), | |
19 | +# (u'biją', ''), | |
20 | +# (u'bijcie', ''), | |
21 | +# (u'bije', ''), | |
22 | +# (u'bijecie', ''), | |
23 | +# (u'bijemy', ''), | |
24 | +# (u'bijesz', ''), | |
25 | +# (u'biję', ''), | |
26 | +# (u'bijmy', ''), | |
27 | +# (u'bili', 'asd'), | |
28 | +# (u'biliby', ''), | |
29 | +# (u'bilibyście', ''), | |
30 | +# (u'bilibyśmy', ''), | |
31 | +# (u'biliście', 'asdfas'), | |
32 | +# (u'biliśmy', ''), | |
33 | +# (u'bił', 'wersadfas'), | |
34 | +# (u'biła', 'asdfasd'), | |
35 | +# (u'biłaby', 'asdfa'), | |
36 | +# (u'biłabym', ''), | |
37 | +# (u'biłabyś', 'asdfa'), | |
38 | +# (u'biłam', 'dfas'), | |
39 | +# (u'biłaś', 'asdfas'), | |
40 | +# (u'biłby', ''), | |
41 | +# (u'biłbym', 'asdfa'), | |
42 | +# (u'biłbyś', ''), | |
43 | +# (u'biłem', ''), | |
44 | +# (u'biłeś', 'sadfa'), | |
45 | +# (u'biły', ''), | |
46 | +# (u'biłyby', ''), | |
47 | +# (u'biłybyście', ''), | |
48 | +# (u'biłybyśmy', ''), | |
49 | +# (u'biłyście', ''), | |
50 | +# (u'biłyśmy', ''), | |
51 | +# ], key=lambda w: bytearray(w[0], 'utf8')) | |
52 | +# a.feed(input) | |
53 | +# for w, res in input: | |
54 | +# recognized = a.tryToRecognize(w) | |
55 | +# assert recognized == res | |
56 | +# a.calculateOffsets(lambda state: 1 + 4 * len(state.transitionsMap.keys()) + (len(state.encodedData) if state.isAccepting() else 0)) | |
57 | +# visualizer.Visualizer().visualize(a) | |
58 | +# | |
59 | +# def testPolimorfConstruction(self): | |
60 | +# inputFile = os.path.join(os.path.dirname(__file__), 'PoliMorfSmall.tab') | |
61 | +# tagsetFile = os.path.join(os.path.dirname(__file__), 'polimorf.tagset') | |
62 | +# fsa = buildfsa.buildFromPoliMorf(inputFile, tagsetFile) | |
63 | +# serializer = SimpleSerializer(fsa) | |
64 | +# serializer.serialize2BinaryFile('/tmp/test0.fsa') | |
65 | 65 | # visualizer.Visualizer().visualize(fsa) |
66 | 66 | |
67 | 67 | if __name__ == "__main__": |
... | ... |
fsabuilder/morfeuszbuilder/segrules/preprocessor.py
... | ... | @@ -7,6 +7,7 @@ import re |
7 | 7 | from pyparsing import * |
8 | 8 | |
9 | 9 | identifier = Word(alphas, bodyChars=alphanums+'_') |
10 | +token = Word(alphas, bodyChars=alphanums+'_+>') | |
10 | 11 | define = Keyword('#define').suppress() + identifier + Optional(Suppress('(') + identifier + Suppress(')')) + restOfLine + LineEnd() + StringEnd() |
11 | 12 | ifdef = Keyword('#ifdef').suppress() + identifier + LineEnd() + StringEnd() |
12 | 13 | endif = Keyword('#endif').suppress() + LineEnd() + StringEnd() |
... | ... | @@ -64,7 +65,7 @@ def _processLine(line, defines): |
64 | 65 | defineInstance = Forward() |
65 | 66 | localId = identifier.copy() |
66 | 67 | |
67 | - rule << OneOrMore(localId ^ defineInstance ^ Word('*|+?')) | |
68 | + rule << OneOrMore(defineInstance ^ localId ^ Word('*|+?>') ^ (Literal('(') + rule + Literal(')'))) | |
68 | 69 | defineInstance << localId + Suppress('(') + rule + Suppress(')') |
69 | 70 | |
70 | 71 | rule.setParseAction(lambda s, l, t: ' '.join(t)) |
... | ... | @@ -77,25 +78,25 @@ def _processLine(line, defines): |
77 | 78 | def preprocess(inputLines, defs): |
78 | 79 | defines = {} |
79 | 80 | ifdefsStack = [] |
80 | - for lineNum, line in enumerate(inputLines, start=1): | |
81 | + for lineNum, line in inputLines: | |
81 | 82 | if line.startswith('#define'): |
82 | - try: | |
83 | - parsedDefine = list(define.parseString(line)) | |
84 | - if len(parsedDefine) == 2: | |
85 | - name, val = parsedDefine | |
86 | - defines[name] = NonArgDefine(name, val) | |
87 | - else: | |
88 | - name, arg, val = parsedDefine | |
89 | - localDefines = defines.copy() | |
90 | - localDefines[arg] = NonArgDefine(arg, arg) | |
91 | - val = _processLine(val, localDefines) | |
92 | - defines[name] = ArgDefine(name, arg, val) | |
93 | - except: | |
94 | - pass | |
83 | + parsedDefine = list(define.parseString(line)) | |
84 | + if len(parsedDefine) == 2: | |
85 | + name, val = parsedDefine | |
86 | + defines[name] = NonArgDefine(name, val) | |
87 | + else: | |
88 | + name, arg, val = parsedDefine | |
89 | + localDefines = defines.copy() | |
90 | + localDefines[arg] = NonArgDefine(arg, arg) | |
91 | + val = _processLine(val, localDefines) | |
92 | + defines[name] = ArgDefine(name, arg, val) | |
95 | 93 | elif line.startswith('#ifdef'): |
96 | 94 | name = ifdef.parseString(line)[0] |
97 | 95 | ifdefsStack.append(name) |
98 | 96 | elif line.startswith('#endif'): |
99 | 97 | ifdefsStack.pop() |
98 | + elif line.startswith('#'): | |
99 | + yield lineNum, line | |
100 | 100 | elif len(ifdefsStack) == 0 or all(map(lambda name: name in defs, ifdefsStack)): |
101 | - yield _processLine(line, defines) | |
101 | + yield lineNum, _processLine(line, defines) | |
102 | + | |
102 | 103 | \ No newline at end of file |
... | ... |
fsabuilder/morfeuszbuilder/segrules/segrules.py renamed to fsabuilder/morfeuszbuilder/segrules/rules.py
... | ... | @@ -15,38 +15,45 @@ class SegmentRule(object): |
15 | 15 | Constructor |
16 | 16 | ''' |
17 | 17 | |
18 | -class SimpleRule(SegmentRule): | |
18 | +class TagRule(SegmentRule): | |
19 | 19 | |
20 | - def __init__(self, name, typeId): | |
21 | - self.name = name | |
22 | - self.identifier = typeId | |
20 | + def __init__(self, tagType, line): | |
21 | + self.tagType = tagType | |
22 | + self.line = line | |
23 | + | |
24 | +class UnaryRule(SegmentRule): | |
25 | + | |
26 | + def __init__(self, child, line): | |
27 | + self.child = child | |
28 | + self.line = line | |
23 | 29 | |
24 | 30 | class ComplexRule(SegmentRule): |
25 | 31 | |
26 | - def __init__(self, children): | |
32 | + def __init__(self, children, line): | |
27 | 33 | self.children = children |
34 | + self.line = line | |
28 | 35 | |
29 | 36 | class ConcatRule(ComplexRule): |
30 | 37 | |
31 | - def __init__(self, children): | |
32 | - super(ConcatRule, self).__init__(children) | |
38 | + def __init__(self, children, line): | |
39 | + super(ConcatRule, self).__init__(children, line) | |
33 | 40 | |
34 | 41 | class OrRule(ComplexRule): |
35 | 42 | |
36 | - def __init__(self, children): | |
37 | - super(OrRule, self).__init__(children) | |
38 | - | |
39 | -class UnaryRule(SegmentRule): | |
40 | - | |
41 | - def __init__(self, child): | |
42 | - self.child = child | |
43 | + def __init__(self, children, line): | |
44 | + super(OrRule, self).__init__(children, line) | |
43 | 45 | |
44 | 46 | class ZeroOrMoreRule(UnaryRule): |
45 | 47 | |
46 | - def __init__(self, child): | |
47 | - super(ZeroOrMoreRule, self).__init__(child) | |
48 | + def __init__(self, child, line): | |
49 | + super(ZeroOrMoreRule, self).__init__(child, line) | |
50 | + | |
51 | +class OneOrMoreRule(UnaryRule): | |
52 | + | |
53 | + def __init__(self, child, line): | |
54 | + super(OneOrMoreRule, self).__init__(child, line) | |
48 | 55 | |
49 | 56 | class IgnoreOrthRule(UnaryRule): |
50 | 57 | |
51 | - def __init__(self, child): | |
52 | - super(IgnoreOrthRule, self).__init__(child) | |
58 | + def __init__(self, child, line): | |
59 | + super(IgnoreOrthRule, self).__init__(child, line) | |
... | ... |
fsabuilder/morfeuszbuilder/segrules/rulesParser.py
0 → 100644
1 | + | |
2 | +from pyparsing import * | |
3 | +from morfeuszbuilder.tagset import segtypes | |
4 | +from morfeuszbuilder.utils import configFile | |
5 | +from morfeuszbuilder.segrules import preprocessor | |
6 | +import codecs | |
7 | +import re | |
8 | + | |
9 | +import itertools | |
10 | +import logging | |
11 | +import segsfsa | |
12 | + | |
13 | +# header = Suppress('[') + Word(alphas, bodyChars=alphanums+'_') + Suppress(']') | |
14 | +# define = Keyword('#define').suppress() + identifier + Optional(Suppress('(') + identifier + Suppress(')')) + restOfLine + LineEnd() + StringEnd() | |
15 | +# ifdef = Keyword('#ifdef').suppress() + identifier + LineEnd() + StringEnd() | |
16 | +# endif = Keyword('#endif').suppress() + LineEnd() + StringEnd() | |
17 | + | |
18 | +def doprint(toks): | |
19 | + print toks | |
20 | + | |
21 | +class RulesParser(object): | |
22 | + | |
23 | + def __init__(self, tagset): | |
24 | + self.tagset = tagset | |
25 | + | |
26 | + def _getKey2Defs(self, segtypesConfigFile): | |
27 | + res = {} | |
28 | + for lineNum, line in segtypesConfigFile.enumerateLinesInSection('options'): | |
29 | + lineToParse = Word(alphanums+'_') + Suppress('=') + Group(OneOrMore(Word(alphanums+'_'))) + LineEnd().suppress() | |
30 | + try: | |
31 | + key, defs = lineToParse.parseString(line) | |
32 | + res[key] = tuple(defs) | |
33 | + except Exception as ex: | |
34 | + raise configFile.ConfigFileException(segtypesConfigFile.filename, lineNum, u'Error in [options] section: %s' % str(ex)) | |
35 | + return res | |
36 | + | |
37 | + def parse(self, filename): | |
38 | + res = [] | |
39 | + | |
40 | + segtypesConfigFile = configFile.ConfigFile(filename, ['options', 'combinations', 'tags', 'lexemes']) | |
41 | + key2Defs = self._getKey2Defs(segtypesConfigFile) | |
42 | + segtypesHelper = segtypes.Segtypes(self.tagset, segtypesConfigFile) | |
43 | + | |
44 | + def2Key = {} | |
45 | + for key, defs in key2Defs.iteritems(): | |
46 | + for define in defs: | |
47 | + def2Key[define] = key | |
48 | + | |
49 | + for defs in itertools.product(*key2Defs.values()): | |
50 | + key2Def = dict([(def2Key[define], define) for define in defs]) | |
51 | + fsa = segsfsa.SegmentsFSA(key2Def) | |
52 | + combinationEnumeratedLines = segtypesConfigFile.enumerateLinesInSection('combinations') | |
53 | + combinationEnumeratedLines = list(preprocessor.preprocess(combinationEnumeratedLines, defs)) | |
54 | + for rule in self._doParse(combinationEnumeratedLines, segtypesHelper): | |
55 | + fsa.addSegmentRule(rule) | |
56 | + res.append(fsa) | |
57 | + return res | |
58 | + | |
59 | + def _doParse(self, combinationEnumeratedLines, segtypesHelper): | |
60 | + for lineNum, line in combinationEnumeratedLines: | |
61 | + if not line.startswith('#'): | |
62 | + yield self._doParseOneLine(lineNum, line, segtypesHelper) | |
63 | + | |
64 | + def _doParseOneLine(self, lineNum, line, segtypesHelper): | |
65 | + rule = Forward() | |
66 | + tagRule = Word(alphanums+'_') | |
67 | + ignoreOrthRule = tagRule + Suppress('>') | |
68 | + parenRule = Suppress('(') + rule + Suppress(')') | |
69 | + atomicRule = tagRule ^ ignoreOrthRule ^ parenRule | |
70 | + zeroOrMoreRule = atomicRule + Suppress('*') | |
71 | + oneOrMoreRule = atomicRule + Suppress('+') | |
72 | + unaryRule = atomicRule ^ zeroOrMoreRule ^ oneOrMoreRule | |
73 | + oneOfRule = delimitedList(unaryRule, delim='|') | |
74 | + complexRule = unaryRule ^ oneOfRule | |
75 | + concatRule = OneOrMore(complexRule) | |
76 | + rule << concatRule | |
77 | +# rule << tagRule ^ ignoreOrthRule ^ zeroOrMoreRule ^ oneOrMoreRule ^ orRule ^ concatRule ^ parenRule | |
78 | + | |
79 | +# tagRule.setParseAction(lambda s,l,toks: doprint(toks)) | |
80 | +# print lineNum, line | |
81 | + parsedLine = rule.parseString(line, parseAll=True) | |
82 | +# print parsedLine | |
... | ... |
fsabuilder/morfeuszbuilder/segrules/segsfsa.py
... | ... | @@ -14,7 +14,7 @@ class SegmentsFSAState(object): |
14 | 14 | |
15 | 15 | class SegmentsFSA(object): |
16 | 16 | |
17 | - def __init__(self): | |
17 | + def __init__(self, key2Def={}): | |
18 | 18 | self.initialState = SegmentsFSAState() |
19 | 19 | |
20 | 20 | def addSegmentRule(self, segmentRule): |
... | ... | @@ -23,3 +23,5 @@ class SegmentsFSA(object): |
23 | 23 | def serialize(self): |
24 | 24 | res = bytearray() |
25 | 25 | return res |
26 | + | |
27 | + | |
26 | 28 | \ No newline at end of file |
... | ... |
fsabuilder/morfeuszbuilder/segrules/test.py
... | ... | @@ -4,7 +4,7 @@ Created on 24 sty 2014 |
4 | 4 | @author: mlenart |
5 | 5 | ''' |
6 | 6 | |
7 | -import preprocessor | |
7 | +from morfeuszbuilder.segrules import preprocessor | |
8 | 8 | |
9 | 9 | if __name__ == '__main__': |
10 | 10 | text = ''' |
... | ... | @@ -13,8 +13,8 @@ dupa |
13 | 13 | #define X(x) a x b |
14 | 14 | #define Y(x) X(x) c |
15 | 15 | #define B(x) X(x) |
16 | -#define Z(x) Y(X(x)) d | |
17 | -#define AB(asd) dupa asd dupa | |
16 | +#define Z(x) Y( X(x) jhg) d | |
17 | +#define A_B(asd) dupa asd dupa asfda_asdfa | |
18 | 18 | Y(Z(a) b X(c) Y(d)) |
19 | 19 | #ifdef extra |
20 | 20 | asdfasa |
... | ... | @@ -30,7 +30,7 @@ aaaa asd |
30 | 30 | asdfasdfada |
31 | 31 | #endif |
32 | 32 | |
33 | -AB(x) | |
33 | +A_B( (x)+ x) | |
34 | 34 | ''' |
35 | - for line in preprocessor.preprocess(text.split('\n'), ['extra', 'superextra']): | |
35 | + for line in preprocessor.preprocess(enumerate(text.split('\n')), ['extra', 'superextra']): | |
36 | 36 | print line |
37 | 37 | \ No newline at end of file |
... | ... |
fsabuilder/morfeuszbuilder/segrules/test/__init__.py
0 → 100644
fsabuilder/morfeuszbuilder/segrules/test/parserTest.py
0 → 100644
1 | +''' | |
2 | +Created on 18 lut 2014 | |
3 | + | |
4 | +@author: mlenart | |
5 | +''' | |
6 | +import unittest | |
7 | +import os | |
8 | +from morfeuszbuilder.segrules import rulesParser | |
9 | +from morfeuszbuilder.tagset import tagset | |
10 | + | |
11 | +class Test(unittest.TestCase): | |
12 | + t = tagset.Tagset(os.path.join(os.path.dirname(__file__), 'polimorf.tagset')) | |
13 | + parser = rulesParser.RulesParser(t) | |
14 | + parser.parse(os.path.join(os.path.dirname(__file__), 'segmenty.dat')) | |
15 | + | |
16 | +if __name__ == "__main__": | |
17 | + unittest.main() | |
18 | +# testParser() | |
0 | 19 | \ No newline at end of file |
... | ... |
fsabuilder/morfeuszbuilder/segrules/test/polimorf.tagset
0 → 100644
1 | +#!MORFEUSZ-TAGSET 0.1 | |
2 | + | |
3 | +[TAGS] | |
4 | + | |
5 | +0 adj:pl:acc:m1.p1:com | |
6 | +1 adj:pl:acc:m1.p1:pos | |
7 | +2 adj:pl:acc:m1.p1:sup | |
8 | +3 adj:pl:acc:m2.m3.f.n1.n2.p2.p3:com | |
9 | +4 adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos | |
10 | +5 adj:pl:acc:m2.m3.f.n1.n2.p2.p3:sup | |
11 | +6 adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:com | |
12 | +7 adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos | |
13 | +8 adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:sup | |
14 | +9 adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:com | |
15 | +10 adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos | |
16 | +11 adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:sup | |
17 | +12 adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:com | |
18 | +13 adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos | |
19 | +14 adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:sup | |
20 | +15 adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:com | |
21 | +16 adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos | |
22 | +17 adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:sup | |
23 | +18 adj:pl:nom.voc:m1.p1:com | |
24 | +19 adj:pl:nom.voc:m1.p1:pos | |
25 | +20 adj:pl:nom.voc:m1.p1:sup | |
26 | +21 adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:com | |
27 | +22 adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos | |
28 | +23 adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:sup | |
29 | +24 adj:pl:nom:m1.p1:pos | |
30 | +25 adj:pl:nom:m2.m3.f.n1.n2.p2.p3:pos | |
31 | +26 adj:sg:acc:f:com | |
32 | +27 adj:sg:acc:f:pos | |
33 | +28 adj:sg:acc:f:sup | |
34 | +29 adj:sg:acc:m1.m2:com | |
35 | +30 adj:sg:acc:m1.m2:pos | |
36 | +31 adj:sg:acc:m1.m2:sup | |
37 | +32 adj:sg:acc:m3:com | |
38 | +33 adj:sg:acc:m3:pos | |
39 | +34 adj:sg:acc:m3:sup | |
40 | +35 adj:sg:acc:n1.n2:com | |
41 | +36 adj:sg:acc:n1.n2:pos | |
42 | +37 adj:sg:acc:n1.n2:sup | |
43 | +38 adj:sg:dat:f:com | |
44 | +39 adj:sg:dat:f:pos | |
45 | +40 adj:sg:dat:f:sup | |
46 | +41 adj:sg:dat:m1.m2.m3.n1.n2:com | |
47 | +42 adj:sg:dat:m1.m2.m3.n1.n2:pos | |
48 | +43 adj:sg:dat:m1.m2.m3.n1.n2:sup | |
49 | +44 adj:sg:gen:f:com | |
50 | +45 adj:sg:gen:f:pos | |
51 | +46 adj:sg:gen:f:sup | |
52 | +47 adj:sg:gen:m1.m2.m3.n1.n2:com | |
53 | +48 adj:sg:gen:m1.m2.m3.n1.n2:pos | |
54 | +49 adj:sg:gen:m1.m2.m3.n1.n2:sup | |
55 | +50 adj:sg:inst:f:com | |
56 | +51 adj:sg:inst:f:pos | |
57 | +52 adj:sg:inst:f:sup | |
58 | +53 adj:sg:inst:m1.m2.m3.n1.n2:com | |
59 | +54 adj:sg:inst:m1.m2.m3.n1.n2:pos | |
60 | +55 adj:sg:inst:m1.m2.m3.n1.n2:sup | |
61 | +56 adj:sg:loc:f:com | |
62 | +57 adj:sg:loc:f:pos | |
63 | +58 adj:sg:loc:f:sup | |
64 | +59 adj:sg:loc:m1.m2.m3.n1.n2:com | |
65 | +60 adj:sg:loc:m1.m2.m3.n1.n2:pos | |
66 | +61 adj:sg:loc:m1.m2.m3.n1.n2:sup | |
67 | +62 adj:sg:nom.voc:f:com | |
68 | +63 adj:sg:nom.voc:f:pos | |
69 | +64 adj:sg:nom.voc:f:sup | |
70 | +65 adj:sg:nom.voc:m1.m2.m3:com | |
71 | +66 adj:sg:nom.voc:m1.m2.m3:pos | |
72 | +67 adj:sg:nom.voc:m1.m2.m3:sup | |
73 | +68 adj:sg:nom.voc:n1.n2:com | |
74 | +69 adj:sg:nom.voc:n1.n2:pos | |
75 | +70 adj:sg:nom.voc:n1.n2:sup | |
76 | +71 adj:sg:nom:f:pos | |
77 | +72 adj:sg:nom:m1.m2.m3:pos | |
78 | +73 adj:sg:nom:n1.n2:pos | |
79 | +74 adja | |
80 | +75 adjc | |
81 | +76 adjp | |
82 | +77 adv | |
83 | +78 adv:com | |
84 | +79 adv:pos | |
85 | +80 adv:sup | |
86 | +81 aglt:pl:pri:imperf:nwok | |
87 | +82 aglt:pl:pri:imperf:wok | |
88 | +83 aglt:pl:sec:imperf:nwok | |
89 | +84 aglt:pl:sec:imperf:wok | |
90 | +85 aglt:sg:pri:imperf:nwok | |
91 | +86 aglt:sg:pri:imperf:wok | |
92 | +87 aglt:sg:sec:imperf:nwok | |
93 | +88 aglt:sg:sec:imperf:wok | |
94 | +89 bedzie:pl:pri:imperf | |
95 | +90 bedzie:pl:sec:imperf | |
96 | +91 bedzie:pl:ter:imperf | |
97 | +92 bedzie:sg:pri:imperf | |
98 | +93 bedzie:sg:sec:imperf | |
99 | +94 bedzie:sg:ter:imperf | |
100 | +95 burk | |
101 | +96 comp | |
102 | +97 conj | |
103 | +98 depr:pl:nom:m2 | |
104 | +99 depr:pl:voc:m2 | |
105 | +100 fin:pl:pri:imperf | |
106 | +101 fin:pl:pri:imperf.perf | |
107 | +102 fin:pl:pri:perf | |
108 | +103 fin:pl:sec:imperf | |
109 | +104 fin:pl:sec:imperf.perf | |
110 | +105 fin:pl:sec:perf | |
111 | +106 fin:pl:ter:imperf | |
112 | +107 fin:pl:ter:imperf.perf | |
113 | +108 fin:pl:ter:perf | |
114 | +109 fin:sg:pri:imperf | |
115 | +110 fin:sg:pri:imperf.perf | |
116 | +111 fin:sg:pri:perf | |
117 | +112 fin:sg:sec:imperf | |
118 | +113 fin:sg:sec:imperf.perf | |
119 | +114 fin:sg:sec:perf | |
120 | +115 fin:sg:ter:imperf | |
121 | +116 fin:sg:ter:imperf.perf | |
122 | +117 fin:sg:ter:perf | |
123 | +118 ger:sg:dat.loc:n2:imperf.perf:aff | |
124 | +119 ger:sg:dat.loc:n2:imperf.perf:neg | |
125 | +120 ger:sg:dat.loc:n2:imperf:aff | |
126 | +121 ger:sg:dat.loc:n2:imperf:neg | |
127 | +122 ger:sg:dat.loc:n2:perf:aff | |
128 | +123 ger:sg:dat.loc:n2:perf:neg | |
129 | +124 ger:sg:gen:n2:imperf.perf:aff | |
130 | +125 ger:sg:gen:n2:imperf.perf:neg | |
131 | +126 ger:sg:gen:n2:imperf:aff | |
132 | +127 ger:sg:gen:n2:imperf:neg | |
133 | +128 ger:sg:gen:n2:perf:aff | |
134 | +129 ger:sg:gen:n2:perf:neg | |
135 | +130 ger:sg:inst:n2:imperf.perf:aff | |
136 | +131 ger:sg:inst:n2:imperf.perf:neg | |
137 | +132 ger:sg:inst:n2:imperf:aff | |
138 | +133 ger:sg:inst:n2:imperf:neg | |
139 | +134 ger:sg:inst:n2:perf:aff | |
140 | +135 ger:sg:inst:n2:perf:neg | |
141 | +136 ger:sg:nom.acc:n2:imperf.perf:aff | |
142 | +137 ger:sg:nom.acc:n2:imperf.perf:neg | |
143 | +138 ger:sg:nom.acc:n2:imperf:aff | |
144 | +139 ger:sg:nom.acc:n2:imperf:neg | |
145 | +140 ger:sg:nom.acc:n2:perf:aff | |
146 | +141 ger:sg:nom.acc:n2:perf:neg | |
147 | +142 imps:imperf | |
148 | +143 imps:imperf.perf | |
149 | +144 imps:perf | |
150 | +145 impt:pl:pri:imperf | |
151 | +146 impt:pl:pri:imperf.perf | |
152 | +147 impt:pl:pri:perf | |
153 | +148 impt:pl:sec:imperf | |
154 | +149 impt:pl:sec:imperf.perf | |
155 | +150 impt:pl:sec:perf | |
156 | +151 impt:sg:sec:imperf | |
157 | +152 impt:sg:sec:imperf.perf | |
158 | +153 impt:sg:sec:perf | |
159 | +154 inf:imperf | |
160 | +155 inf:imperf.perf | |
161 | +156 inf:perf | |
162 | +157 interj | |
163 | +158 num:comp | |
164 | +159 num:pl:acc:m1:rec | |
165 | +160 num:pl:dat.loc:n1.p1.p2:congr.rec | |
166 | +161 num:pl:dat:m1.m2.m3.n2.f:congr | |
167 | +162 num:pl:gen.dat.inst.loc:m1.m2.m3.f.n1.n2.p1.p2:congr | |
168 | +163 num:pl:gen.dat.inst.loc:m1.m2.m3.f.n2:congr | |
169 | +164 num:pl:gen.dat.loc:m1.m2.m3.n2.f:congr | |
170 | +165 num:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2:congr | |
171 | +166 num:pl:gen.loc:m1.m2.m3.n2.f:congr | |
172 | +167 num:pl:gen:n1.p1.p2:rec | |
173 | +168 num:pl:inst:f:congr | |
174 | +169 num:pl:inst:m1.m2.m3.f.n1.n2.p1.p2:congr | |
175 | +170 num:pl:inst:m1.m2.m3.f.n2:congr | |
176 | +171 num:pl:inst:m1.m2.m3.n2.f:congr | |
177 | +172 num:pl:inst:m1.m2.m3.n2:congr | |
178 | +173 num:pl:inst:n1.p1.p2:rec | |
179 | +174 num:pl:nom.acc.voc:f:congr | |
180 | +175 num:pl:nom.acc.voc:m1:rec | |
181 | +176 num:pl:nom.acc.voc:m2.m3.f.n1.n2.p1.p2:rec | |
182 | +177 num:pl:nom.acc.voc:m2.m3.f.n2:rec | |
183 | +178 num:pl:nom.acc.voc:m2.m3.n2.f:congr | |
184 | +179 num:pl:nom.acc.voc:m2.m3.n2:congr | |
185 | +180 num:pl:nom.acc.voc:n1.p1.p2:rec | |
186 | +181 num:pl:nom.acc:m1.m2.m3.f.n1.n2.p1.p2:rec | |
187 | +182 num:pl:nom.gen.dat.inst.acc.loc.voc:m1.m2.m3.f.n1.n2.p1.p2:rec | |
188 | +183 num:pl:nom.voc:m1:congr | |
189 | +184 num:pl:nom.voc:m1:rec | |
190 | +185 num:sg:nom.gen.dat.inst.acc.loc.voc:f:rec | |
191 | +186 num:sg:nom.gen.dat.inst.acc.loc.voc:m1.m2.m3.n1.n2:rec | |
192 | +187 pact:pl:acc:m1.p1:imperf.perf:aff | |
193 | +188 pact:pl:acc:m1.p1:imperf.perf:neg | |
194 | +189 pact:pl:acc:m1.p1:imperf:aff | |
195 | +190 pact:pl:acc:m1.p1:imperf:neg | |
196 | +191 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff | |
197 | +192 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg | |
198 | +193 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff | |
199 | +194 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg | |
200 | +195 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff | |
201 | +196 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg | |
202 | +197 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff | |
203 | +198 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg | |
204 | +199 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff | |
205 | +200 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg | |
206 | +201 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff | |
207 | +202 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg | |
208 | +203 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:aff | |
209 | +204 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:neg | |
210 | +205 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff | |
211 | +206 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:neg | |
212 | +207 pact:pl:nom.voc:m1.p1:imperf.perf:aff | |
213 | +208 pact:pl:nom.voc:m1.p1:imperf.perf:neg | |
214 | +209 pact:pl:nom.voc:m1.p1:imperf:aff | |
215 | +210 pact:pl:nom.voc:m1.p1:imperf:neg | |
216 | +211 pact:sg:acc.inst:f:imperf.perf:aff | |
217 | +212 pact:sg:acc.inst:f:imperf.perf:neg | |
218 | +213 pact:sg:acc.inst:f:imperf:aff | |
219 | +214 pact:sg:acc.inst:f:imperf:neg | |
220 | +215 pact:sg:acc:m1.m2:imperf.perf:aff | |
221 | +216 pact:sg:acc:m1.m2:imperf.perf:neg | |
222 | +217 pact:sg:acc:m1.m2:imperf:aff | |
223 | +218 pact:sg:acc:m1.m2:imperf:neg | |
224 | +219 pact:sg:acc:m3:imperf.perf:aff | |
225 | +220 pact:sg:acc:m3:imperf.perf:neg | |
226 | +221 pact:sg:acc:m3:imperf:aff | |
227 | +222 pact:sg:acc:m3:imperf:neg | |
228 | +223 pact:sg:dat:m1.m2.m3.n1.n2:imperf.perf:aff | |
229 | +224 pact:sg:dat:m1.m2.m3.n1.n2:imperf.perf:neg | |
230 | +225 pact:sg:dat:m1.m2.m3.n1.n2:imperf:aff | |
231 | +226 pact:sg:dat:m1.m2.m3.n1.n2:imperf:neg | |
232 | +227 pact:sg:gen.dat.loc:f:imperf.perf:aff | |
233 | +228 pact:sg:gen.dat.loc:f:imperf.perf:neg | |
234 | +229 pact:sg:gen.dat.loc:f:imperf:aff | |
235 | +230 pact:sg:gen.dat.loc:f:imperf:neg | |
236 | +231 pact:sg:gen:m1.m2.m3.n1.n2:imperf.perf:aff | |
237 | +232 pact:sg:gen:m1.m2.m3.n1.n2:imperf.perf:neg | |
238 | +233 pact:sg:gen:m1.m2.m3.n1.n2:imperf:aff | |
239 | +234 pact:sg:gen:m1.m2.m3.n1.n2:imperf:neg | |
240 | +235 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:aff | |
241 | +236 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:neg | |
242 | +237 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff | |
243 | +238 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf:neg | |
244 | +239 pact:sg:nom.acc.voc:n1.n2:imperf.perf:aff | |
245 | +240 pact:sg:nom.acc.voc:n1.n2:imperf.perf:neg | |
246 | +241 pact:sg:nom.acc.voc:n1.n2:imperf:aff | |
247 | +242 pact:sg:nom.acc.voc:n1.n2:imperf:neg | |
248 | +243 pact:sg:nom.voc:f:imperf.perf:aff | |
249 | +244 pact:sg:nom.voc:f:imperf.perf:neg | |
250 | +245 pact:sg:nom.voc:f:imperf:aff | |
251 | +246 pact:sg:nom.voc:f:imperf:neg | |
252 | +247 pact:sg:nom.voc:m1.m2.m3:imperf.perf:aff | |
253 | +248 pact:sg:nom.voc:m1.m2.m3:imperf.perf:neg | |
254 | +249 pact:sg:nom.voc:m1.m2.m3:imperf:aff | |
255 | +250 pact:sg:nom.voc:m1.m2.m3:imperf:neg | |
256 | +251 pant:perf | |
257 | +252 pcon:imperf | |
258 | +253 ppas:pl:acc:m1.p1:imperf.perf:aff | |
259 | +254 ppas:pl:acc:m1.p1:imperf.perf:neg | |
260 | +255 ppas:pl:acc:m1.p1:imperf:aff | |
261 | +256 ppas:pl:acc:m1.p1:imperf:neg | |
262 | +257 ppas:pl:acc:m1.p1:perf:aff | |
263 | +258 ppas:pl:acc:m1.p1:perf:neg | |
264 | +259 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff | |
265 | +260 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg | |
266 | +261 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff | |
267 | +262 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg | |
268 | +263 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff | |
269 | +264 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg | |
270 | +265 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff | |
271 | +266 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg | |
272 | +267 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff | |
273 | +268 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg | |
274 | +269 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff | |
275 | +270 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg | |
276 | +271 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff | |
277 | +272 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg | |
278 | +273 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff | |
279 | +274 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg | |
280 | +275 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff | |
281 | +276 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg | |
282 | +277 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:aff | |
283 | +278 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:neg | |
284 | +279 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff | |
285 | +280 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:neg | |
286 | +281 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:aff | |
287 | +282 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:neg | |
288 | +283 ppas:pl:nom.voc:m1.p1:imperf.perf:aff | |
289 | +284 ppas:pl:nom.voc:m1.p1:imperf.perf:neg | |
290 | +285 ppas:pl:nom.voc:m1.p1:imperf:aff | |
291 | +286 ppas:pl:nom.voc:m1.p1:imperf:neg | |
292 | +287 ppas:pl:nom.voc:m1.p1:perf:aff | |
293 | +288 ppas:pl:nom.voc:m1.p1:perf:neg | |
294 | +289 ppas:sg:acc.inst:f:imperf.perf:aff | |
295 | +290 ppas:sg:acc.inst:f:imperf.perf:neg | |
296 | +291 ppas:sg:acc.inst:f:imperf:aff | |
297 | +292 ppas:sg:acc.inst:f:imperf:neg | |
298 | +293 ppas:sg:acc.inst:f:perf:aff | |
299 | +294 ppas:sg:acc.inst:f:perf:neg | |
300 | +295 ppas:sg:acc:m1.m2:imperf.perf:aff | |
301 | +296 ppas:sg:acc:m1.m2:imperf.perf:neg | |
302 | +297 ppas:sg:acc:m1.m2:imperf:aff | |
303 | +298 ppas:sg:acc:m1.m2:imperf:neg | |
304 | +299 ppas:sg:acc:m1.m2:perf:aff | |
305 | +300 ppas:sg:acc:m1.m2:perf:neg | |
306 | +301 ppas:sg:acc:m3:imperf.perf:aff | |
307 | +302 ppas:sg:acc:m3:imperf.perf:neg | |
308 | +303 ppas:sg:acc:m3:imperf:aff | |
309 | +304 ppas:sg:acc:m3:imperf:neg | |
310 | +305 ppas:sg:acc:m3:perf:aff | |
311 | +306 ppas:sg:acc:m3:perf:neg | |
312 | +307 ppas:sg:dat:m1.m2.m3.n1.n2:imperf.perf:aff | |
313 | +308 ppas:sg:dat:m1.m2.m3.n1.n2:imperf.perf:neg | |
314 | +309 ppas:sg:dat:m1.m2.m3.n1.n2:imperf:aff | |
315 | +310 ppas:sg:dat:m1.m2.m3.n1.n2:imperf:neg | |
316 | +311 ppas:sg:dat:m1.m2.m3.n1.n2:perf:aff | |
317 | +312 ppas:sg:dat:m1.m2.m3.n1.n2:perf:neg | |
318 | +313 ppas:sg:gen.dat.loc:f:imperf.perf:aff | |
319 | +314 ppas:sg:gen.dat.loc:f:imperf.perf:neg | |
320 | +315 ppas:sg:gen.dat.loc:f:imperf:aff | |
321 | +316 ppas:sg:gen.dat.loc:f:imperf:neg | |
322 | +317 ppas:sg:gen.dat.loc:f:perf:aff | |
323 | +318 ppas:sg:gen.dat.loc:f:perf:neg | |
324 | +319 ppas:sg:gen:m1.m2.m3.n1.n2:imperf.perf:aff | |
325 | +320 ppas:sg:gen:m1.m2.m3.n1.n2:imperf.perf:neg | |
326 | +321 ppas:sg:gen:m1.m2.m3.n1.n2:imperf:aff | |
327 | +322 ppas:sg:gen:m1.m2.m3.n1.n2:imperf:neg | |
328 | +323 ppas:sg:gen:m1.m2.m3.n1.n2:perf:aff | |
329 | +324 ppas:sg:gen:m1.m2.m3.n1.n2:perf:neg | |
330 | +325 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:aff | |
331 | +326 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:neg | |
332 | +327 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff | |
333 | +328 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf:neg | |
334 | +329 ppas:sg:inst.loc:m1.m2.m3.n1.n2:perf:aff | |
335 | +330 ppas:sg:inst.loc:m1.m2.m3.n1.n2:perf:neg | |
336 | +331 ppas:sg:nom.acc.voc:n1.n2:imperf.perf:aff | |
337 | +332 ppas:sg:nom.acc.voc:n1.n2:imperf.perf:neg | |
338 | +333 ppas:sg:nom.acc.voc:n1.n2:imperf:aff | |
339 | +334 ppas:sg:nom.acc.voc:n1.n2:imperf:neg | |
340 | +335 ppas:sg:nom.acc.voc:n1.n2:perf:aff | |
341 | +336 ppas:sg:nom.acc.voc:n1.n2:perf:neg | |
342 | +337 ppas:sg:nom.voc:f:imperf.perf:aff | |
343 | +338 ppas:sg:nom.voc:f:imperf.perf:neg | |
344 | +339 ppas:sg:nom.voc:f:imperf:aff | |
345 | +340 ppas:sg:nom.voc:f:imperf:neg | |
346 | +341 ppas:sg:nom.voc:f:perf:aff | |
347 | +342 ppas:sg:nom.voc:f:perf:neg | |
348 | +343 ppas:sg:nom.voc:m1.m2.m3:imperf.perf:aff | |
349 | +344 ppas:sg:nom.voc:m1.m2.m3:imperf.perf:neg | |
350 | +345 ppas:sg:nom.voc:m1.m2.m3:imperf:aff | |
351 | +346 ppas:sg:nom.voc:m1.m2.m3:imperf:neg | |
352 | +347 ppas:sg:nom.voc:m1.m2.m3:perf:aff | |
353 | +348 ppas:sg:nom.voc:m1.m2.m3:perf:neg | |
354 | +349 ppron12:pl:acc:_:pri | |
355 | +350 ppron12:pl:acc:_:sec | |
356 | +351 ppron12:pl:dat:_:pri | |
357 | +352 ppron12:pl:dat:_:sec | |
358 | +353 ppron12:pl:gen:_:pri | |
359 | +354 ppron12:pl:gen:_:sec | |
360 | +355 ppron12:pl:inst:_:pri | |
361 | +356 ppron12:pl:inst:_:sec | |
362 | +357 ppron12:pl:loc:_:pri | |
363 | +358 ppron12:pl:loc:_:sec | |
364 | +359 ppron12:pl:nom:_:pri | |
365 | +360 ppron12:pl:nom:_:sec | |
366 | +361 ppron12:pl:voc:_:pri | |
367 | +362 ppron12:pl:voc:_:sec | |
368 | +363 ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:akc | |
369 | +364 ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:nakc | |
370 | +365 ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:akc | |
371 | +366 ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:nakc | |
372 | +367 ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:akc | |
373 | +368 ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:nakc | |
374 | +369 ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:akc | |
375 | +370 ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:nakc | |
376 | +371 ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:akc | |
377 | +372 ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:nakc | |
378 | +373 ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:akc | |
379 | +374 ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:nakc | |
380 | +375 ppron12:sg:inst:m1.m2.m3.f.n1.n2:pri | |
381 | +376 ppron12:sg:inst:m1.m2.m3.f.n1.n2:sec | |
382 | +377 ppron12:sg:loc:m1.m2.m3.f.n1.n2:pri | |
383 | +378 ppron12:sg:loc:m1.m2.m3.f.n1.n2:sec | |
384 | +379 ppron12:sg:nom:m1.m2.m3.f.n1.n2:pri | |
385 | +380 ppron12:sg:nom:m1.m2.m3.f.n1.n2:sec | |
386 | +381 ppron12:sg:voc:m1.m2.m3.f.n1.n2:pri | |
387 | +382 ppron12:sg:voc:m1.m2.m3.f.n1.n2:sec | |
388 | +383 ppron3:pl:acc:m1.p1:ter:_:npraep | |
389 | +384 ppron3:pl:acc:m1.p1:ter:_:praep | |
390 | +385 ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:_:npraep | |
391 | +386 ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:_:praep | |
392 | +387 ppron3:pl:dat:_:ter:_:npraep | |
393 | +388 ppron3:pl:dat:_:ter:_:praep | |
394 | +389 ppron3:pl:gen:_:ter:_:npraep | |
395 | +390 ppron3:pl:gen:_:ter:_:praep | |
396 | +391 ppron3:pl:inst:_:ter:_:_ | |
397 | +392 ppron3:pl:loc:_:ter:_:_ | |
398 | +393 ppron3:pl:nom:m1.p1:ter:_:_ | |
399 | +394 ppron3:pl:nom:m2.m3.f.n1.n2.p2.p3:ter:_:_ | |
400 | +395 ppron3:sg:acc:f:ter:_:npraep | |
401 | +396 ppron3:sg:acc:f:ter:_:praep | |
402 | +397 ppron3:sg:acc:m1.m2.m3:ter:akc:npraep | |
403 | +398 ppron3:sg:acc:m1.m2.m3:ter:akc:praep | |
404 | +399 ppron3:sg:acc:m1.m2.m3:ter:nakc:npraep | |
405 | +400 ppron3:sg:acc:m1.m2.m3:ter:nakc:praep | |
406 | +401 ppron3:sg:acc:n1.n2:ter:_:npraep | |
407 | +402 ppron3:sg:acc:n1.n2:ter:_:praep | |
408 | +403 ppron3:sg:dat:f:ter:_:npraep | |
409 | +404 ppron3:sg:dat:f:ter:_:praep | |
410 | +405 ppron3:sg:dat:m1.m2.m3:ter:_:praep | |
411 | +406 ppron3:sg:dat:m1.m2.m3:ter:akc:npraep | |
412 | +407 ppron3:sg:dat:m1.m2.m3:ter:nakc:npraep | |
413 | +408 ppron3:sg:dat:n1.n2:ter:_:praep | |
414 | +409 ppron3:sg:dat:n1.n2:ter:akc:npraep | |
415 | +410 ppron3:sg:dat:n1.n2:ter:nakc:npraep | |
416 | +411 ppron3:sg:gen:f:ter:_:npraep | |
417 | +412 ppron3:sg:gen:f:ter:_:praep | |
418 | +413 ppron3:sg:gen:m1.m2.m3:ter:akc:npraep | |
419 | +414 ppron3:sg:gen:m1.m2.m3:ter:akc:praep | |
420 | +415 ppron3:sg:gen:m1.m2.m3:ter:nakc:npraep | |
421 | +416 ppron3:sg:gen:m1.m2.m3:ter:nakc:praep | |
422 | +417 ppron3:sg:gen:n1.n2:ter:_:praep | |
423 | +418 ppron3:sg:gen:n1.n2:ter:akc:npraep | |
424 | +419 ppron3:sg:gen:n1.n2:ter:nakc:npraep | |
425 | +420 ppron3:sg:inst:f:ter:_:praep | |
426 | +421 ppron3:sg:inst:m1.m2.m3:ter:_:_ | |
427 | +422 ppron3:sg:inst:n1.n2:ter:_:_ | |
428 | +423 ppron3:sg:loc:f:ter:_:_ | |
429 | +424 ppron3:sg:loc:m1.m2.m3:ter:_:_ | |
430 | +425 ppron3:sg:loc:n1.n2:ter:_:_ | |
431 | +426 ppron3:sg:nom:f:ter:_:_ | |
432 | +427 ppron3:sg:nom:m1.m2.m3:ter:_:_ | |
433 | +428 ppron3:sg:nom:n1.n2:ter:_:_ | |
434 | +429 praet:pl:m1.p1:imperf | |
435 | +430 praet:pl:m1.p1:imperf.perf | |
436 | +431 praet:pl:m1.p1:perf | |
437 | +432 praet:pl:m2.m3.f.n1.n2.p2.p3:imperf | |
438 | +433 praet:pl:m2.m3.f.n1.n2.p2.p3:imperf.perf | |
439 | +434 praet:pl:m2.m3.f.n1.n2.p2.p3:perf | |
440 | +435 praet:sg:f:imperf | |
441 | +436 praet:sg:f:imperf.perf | |
442 | +437 praet:sg:f:perf | |
443 | +438 praet:sg:m1.m2.m3:imperf | |
444 | +439 praet:sg:m1.m2.m3:imperf.perf | |
445 | +440 praet:sg:m1.m2.m3:imperf:agl | |
446 | +441 praet:sg:m1.m2.m3:imperf:nagl | |
447 | +442 praet:sg:m1.m2.m3:perf | |
448 | +443 praet:sg:m1.m2.m3:perf:agl | |
449 | +444 praet:sg:m1.m2.m3:perf:nagl | |
450 | +445 praet:sg:n1.n2:imperf | |
451 | +446 praet:sg:n1.n2:imperf.perf | |
452 | +447 praet:sg:n1.n2:perf | |
453 | +448 pred | |
454 | +449 prep:acc | |
455 | +450 prep:acc:nwok | |
456 | +451 prep:acc:wok | |
457 | +452 prep:dat | |
458 | +453 prep:gen | |
459 | +454 prep:gen:nwok | |
460 | +455 prep:gen:wok | |
461 | +456 prep:inst | |
462 | +457 prep:inst:nwok | |
463 | +458 prep:inst:wok | |
464 | +459 prep:loc | |
465 | +460 prep:loc:nwok | |
466 | +461 prep:loc:wok | |
467 | +462 prep:nom | |
468 | +463 qub | |
469 | +464 subst:pl:acc:f | |
470 | +465 subst:pl:acc:m1 | |
471 | +466 subst:pl:acc:m2 | |
472 | +467 subst:pl:acc:m3 | |
473 | +468 subst:pl:acc:n1 | |
474 | +469 subst:pl:acc:n2 | |
475 | +470 subst:pl:acc:p1 | |
476 | +471 subst:pl:acc:p2 | |
477 | +472 subst:pl:acc:p3 | |
478 | +473 subst:pl:dat:f | |
479 | +474 subst:pl:dat:m1 | |
480 | +475 subst:pl:dat:m2 | |
481 | +476 subst:pl:dat:m3 | |
482 | +477 subst:pl:dat:n1 | |
483 | +478 subst:pl:dat:n2 | |
484 | +479 subst:pl:dat:p1 | |
485 | +480 subst:pl:dat:p2 | |
486 | +481 subst:pl:dat:p3 | |
487 | +482 subst:pl:gen:f | |
488 | +483 subst:pl:gen:m1 | |
489 | +484 subst:pl:gen:m2 | |
490 | +485 subst:pl:gen:m3 | |
491 | +486 subst:pl:gen:n1 | |
492 | +487 subst:pl:gen:n2 | |
493 | +488 subst:pl:gen:p1 | |
494 | +489 subst:pl:gen:p2 | |
495 | +490 subst:pl:gen:p3 | |
496 | +491 subst:pl:inst:f | |
497 | +492 subst:pl:inst:m1 | |
498 | +493 subst:pl:inst:m2 | |
499 | +494 subst:pl:inst:m3 | |
500 | +495 subst:pl:inst:n1 | |
501 | +496 subst:pl:inst:n2 | |
502 | +497 subst:pl:inst:p1 | |
503 | +498 subst:pl:inst:p2 | |
504 | +499 subst:pl:inst:p3 | |
505 | +500 subst:pl:loc:f | |
506 | +501 subst:pl:loc:m1 | |
507 | +502 subst:pl:loc:m2 | |
508 | +503 subst:pl:loc:m3 | |
509 | +504 subst:pl:loc:n1 | |
510 | +505 subst:pl:loc:n2 | |
511 | +506 subst:pl:loc:p1 | |
512 | +507 subst:pl:loc:p2 | |
513 | +508 subst:pl:loc:p3 | |
514 | +509 subst:pl:nom:f | |
515 | +510 subst:pl:nom:m1 | |
516 | +511 subst:pl:nom:m2 | |
517 | +512 subst:pl:nom:m3 | |
518 | +513 subst:pl:nom:n1 | |
519 | +514 subst:pl:nom:n2 | |
520 | +515 subst:pl:nom:p1 | |
521 | +516 subst:pl:nom:p2 | |
522 | +517 subst:pl:nom:p3 | |
523 | +518 subst:pl:voc:f | |
524 | +519 subst:pl:voc:m1 | |
525 | +520 subst:pl:voc:m2 | |
526 | +521 subst:pl:voc:m3 | |
527 | +522 subst:pl:voc:n1 | |
528 | +523 subst:pl:voc:n2 | |
529 | +524 subst:pl:voc:p1 | |
530 | +525 subst:pl:voc:p2 | |
531 | +526 subst:pl:voc:p3 | |
532 | +527 subst:sg:acc:f | |
533 | +528 subst:sg:acc:m1 | |
534 | +529 subst:sg:acc:m2 | |
535 | +530 subst:sg:acc:m3 | |
536 | +531 subst:sg:acc:n1 | |
537 | +532 subst:sg:acc:n2 | |
538 | +533 subst:sg:dat:f | |
539 | +534 subst:sg:dat:m1 | |
540 | +535 subst:sg:dat:m2 | |
541 | +536 subst:sg:dat:m3 | |
542 | +537 subst:sg:dat:n1 | |
543 | +538 subst:sg:dat:n2 | |
544 | +539 subst:sg:gen:f | |
545 | +540 subst:sg:gen:m1 | |
546 | +541 subst:sg:gen:m2 | |
547 | +542 subst:sg:gen:m3 | |
548 | +543 subst:sg:gen:n1 | |
549 | +544 subst:sg:gen:n2 | |
550 | +545 subst:sg:inst:f | |
551 | +546 subst:sg:inst:m1 | |
552 | +547 subst:sg:inst:m2 | |
553 | +548 subst:sg:inst:m3 | |
554 | +549 subst:sg:inst:n1 | |
555 | +550 subst:sg:inst:n2 | |
556 | +551 subst:sg:loc:f | |
557 | +552 subst:sg:loc:m1 | |
558 | +553 subst:sg:loc:m2 | |
559 | +554 subst:sg:loc:m3 | |
560 | +555 subst:sg:loc:n1 | |
561 | +556 subst:sg:loc:n2 | |
562 | +557 subst:sg:nom:f | |
563 | +558 subst:sg:nom:m1 | |
564 | +559 subst:sg:nom:m2 | |
565 | +560 subst:sg:nom:m3 | |
566 | +561 subst:sg:nom:n1 | |
567 | +562 subst:sg:nom:n2 | |
568 | +563 subst:sg:voc:f | |
569 | +564 subst:sg:voc:m1 | |
570 | +565 subst:sg:voc:m2 | |
571 | +566 subst:sg:voc:m3 | |
572 | +567 subst:sg:voc:n1 | |
573 | +568 subst:sg:voc:n2 | |
574 | +569 winien:pl:m1.p1:imperf | |
575 | +570 winien:pl:m2.m3.f.n1.n2.p2.p3:imperf | |
576 | +571 winien:sg:f:imperf | |
577 | +572 winien:sg:m1.m2.m3:imperf | |
578 | +573 winien:sg:n1.n2:imperf | |
579 | + | |
580 | +[NAMES] | |
581 | + | |
582 | +0 | |
583 | +1 etnonim | |
584 | +2 geograficzna | |
585 | +3 imię | |
586 | +4 nazwisko | |
587 | +5 określenie dodatkowe | |
588 | +6 organizacja | |
589 | +7 osoba | |
590 | +8 pospolita | |
591 | +9 własna | |
592 | +10 wydarzenie | |
593 | +11 wytwór | |
594 | + | |
... | ... |
fsabuilder/morfeuszbuilder/segrules/test/preprocessorTest.py
0 → 100644
1 | +''' | |
2 | +Created on 18 lut 2014 | |
3 | + | |
4 | +@author: mlenart | |
5 | +''' | |
6 | +import unittest | |
7 | +import codecs | |
8 | +import os | |
9 | + | |
10 | +from morfeuszbuilder.segrules import preprocessor | |
11 | +from morfeuszbuilder.utils import configFile | |
12 | + | |
13 | + | |
14 | +class Test(unittest.TestCase): | |
15 | + | |
16 | + | |
17 | + def testPreprocess(self): | |
18 | + filename = os.path.join(os.path.dirname(__file__), 'segmenty.dat') | |
19 | + parsedFile = configFile.ConfigFile(filename, ['options', 'combinations', 'tags', 'lexemes']) | |
20 | + linesEnum = parsedFile.enumerateLinesInSection('combinations') | |
21 | + for lineNum, line in preprocessor.preprocess(linesEnum, ['extra', 'superextra']): | |
22 | + print (lineNum, line) | |
23 | + | |
24 | + | |
25 | +if __name__ == "__main__": | |
26 | + #import sys;sys.argv = ['', 'Test.testPreprocess'] | |
27 | + unittest.main() | |
0 | 28 | \ No newline at end of file |
... | ... |
fsabuilder/morfeuszbuilder/segrules/test/segmenty.dat
0 → 100644
1 | +[options] | |
2 | +aggl=permissive strict isolated | |
3 | +praet=split composite | |
4 | + | |
5 | +[combinations] | |
6 | +(dupa|dupa) | |
7 | +#define wsz_interp (interp|kropka|dywiz)* | |
8 | + | |
9 | +#define moze_interp(segmenty) wsz_interp segmenty wsz_interp | |
10 | + | |
11 | +# Segmenty występujące samodzielnie: | |
12 | +# | |
13 | +# domyślny typ segmentu samodzielnego: | |
14 | +moze_interp(samodz) | |
15 | + | |
16 | +# segment samotny, który nie dopuszcza nawet znaku interpunkcyjnego po | |
17 | +# sobie | |
18 | +samotny | |
19 | + | |
20 | +# przeszlik pojedynczy w formie nieaglutynacyjnej, np. „gniótł”: | |
21 | +moze_interp(praet_sg_na) | |
22 | + | |
23 | +# przeszlik pojedynczy w formie niezróżnicowanej aglutynacyjnie, np. „czytał”: | |
24 | +moze_interp(praet_sg) | |
25 | + | |
26 | +# przeszlik mnogi, np. „czytali”: | |
27 | +moze_interp(praet_pl) | |
28 | + | |
29 | +# partykuła „by”: | |
30 | +moze_interp(by) | |
31 | + | |
32 | +# inne segmenty, które dopuszczają po sobie aglutynant, | |
33 | +# np. „powininna”, „czyżby”: | |
34 | +moze_interp(z_aglt) | |
35 | + | |
36 | +# forma przymiotnikowa (dopuszcza adja): | |
37 | +moze_interp(adj) | |
38 | + | |
39 | +# dywiz (jako samodzielny segment jest tyko błędnym użyciem w funkcji | |
40 | +# myślnika, ale trzeba to dopuścić): | |
41 | +dywiz | |
42 | + | |
43 | +#ifdef isolated | |
44 | +adja | |
45 | +#endif | |
46 | + | |
47 | + | |
48 | +# Połączenia z aglutynantami: | |
49 | +# | |
50 | +#ifdef split | |
51 | +# Czas przeszły: | |
52 | +# np. „gniotł·am” | |
53 | +moze_interp( praet_sg_agl aglsg ) | |
54 | +# np. „czytał·em” | |
55 | +moze_interp(praet_sg aglsg) | |
56 | +# np. „czytali·ście” | |
57 | +moze_interp(praet_pl aglpl) | |
58 | + | |
59 | +# Tryb warunkowy: | |
60 | +# np. „gniótł·by” | |
61 | +moze_interp(praet_sg_na by) | |
62 | +# np. „czytało·by” | |
63 | +moze_interp(praet_sg by) | |
64 | +# np. „gnietli·by” | |
65 | +moze_interp(praet_pl by) | |
66 | +# np. „gniótł·by·ś” | |
67 | +moze_interp(praet_sg_na by aglsg) | |
68 | +# np. „czytał·by·m” | |
69 | +moze_interp(praet_sg by aglsg) | |
70 | +# np. „gnietli·by·śmy” | |
71 | +moze_interp(praet_pl by aglpl) | |
72 | +#else | |
73 | +moze_interp(praetcond) | |
74 | +#endif | |
75 | +# np. „by·ś” | |
76 | +moze_interp(by aglsg) | |
77 | +# np. „by·ście” | |
78 | +moze_interp(by aglpl) | |
79 | + | |
80 | +# np. „gdyby·m” | |
81 | +moze_interp(z_aglt aglsg) | |
82 | +# np. „gdyby·ście” | |
83 | +moze_interp(z_aglt aglpl) | |
84 | + | |
85 | +# To jest dużo za dużo, ale tytułem eksperymentu: | |
86 | +#ifdef permissive | |
87 | +moze_interp(samodz aglsg) | |
88 | +moze_interp(samodz aglpl) | |
89 | +#endif | |
90 | + | |
91 | +# Złożone formy przymiotnikowe | |
92 | +# np. „biało·-·czerwony” | |
93 | +moze_interp( (adja dywiz)+ adj ) | |
94 | +# poniższe załatwione przez + powyżej: | |
95 | +# # np. „niebiesko·-·biało·-·czerwona” | |
96 | +# adja dywiz adja dywiz adj interp? | |
97 | +# # itd. (zatrzymujemy się pragmatycznie na 5 członach) | |
98 | +# adja dywiz adja dywiz adja dywiz adj interp? | |
99 | +# adja dywiz adja dywiz adja dywiz adja dywiz adj interp? | |
100 | + | |
101 | +# Stopień najwyższy: | |
102 | +# np. „naj·zieleńszy”, „naj·mądrzej” | |
103 | +moze_interp( naj> adj_sup ) | |
104 | + | |
105 | +# Formy „zanegowane” gerundiów i imiesłowów: | |
106 | +# np. „nie·czytanie”, „nie·przeczytany”, „nie·czytający”: | |
107 | +moze_interp( nie > negat ) | |
108 | + | |
109 | +# Przyimki akceptujące krótką formę „-ń” | |
110 | +moze_interp(z_on_agl) | |
111 | +# np. „do·ń” | |
112 | +moze_interp(z_on_agl on_agl) | |
113 | + | |
114 | +# Liczba zapisana jako ciąg cyfr: | |
115 | +moze_interp( dig>* dig ) | |
116 | + | |
117 | +# Formacje prefiksalne | |
118 | +#### trzeba wydzielić odpowiednie samodze! | |
119 | +# rzeczownikowe i przymiotnikowe | |
120 | +# np. „euro·sodoma”, „e-·papieros”, „euro·sodomski”, „bez·argumentowy” | |
121 | +moze_interp( prefs samodz ) | |
122 | +# czasownikowe np. „po·nakapywać” | |
123 | +moze_interp( prefv samodz ) | |
124 | + | |
125 | +# Apozycje z dywizem | |
126 | +# np. „kobieta-prezydent” | |
127 | +moze_interp( samodz dywiz samodz ) | |
128 | +# poniższe do sprawdzenia, najwyraźniej obecne w tekstach, skoro wprowadziliśmy: | |
129 | +# ? | |
130 | +adj dywiz adj | |
131 | +# ? | |
132 | +adj dywiz samodz | |
133 | +# ? | |
134 | +samodz dywiz adj | |
135 | + | |
136 | + | |
137 | +[tags] | |
138 | +naj naj | |
139 | +nie nie | |
140 | +prefs prefs | |
141 | +prefv prefv | |
142 | +dig dig | |
143 | +adja adja | |
144 | +adj adj:%:pos | |
145 | +adj_sup adj:%:sup | |
146 | +adj_sup adv:sup | |
147 | +negat ger:%:neg | |
148 | +negat pact:%:neg | |
149 | +negat ppas:%:neg | |
150 | +on_agl ppron3:sg:gen.acc:m1.m2.m3:ter:nakc:praep | |
151 | +z_on_agl prep:% | |
152 | +samotny brev:pun | |
153 | +samotny brev:npun | |
154 | +samotny intrj | |
155 | +interp interp | |
156 | +aglsg aglt:sg:% | |
157 | +aglpl aglt:pl:% | |
158 | +praetcond cond:% | |
159 | +praetcond praet:%:pri:% | |
160 | +praetcond praet:%:sec:% | |
161 | +praetcond praet:%:ter:% | |
162 | +praet_sg_agl praet:sg:%:agl | |
163 | +praet_sg_na praet:sg:%:nagl | |
164 | +praet_sg praet:sg:% | |
165 | +praet_pl praet:pl:% | |
166 | +praet_sg winien:sg:% | |
167 | +praet_pl winien:pl:% | |
168 | +samodz % | |
169 | + | |
170 | +[lexemes] | |
171 | +z_aglt aby:comp | |
172 | +z_aglt bowiem:comp | |
173 | +by by:qub | |
174 | +z_aglt by:comp | |
175 | +z_aglt cóż:subst | |
176 | +z_aglt czemu:adv | |
177 | +z_aglt czyżby:qub | |
178 | +z_aglt choćby:comp | |
179 | +z_aglt chociażby:comp | |
180 | +z_aglt dlaczego:adv | |
181 | +z_aglt dopóki:comp | |
182 | +z_aglt dopóty:conj | |
183 | +z_aglt gdyby:comp | |
184 | +z_aglt gdzie:qub | |
185 | +z_aglt gdzie:adv | |
186 | +z_aglt jakby:comp | |
187 | +z_aglt jakoby:comp | |
188 | +z_aglt kiedy:adv | |
189 | +z_aglt kiedy:comp | |
190 | +z_aglt tylko:qub | |
191 | +z_aglt żeby:comp | |
192 | +dywiz -:interp | |
193 | +kropka .:interp | |
... | ... |
fsabuilder/morfeuszbuilder/tagset/__init__.py
0 → 100644
fsabuilder/morfeuszbuilder/tagset/segtypes.py
0 → 100644
1 | +''' | |
2 | +Created on 17 lut 2014 | |
3 | + | |
4 | +@author: mlenart | |
5 | +''' | |
6 | +import re | |
7 | + | |
8 | +class Segtypes(object): | |
9 | + | |
10 | + def __init__(self, tagset, segrulesFile): | |
11 | + | |
12 | + self.tagset = tagset | |
13 | + | |
14 | + self.segrulesConfigFile = segrulesFile | |
15 | + | |
16 | + self.segtype2Segnum = {} | |
17 | + self.patternsList = [] | |
18 | + | |
19 | + def readTags(self, lines): | |
20 | + inTags = False | |
21 | + for lineNum, line in enumerate(lines, start=1): | |
22 | + header = self._getHeaderValue(line, lineNum) | |
23 | + if header == 'tags': | |
24 | + inTags = True | |
25 | + elif header: | |
26 | + inTags = False | |
27 | + elif inTags: | |
28 | + segtype, pattern = line.strip().split('\t') | |
29 | + self._validate( | |
30 | + u'Segment type must be a lowercase alphanumeric with optional underscores', | |
31 | + lineNum, | |
32 | + re.match(r'[a-z_]+', segtype)) | |
33 | + self._validate( | |
34 | + u'Pattern must contain only ":", "%", "." and lowercase alphanumeric letters', | |
35 | + lineNum, | |
36 | + re.match(r'[a-z_\.\:\%]+', pattern)) | |
37 | + | |
38 | + if segtype in self.segtype2Segnum: | |
39 | + segnum = self.segtype2Segnum[segtype] | |
40 | + else: | |
41 | + segnum = len(self.segtype2Segnum) | |
42 | + self.segtype2Segnum[segtype] = segnum | |
43 | + | |
44 | + self.patternsList.append(SegtypePattern(None, pattern, segnum)) | |
45 | + | |
46 | + def readLexemes(self, lines): | |
47 | + inLexemes = False | |
48 | + for lineNum, line in enumerate(lines, start=1): | |
49 | + header = self._getHeaderValue(line, lineNum) | |
50 | + if header == 'lexemes': | |
51 | + inLexemes = True | |
52 | + elif header: | |
53 | + inLexemes = False | |
54 | + elif inLexemes: | |
55 | + segtype, pattern = line.strip().split('\t') | |
56 | + self._validate( | |
57 | + u'Segment type must be a lowercase alphanumeric with optional underscores', | |
58 | + lineNum, | |
59 | + re.match(r'[a-z_]+', segtype)) | |
60 | + self._validate( | |
61 | + u'Pattern must contain lemma and POS', | |
62 | + lineNum, | |
63 | + re.match(r'\w+\:[a-z_]+', pattern, re.U)) | |
64 | + | |
65 | + if segtype in self.segtype2Segnum: | |
66 | + segnum = self.segtype2Segnum[segtype] | |
67 | + else: | |
68 | + segnum = len(self.segtype2Segnum) | |
69 | + self.segtype2Segnum[segtype] = segnum | |
70 | + | |
71 | + lemma, pos = pattern.split(':') | |
72 | + | |
73 | + self.patternsList.append(SegtypePattern(lemma, pos + ':%', segnum)) | |
74 | + | |
75 | + def lexeme2Segnum(self, lemma, tag): | |
76 | + for p in self.patternsList: | |
77 | + res = p.tryToMatch(lemma, tag) | |
78 | + if res >= 0: | |
79 | + return res | |
80 | + raise SegtypesException('Cannot find segment type for given tag: %s' % tag) | |
81 | + | |
82 | +class SegtypePattern(object): | |
83 | + | |
84 | + def __init__(self, lemma, pattern, segnum): | |
85 | + self.lemma = lemma | |
86 | + self.pattern = pattern | |
87 | + self.segnum = segnum | |
88 | + | |
89 | + def tryToMatch(self, lemma, tag): | |
90 | + if (self.lemma is None or self.lemma == lemma) \ | |
91 | + and re.match(self.pattern.replace('%', '.*'), tag): | |
92 | + return self.segnum | |
93 | + else: | |
94 | + return -1 | |
95 | + | |
96 | +class SegtypesException(Exception): | |
97 | + | |
98 | + def __init__(self, msg): | |
99 | + self.msg = msg | |
100 | + | |
101 | + def __str__(self): | |
102 | + return u'Error in segment rules: %s' % self.msg | |
... | ... |
fsabuilder/morfeuszbuilder/tagset/tagset.py
0 → 100644
1 | +''' | |
2 | +Created on 17 lut 2014 | |
3 | + | |
4 | +@author: mlenart | |
5 | +''' | |
6 | + | |
7 | +import codecs | |
8 | + | |
9 | +class Tagset(object): | |
10 | + | |
11 | + TAGS = 1 | |
12 | + NAMES = 2 | |
13 | + SEP = '\t' | |
14 | + | |
15 | + def __init__(self, filename, encoding='utf8'): | |
16 | + self.tag2tagnum = {} | |
17 | + self.name2namenum = {} | |
18 | + self._doInit(filename, encoding) | |
19 | + self.tagnum2tag = dict(map(lambda (k, v): (v, k), self.tag2tagnum.iteritems())) | |
20 | + | |
21 | + def _doInit(self, filename, encoding): | |
22 | + addingTo = None | |
23 | + with codecs.open(filename, 'r', encoding) as f: | |
24 | + for line in f: | |
25 | + line = line.strip('\n') | |
26 | + if line == u'[TAGS]': | |
27 | + addingTo = Tagset.TAGS | |
28 | + elif line == u'[NAMES]': | |
29 | + addingTo = Tagset.NAMES | |
30 | + elif line and not line.startswith(u'#'): | |
31 | + assert addingTo in [Tagset.TAGS, Tagset.NAMES] | |
32 | + res = {Tagset.TAGS: self.tag2tagnum, | |
33 | + Tagset.NAMES: self.name2namenum}[addingTo] | |
34 | + tagNum = line.split(Tagset.SEP)[0] | |
35 | + tag = line.split(Tagset.SEP)[1] | |
36 | + assert tag not in res | |
37 | + res[tag] = int(tagNum) | |
38 | + | |
39 | + def getTag4Tagnum(self, tagnum): | |
40 | + return self.tagnum2tag[tagnum] | |
0 | 41 | \ No newline at end of file |
... | ... |
fsabuilder/morfeuszbuilder/utils/configFile.py
0 → 100644
1 | +''' | |
2 | +Created on 18 lut 2014 | |
3 | + | |
4 | +@author: mlenart | |
5 | +''' | |
6 | + | |
7 | +import re | |
8 | +import codecs | |
9 | + | |
10 | +def getHeaderValue(line, lineNum): | |
11 | + m = re.match(ur'\s*\[(.*?)\]\s*(\#.*)?', line) | |
12 | + if m: | |
13 | + return m.group(1) | |
14 | + else: | |
15 | + return None | |
16 | + | |
17 | +class ConfigFile(object): | |
18 | + | |
19 | + def __init__(self, filename, sectionNames): | |
20 | + self.filename = filename | |
21 | + self.sectionNames = sectionNames | |
22 | + self.section2Lines = {} | |
23 | + self.currSection = None | |
24 | + self._parse() | |
25 | + | |
26 | + def _addSectionStart(self, sectionName, lineNum): | |
27 | + if not sectionName in self.sectionNames: | |
28 | + raise ConfigFileException(self.filename, lineNum, 'Invalid section: %s' % sectionName) | |
29 | + if sectionName in self.section2Lines: | |
30 | + raise ConfigFileException(self.filename, lineNum, 'Duplicate section: %s' % sectionName) | |
31 | + self.section2Lines[sectionName] = [] | |
32 | + self.currSection = sectionName | |
33 | + | |
34 | + def _addLine(self, line, lineNum): | |
35 | + line = line.strip() | |
36 | + if line: | |
37 | + if self.currSection is None and not line.startswith('#'): | |
38 | + raise ConfigFileException(self.filename, lineNum, 'Text outside of any section') | |
39 | + self.section2Lines[self.currSection].append((lineNum, line)) | |
40 | + | |
41 | + def _getHeaderValue(self, line, lineNum): | |
42 | + m = re.match(ur'\s*\[(.*?)\]\s*(\#.*)?', line) | |
43 | + if m: | |
44 | + return m.group(1) | |
45 | + else: | |
46 | + return None | |
47 | + | |
48 | + def enumerateLinesInSection(self, sectionName): | |
49 | + return self.section2Lines[sectionName] | |
50 | + | |
51 | + def _parse(self): | |
52 | + with codecs.open(self.filename, 'r', 'utf8') as f: | |
53 | + for lineNum, line in enumerate(f, start=1): | |
54 | + header = self._getHeaderValue(line, lineNum) | |
55 | + if header: | |
56 | + self._addSectionStart(header, lineNum) | |
57 | + else: | |
58 | + self._addLine(line, lineNum) | |
59 | + | |
60 | +class ConfigFileException(Exception): | |
61 | + | |
62 | + def __init__(self, filename, lineNum, msg): | |
63 | + self.filename = filename | |
64 | + self.lineNum = lineNum | |
65 | + self.msg = msg | |
66 | + | |
67 | + def __str__(self): | |
68 | + return u'%s:%d - %s' % (self.filename, self.lineNum, self.msg) | |
... | ... |