Commit 1c1bf6777d2888a58f0faf084f903a5534c74a60

Authored by Michał Lenart
1 parent 28f11d57

- różne poprawki w parsowaniu tagsetu

- praca nad parsowaniem reguł zlepiania segmentów

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@85 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
fsabuilder/.settings/org.eclipse.core.resources.prefs
1 1 eclipse.preferences.version=1
  2 +encoding//morfeuszbuilder/fsa/test/testConstruction.py=utf-8
2 3 encoding/buildfsa.py=utf-8
... ...
fsabuilder/.settings/org.eclipse.ltk.core.refactoring.prefs 0 → 100644
  1 +eclipse.preferences.version=1
  2 +org.eclipse.ltk.core.refactoring.enable.project.refactoring.history=false
... ...
fsabuilder/morfeuszbuilder/fsa/common.py
... ... @@ -77,34 +77,3 @@ class Interpretation4Generator(object):
77 77  
78 78 def __repr__(self):
79 79 return unicode(self)
80   -
81   -class Tagset(object):
82   -
83   - TAGS = 1
84   - NAMES = 2
85   - SEP = '\t'
86   -
87   - def __init__(self, filename, encoding='utf8'):
88   - self.tag2tagnum = {}
89   - self.name2namenum = {}
90   - self._doInit(filename, encoding)
91   -# print self.tag2tagnum
92   -# print self.name2namenum
93   -
94   - def _doInit(self, filename, encoding):
95   - addingTo = None
96   - with codecs.open(filename, 'r', encoding) as f:
97   - for line in f:
98   - line = line.strip('\n')
99   - if line == u'[TAGS]':
100   - addingTo = Tagset.TAGS
101   - elif line == u'[NAMES]':
102   - addingTo = Tagset.NAMES
103   - elif line and not line.startswith(u'#'):
104   - assert addingTo in [Tagset.TAGS, Tagset.NAMES]
105   - res = {Tagset.TAGS: self.tag2tagnum,
106   - Tagset.NAMES: self.name2namenum}[addingTo]
107   - tagNum = line.split(Tagset.SEP)[0]
108   - tag = line.split(Tagset.SEP)[1]
109   - assert tag not in res
110   - res[tag] = int(tagNum)
... ...
fsabuilder/morfeuszbuilder/fsa/test/testConstruction.py
... ... @@ -6,62 +6,62 @@ Created on Oct 8, 2013
6 6 '''
7 7 import unittest
8 8 import os
9   -from fsa import fsa, visualizer, encode, buildfsa
10   -from fsa.serializer import SimpleSerializer
  9 +from morfeuszbuilder.fsa import fsa, visualizer, encode
  10 +from morfeuszbuilder.fsa.serializer import SimpleSerializer
11 11  
12 12 class Test(unittest.TestCase):
13   -
14   - def testSimpleConstruction(self):
15   - a = fsa.FSA(encode.SimpleEncoder())
16   - input = sorted([
17   - (u'bić', ''),
18   - (u'bij', ''),
19   - (u'biją', ''),
20   - (u'bijcie', ''),
21   - (u'bije', ''),
22   - (u'bijecie', ''),
23   - (u'bijemy', ''),
24   - (u'bijesz', ''),
25   - (u'biję', ''),
26   - (u'bijmy', ''),
27   - (u'bili', 'asd'),
28   - (u'biliby', ''),
29   - (u'bilibyście', ''),
30   - (u'bilibyśmy', ''),
31   - (u'biliście', 'asdfas'),
32   - (u'biliśmy', ''),
33   - (u'bił', 'wersadfas'),
34   - (u'biła', 'asdfasd'),
35   - (u'biłaby', 'asdfa'),
36   - (u'biłabym', ''),
37   - (u'biłabyś', 'asdfa'),
38   - (u'biłam', 'dfas'),
39   - (u'biłaś', 'asdfas'),
40   - (u'biłby', ''),
41   - (u'biłbym', 'asdfa'),
42   - (u'biłbyś', ''),
43   - (u'biłem', ''),
44   - (u'biłeś', 'sadfa'),
45   - (u'biły', ''),
46   - (u'biłyby', ''),
47   - (u'biłybyście', ''),
48   - (u'biłybyśmy', ''),
49   - (u'biłyście', ''),
50   - (u'biłyśmy', ''),
51   - ], key=lambda w: bytearray(w[0], 'utf8'))
52   - a.feed(input)
53   - for w, res in input:
54   - recognized = a.tryToRecognize(w)
55   - assert recognized == res
56   - a.calculateOffsets(lambda state: 1 + 4 * len(state.transitionsMap.keys()) + (len(state.encodedData) if state.isAccepting() else 0))
57   - visualizer.Visualizer().visualize(a)
58   -
59   - def testPolimorfConstruction(self):
60   - inputFile = os.path.join(os.path.dirname(__file__), 'PoliMorfSmall.tab')
61   - tagsetFile = os.path.join(os.path.dirname(__file__), 'polimorf.tagset')
62   - fsa = buildfsa.buildFromPoliMorf(inputFile, tagsetFile)
63   - serializer = SimpleSerializer(fsa)
64   - serializer.serialize2BinaryFile('/tmp/test0.fsa')
  13 + pass
  14 +# def testSimpleConstruction(self):
  15 +# a = fsa.FSA(encode.SimpleEncoder())
  16 +# input = sorted([
  17 +# (u'bić', ''),
  18 +# (u'bij', ''),
  19 +# (u'biją', ''),
  20 +# (u'bijcie', ''),
  21 +# (u'bije', ''),
  22 +# (u'bijecie', ''),
  23 +# (u'bijemy', ''),
  24 +# (u'bijesz', ''),
  25 +# (u'biję', ''),
  26 +# (u'bijmy', ''),
  27 +# (u'bili', 'asd'),
  28 +# (u'biliby', ''),
  29 +# (u'bilibyście', ''),
  30 +# (u'bilibyśmy', ''),
  31 +# (u'biliście', 'asdfas'),
  32 +# (u'biliśmy', ''),
  33 +# (u'bił', 'wersadfas'),
  34 +# (u'biła', 'asdfasd'),
  35 +# (u'biłaby', 'asdfa'),
  36 +# (u'biłabym', ''),
  37 +# (u'biłabyś', 'asdfa'),
  38 +# (u'biłam', 'dfas'),
  39 +# (u'biłaś', 'asdfas'),
  40 +# (u'biłby', ''),
  41 +# (u'biłbym', 'asdfa'),
  42 +# (u'biłbyś', ''),
  43 +# (u'biłem', ''),
  44 +# (u'biłeś', 'sadfa'),
  45 +# (u'biły', ''),
  46 +# (u'biłyby', ''),
  47 +# (u'biłybyście', ''),
  48 +# (u'biłybyśmy', ''),
  49 +# (u'biłyście', ''),
  50 +# (u'biłyśmy', ''),
  51 +# ], key=lambda w: bytearray(w[0], 'utf8'))
  52 +# a.feed(input)
  53 +# for w, res in input:
  54 +# recognized = a.tryToRecognize(w)
  55 +# assert recognized == res
  56 +# a.calculateOffsets(lambda state: 1 + 4 * len(state.transitionsMap.keys()) + (len(state.encodedData) if state.isAccepting() else 0))
  57 +# visualizer.Visualizer().visualize(a)
  58 +#
  59 +# def testPolimorfConstruction(self):
  60 +# inputFile = os.path.join(os.path.dirname(__file__), 'PoliMorfSmall.tab')
  61 +# tagsetFile = os.path.join(os.path.dirname(__file__), 'polimorf.tagset')
  62 +# fsa = buildfsa.buildFromPoliMorf(inputFile, tagsetFile)
  63 +# serializer = SimpleSerializer(fsa)
  64 +# serializer.serialize2BinaryFile('/tmp/test0.fsa')
65 65 # visualizer.Visualizer().visualize(fsa)
66 66  
67 67 if __name__ == "__main__":
... ...
fsabuilder/morfeuszbuilder/segrules/preprocessor.py
... ... @@ -7,6 +7,7 @@ import re
7 7 from pyparsing import *
8 8  
9 9 identifier = Word(alphas, bodyChars=alphanums+'_')
  10 +token = Word(alphas, bodyChars=alphanums+'_+>')
10 11 define = Keyword('#define').suppress() + identifier + Optional(Suppress('(') + identifier + Suppress(')')) + restOfLine + LineEnd() + StringEnd()
11 12 ifdef = Keyword('#ifdef').suppress() + identifier + LineEnd() + StringEnd()
12 13 endif = Keyword('#endif').suppress() + LineEnd() + StringEnd()
... ... @@ -64,7 +65,7 @@ def _processLine(line, defines):
64 65 defineInstance = Forward()
65 66 localId = identifier.copy()
66 67  
67   - rule << OneOrMore(localId ^ defineInstance ^ Word('*|+?'))
  68 + rule << OneOrMore(defineInstance ^ localId ^ Word('*|+?>') ^ (Literal('(') + rule + Literal(')')))
68 69 defineInstance << localId + Suppress('(') + rule + Suppress(')')
69 70  
70 71 rule.setParseAction(lambda s, l, t: ' '.join(t))
... ... @@ -77,25 +78,25 @@ def _processLine(line, defines):
77 78 def preprocess(inputLines, defs):
78 79 defines = {}
79 80 ifdefsStack = []
80   - for lineNum, line in enumerate(inputLines, start=1):
  81 + for lineNum, line in inputLines:
81 82 if line.startswith('#define'):
82   - try:
83   - parsedDefine = list(define.parseString(line))
84   - if len(parsedDefine) == 2:
85   - name, val = parsedDefine
86   - defines[name] = NonArgDefine(name, val)
87   - else:
88   - name, arg, val = parsedDefine
89   - localDefines = defines.copy()
90   - localDefines[arg] = NonArgDefine(arg, arg)
91   - val = _processLine(val, localDefines)
92   - defines[name] = ArgDefine(name, arg, val)
93   - except:
94   - pass
  83 + parsedDefine = list(define.parseString(line))
  84 + if len(parsedDefine) == 2:
  85 + name, val = parsedDefine
  86 + defines[name] = NonArgDefine(name, val)
  87 + else:
  88 + name, arg, val = parsedDefine
  89 + localDefines = defines.copy()
  90 + localDefines[arg] = NonArgDefine(arg, arg)
  91 + val = _processLine(val, localDefines)
  92 + defines[name] = ArgDefine(name, arg, val)
95 93 elif line.startswith('#ifdef'):
96 94 name = ifdef.parseString(line)[0]
97 95 ifdefsStack.append(name)
98 96 elif line.startswith('#endif'):
99 97 ifdefsStack.pop()
  98 + elif line.startswith('#'):
  99 + yield lineNum, line
100 100 elif len(ifdefsStack) == 0 or all(map(lambda name: name in defs, ifdefsStack)):
101   - yield _processLine(line, defines)
  101 + yield lineNum, _processLine(line, defines)
  102 +
102 103 \ No newline at end of file
... ...
fsabuilder/morfeuszbuilder/segrules/segrules.py renamed to fsabuilder/morfeuszbuilder/segrules/rules.py
... ... @@ -15,38 +15,45 @@ class SegmentRule(object):
15 15 Constructor
16 16 '''
17 17  
18   -class SimpleRule(SegmentRule):
  18 +class TagRule(SegmentRule):
19 19  
20   - def __init__(self, name, typeId):
21   - self.name = name
22   - self.identifier = typeId
  20 + def __init__(self, tagType, line):
  21 + self.tagType = tagType
  22 + self.line = line
  23 +
  24 +class UnaryRule(SegmentRule):
  25 +
  26 + def __init__(self, child, line):
  27 + self.child = child
  28 + self.line = line
23 29  
24 30 class ComplexRule(SegmentRule):
25 31  
26   - def __init__(self, children):
  32 + def __init__(self, children, line):
27 33 self.children = children
  34 + self.line = line
28 35  
29 36 class ConcatRule(ComplexRule):
30 37  
31   - def __init__(self, children):
32   - super(ConcatRule, self).__init__(children)
  38 + def __init__(self, children, line):
  39 + super(ConcatRule, self).__init__(children, line)
33 40  
34 41 class OrRule(ComplexRule):
35 42  
36   - def __init__(self, children):
37   - super(OrRule, self).__init__(children)
38   -
39   -class UnaryRule(SegmentRule):
40   -
41   - def __init__(self, child):
42   - self.child = child
  43 + def __init__(self, children, line):
  44 + super(OrRule, self).__init__(children, line)
43 45  
44 46 class ZeroOrMoreRule(UnaryRule):
45 47  
46   - def __init__(self, child):
47   - super(ZeroOrMoreRule, self).__init__(child)
  48 + def __init__(self, child, line):
  49 + super(ZeroOrMoreRule, self).__init__(child, line)
  50 +
  51 +class OneOrMoreRule(UnaryRule):
  52 +
  53 + def __init__(self, child, line):
  54 + super(OneOrMoreRule, self).__init__(child, line)
48 55  
49 56 class IgnoreOrthRule(UnaryRule):
50 57  
51   - def __init__(self, child):
52   - super(IgnoreOrthRule, self).__init__(child)
  58 + def __init__(self, child, line):
  59 + super(IgnoreOrthRule, self).__init__(child, line)
... ...
fsabuilder/morfeuszbuilder/segrules/rulesParser.py 0 → 100644
  1 +
  2 +from pyparsing import *
  3 +from morfeuszbuilder.tagset import segtypes
  4 +from morfeuszbuilder.utils import configFile
  5 +from morfeuszbuilder.segrules import preprocessor
  6 +import codecs
  7 +import re
  8 +
  9 +import itertools
  10 +import logging
  11 +import segsfsa
  12 +
  13 +# header = Suppress('[') + Word(alphas, bodyChars=alphanums+'_') + Suppress(']')
  14 +# define = Keyword('#define').suppress() + identifier + Optional(Suppress('(') + identifier + Suppress(')')) + restOfLine + LineEnd() + StringEnd()
  15 +# ifdef = Keyword('#ifdef').suppress() + identifier + LineEnd() + StringEnd()
  16 +# endif = Keyword('#endif').suppress() + LineEnd() + StringEnd()
  17 +
  18 +def doprint(toks):
  19 + print toks
  20 +
  21 +class RulesParser(object):
  22 +
  23 + def __init__(self, tagset):
  24 + self.tagset = tagset
  25 +
  26 + def _getKey2Defs(self, segtypesConfigFile):
  27 + res = {}
  28 + for lineNum, line in segtypesConfigFile.enumerateLinesInSection('options'):
  29 + lineToParse = Word(alphanums+'_') + Suppress('=') + Group(OneOrMore(Word(alphanums+'_'))) + LineEnd().suppress()
  30 + try:
  31 + key, defs = lineToParse.parseString(line)
  32 + res[key] = tuple(defs)
  33 + except Exception as ex:
  34 + raise configFile.ConfigFileException(segtypesConfigFile.filename, lineNum, u'Error in [options] section: %s' % str(ex))
  35 + return res
  36 +
  37 + def parse(self, filename):
  38 + res = []
  39 +
  40 + segtypesConfigFile = configFile.ConfigFile(filename, ['options', 'combinations', 'tags', 'lexemes'])
  41 + key2Defs = self._getKey2Defs(segtypesConfigFile)
  42 + segtypesHelper = segtypes.Segtypes(self.tagset, segtypesConfigFile)
  43 +
  44 + def2Key = {}
  45 + for key, defs in key2Defs.iteritems():
  46 + for define in defs:
  47 + def2Key[define] = key
  48 +
  49 + for defs in itertools.product(*key2Defs.values()):
  50 + key2Def = dict([(def2Key[define], define) for define in defs])
  51 + fsa = segsfsa.SegmentsFSA(key2Def)
  52 + combinationEnumeratedLines = segtypesConfigFile.enumerateLinesInSection('combinations')
  53 + combinationEnumeratedLines = list(preprocessor.preprocess(combinationEnumeratedLines, defs))
  54 + for rule in self._doParse(combinationEnumeratedLines, segtypesHelper):
  55 + fsa.addSegmentRule(rule)
  56 + res.append(fsa)
  57 + return res
  58 +
  59 + def _doParse(self, combinationEnumeratedLines, segtypesHelper):
  60 + for lineNum, line in combinationEnumeratedLines:
  61 + if not line.startswith('#'):
  62 + yield self._doParseOneLine(lineNum, line, segtypesHelper)
  63 +
  64 + def _doParseOneLine(self, lineNum, line, segtypesHelper):
  65 + rule = Forward()
  66 + tagRule = Word(alphanums+'_')
  67 + ignoreOrthRule = tagRule + Suppress('>')
  68 + parenRule = Suppress('(') + rule + Suppress(')')
  69 + atomicRule = tagRule ^ ignoreOrthRule ^ parenRule
  70 + zeroOrMoreRule = atomicRule + Suppress('*')
  71 + oneOrMoreRule = atomicRule + Suppress('+')
  72 + unaryRule = atomicRule ^ zeroOrMoreRule ^ oneOrMoreRule
  73 + oneOfRule = delimitedList(unaryRule, delim='|')
  74 + complexRule = unaryRule ^ oneOfRule
  75 + concatRule = OneOrMore(complexRule)
  76 + rule << concatRule
  77 +# rule << tagRule ^ ignoreOrthRule ^ zeroOrMoreRule ^ oneOrMoreRule ^ orRule ^ concatRule ^ parenRule
  78 +
  79 +# tagRule.setParseAction(lambda s,l,toks: doprint(toks))
  80 +# print lineNum, line
  81 + parsedLine = rule.parseString(line, parseAll=True)
  82 +# print parsedLine
... ...
fsabuilder/morfeuszbuilder/segrules/segsfsa.py
... ... @@ -14,7 +14,7 @@ class SegmentsFSAState(object):
14 14  
15 15 class SegmentsFSA(object):
16 16  
17   - def __init__(self):
  17 + def __init__(self, key2Def={}):
18 18 self.initialState = SegmentsFSAState()
19 19  
20 20 def addSegmentRule(self, segmentRule):
... ... @@ -23,3 +23,5 @@ class SegmentsFSA(object):
23 23 def serialize(self):
24 24 res = bytearray()
25 25 return res
  26 +
  27 +
26 28 \ No newline at end of file
... ...
fsabuilder/morfeuszbuilder/segrules/test.py
... ... @@ -4,7 +4,7 @@ Created on 24 sty 2014
4 4 @author: mlenart
5 5 '''
6 6  
7   -import preprocessor
  7 +from morfeuszbuilder.segrules import preprocessor
8 8  
9 9 if __name__ == '__main__':
10 10 text = '''
... ... @@ -13,8 +13,8 @@ dupa
13 13 #define X(x) a x b
14 14 #define Y(x) X(x) c
15 15 #define B(x) X(x)
16   -#define Z(x) Y(X(x)) d
17   -#define AB(asd) dupa asd dupa
  16 +#define Z(x) Y( X(x) jhg) d
  17 +#define A_B(asd) dupa asd dupa asfda_asdfa
18 18 Y(Z(a) b X(c) Y(d))
19 19 #ifdef extra
20 20 asdfasa
... ... @@ -30,7 +30,7 @@ aaaa asd
30 30 asdfasdfada
31 31 #endif
32 32  
33   -AB(x)
  33 +A_B( (x)+ x)
34 34 '''
35   - for line in preprocessor.preprocess(text.split('\n'), ['extra', 'superextra']):
  35 + for line in preprocessor.preprocess(enumerate(text.split('\n')), ['extra', 'superextra']):
36 36 print line
37 37 \ No newline at end of file
... ...
fsabuilder/morfeuszbuilder/segrules/test/__init__.py 0 → 100644
fsabuilder/morfeuszbuilder/segrules/test/parserTest.py 0 → 100644
  1 +'''
  2 +Created on 18 lut 2014
  3 +
  4 +@author: mlenart
  5 +'''
  6 +import unittest
  7 +import os
  8 +from morfeuszbuilder.segrules import rulesParser
  9 +from morfeuszbuilder.tagset import tagset
  10 +
  11 +class Test(unittest.TestCase):
  12 + t = tagset.Tagset(os.path.join(os.path.dirname(__file__), 'polimorf.tagset'))
  13 + parser = rulesParser.RulesParser(t)
  14 + parser.parse(os.path.join(os.path.dirname(__file__), 'segmenty.dat'))
  15 +
  16 +if __name__ == "__main__":
  17 + unittest.main()
  18 +# testParser()
0 19 \ No newline at end of file
... ...
fsabuilder/morfeuszbuilder/segrules/test/polimorf.tagset 0 → 100644
  1 +#!MORFEUSZ-TAGSET 0.1
  2 +
  3 +[TAGS]
  4 +
  5 +0 adj:pl:acc:m1.p1:com
  6 +1 adj:pl:acc:m1.p1:pos
  7 +2 adj:pl:acc:m1.p1:sup
  8 +3 adj:pl:acc:m2.m3.f.n1.n2.p2.p3:com
  9 +4 adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos
  10 +5 adj:pl:acc:m2.m3.f.n1.n2.p2.p3:sup
  11 +6 adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:com
  12 +7 adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos
  13 +8 adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:sup
  14 +9 adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:com
  15 +10 adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos
  16 +11 adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:sup
  17 +12 adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:com
  18 +13 adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos
  19 +14 adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:sup
  20 +15 adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:com
  21 +16 adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos
  22 +17 adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:sup
  23 +18 adj:pl:nom.voc:m1.p1:com
  24 +19 adj:pl:nom.voc:m1.p1:pos
  25 +20 adj:pl:nom.voc:m1.p1:sup
  26 +21 adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:com
  27 +22 adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos
  28 +23 adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:sup
  29 +24 adj:pl:nom:m1.p1:pos
  30 +25 adj:pl:nom:m2.m3.f.n1.n2.p2.p3:pos
  31 +26 adj:sg:acc:f:com
  32 +27 adj:sg:acc:f:pos
  33 +28 adj:sg:acc:f:sup
  34 +29 adj:sg:acc:m1.m2:com
  35 +30 adj:sg:acc:m1.m2:pos
  36 +31 adj:sg:acc:m1.m2:sup
  37 +32 adj:sg:acc:m3:com
  38 +33 adj:sg:acc:m3:pos
  39 +34 adj:sg:acc:m3:sup
  40 +35 adj:sg:acc:n1.n2:com
  41 +36 adj:sg:acc:n1.n2:pos
  42 +37 adj:sg:acc:n1.n2:sup
  43 +38 adj:sg:dat:f:com
  44 +39 adj:sg:dat:f:pos
  45 +40 adj:sg:dat:f:sup
  46 +41 adj:sg:dat:m1.m2.m3.n1.n2:com
  47 +42 adj:sg:dat:m1.m2.m3.n1.n2:pos
  48 +43 adj:sg:dat:m1.m2.m3.n1.n2:sup
  49 +44 adj:sg:gen:f:com
  50 +45 adj:sg:gen:f:pos
  51 +46 adj:sg:gen:f:sup
  52 +47 adj:sg:gen:m1.m2.m3.n1.n2:com
  53 +48 adj:sg:gen:m1.m2.m3.n1.n2:pos
  54 +49 adj:sg:gen:m1.m2.m3.n1.n2:sup
  55 +50 adj:sg:inst:f:com
  56 +51 adj:sg:inst:f:pos
  57 +52 adj:sg:inst:f:sup
  58 +53 adj:sg:inst:m1.m2.m3.n1.n2:com
  59 +54 adj:sg:inst:m1.m2.m3.n1.n2:pos
  60 +55 adj:sg:inst:m1.m2.m3.n1.n2:sup
  61 +56 adj:sg:loc:f:com
  62 +57 adj:sg:loc:f:pos
  63 +58 adj:sg:loc:f:sup
  64 +59 adj:sg:loc:m1.m2.m3.n1.n2:com
  65 +60 adj:sg:loc:m1.m2.m3.n1.n2:pos
  66 +61 adj:sg:loc:m1.m2.m3.n1.n2:sup
  67 +62 adj:sg:nom.voc:f:com
  68 +63 adj:sg:nom.voc:f:pos
  69 +64 adj:sg:nom.voc:f:sup
  70 +65 adj:sg:nom.voc:m1.m2.m3:com
  71 +66 adj:sg:nom.voc:m1.m2.m3:pos
  72 +67 adj:sg:nom.voc:m1.m2.m3:sup
  73 +68 adj:sg:nom.voc:n1.n2:com
  74 +69 adj:sg:nom.voc:n1.n2:pos
  75 +70 adj:sg:nom.voc:n1.n2:sup
  76 +71 adj:sg:nom:f:pos
  77 +72 adj:sg:nom:m1.m2.m3:pos
  78 +73 adj:sg:nom:n1.n2:pos
  79 +74 adja
  80 +75 adjc
  81 +76 adjp
  82 +77 adv
  83 +78 adv:com
  84 +79 adv:pos
  85 +80 adv:sup
  86 +81 aglt:pl:pri:imperf:nwok
  87 +82 aglt:pl:pri:imperf:wok
  88 +83 aglt:pl:sec:imperf:nwok
  89 +84 aglt:pl:sec:imperf:wok
  90 +85 aglt:sg:pri:imperf:nwok
  91 +86 aglt:sg:pri:imperf:wok
  92 +87 aglt:sg:sec:imperf:nwok
  93 +88 aglt:sg:sec:imperf:wok
  94 +89 bedzie:pl:pri:imperf
  95 +90 bedzie:pl:sec:imperf
  96 +91 bedzie:pl:ter:imperf
  97 +92 bedzie:sg:pri:imperf
  98 +93 bedzie:sg:sec:imperf
  99 +94 bedzie:sg:ter:imperf
  100 +95 burk
  101 +96 comp
  102 +97 conj
  103 +98 depr:pl:nom:m2
  104 +99 depr:pl:voc:m2
  105 +100 fin:pl:pri:imperf
  106 +101 fin:pl:pri:imperf.perf
  107 +102 fin:pl:pri:perf
  108 +103 fin:pl:sec:imperf
  109 +104 fin:pl:sec:imperf.perf
  110 +105 fin:pl:sec:perf
  111 +106 fin:pl:ter:imperf
  112 +107 fin:pl:ter:imperf.perf
  113 +108 fin:pl:ter:perf
  114 +109 fin:sg:pri:imperf
  115 +110 fin:sg:pri:imperf.perf
  116 +111 fin:sg:pri:perf
  117 +112 fin:sg:sec:imperf
  118 +113 fin:sg:sec:imperf.perf
  119 +114 fin:sg:sec:perf
  120 +115 fin:sg:ter:imperf
  121 +116 fin:sg:ter:imperf.perf
  122 +117 fin:sg:ter:perf
  123 +118 ger:sg:dat.loc:n2:imperf.perf:aff
  124 +119 ger:sg:dat.loc:n2:imperf.perf:neg
  125 +120 ger:sg:dat.loc:n2:imperf:aff
  126 +121 ger:sg:dat.loc:n2:imperf:neg
  127 +122 ger:sg:dat.loc:n2:perf:aff
  128 +123 ger:sg:dat.loc:n2:perf:neg
  129 +124 ger:sg:gen:n2:imperf.perf:aff
  130 +125 ger:sg:gen:n2:imperf.perf:neg
  131 +126 ger:sg:gen:n2:imperf:aff
  132 +127 ger:sg:gen:n2:imperf:neg
  133 +128 ger:sg:gen:n2:perf:aff
  134 +129 ger:sg:gen:n2:perf:neg
  135 +130 ger:sg:inst:n2:imperf.perf:aff
  136 +131 ger:sg:inst:n2:imperf.perf:neg
  137 +132 ger:sg:inst:n2:imperf:aff
  138 +133 ger:sg:inst:n2:imperf:neg
  139 +134 ger:sg:inst:n2:perf:aff
  140 +135 ger:sg:inst:n2:perf:neg
  141 +136 ger:sg:nom.acc:n2:imperf.perf:aff
  142 +137 ger:sg:nom.acc:n2:imperf.perf:neg
  143 +138 ger:sg:nom.acc:n2:imperf:aff
  144 +139 ger:sg:nom.acc:n2:imperf:neg
  145 +140 ger:sg:nom.acc:n2:perf:aff
  146 +141 ger:sg:nom.acc:n2:perf:neg
  147 +142 imps:imperf
  148 +143 imps:imperf.perf
  149 +144 imps:perf
  150 +145 impt:pl:pri:imperf
  151 +146 impt:pl:pri:imperf.perf
  152 +147 impt:pl:pri:perf
  153 +148 impt:pl:sec:imperf
  154 +149 impt:pl:sec:imperf.perf
  155 +150 impt:pl:sec:perf
  156 +151 impt:sg:sec:imperf
  157 +152 impt:sg:sec:imperf.perf
  158 +153 impt:sg:sec:perf
  159 +154 inf:imperf
  160 +155 inf:imperf.perf
  161 +156 inf:perf
  162 +157 interj
  163 +158 num:comp
  164 +159 num:pl:acc:m1:rec
  165 +160 num:pl:dat.loc:n1.p1.p2:congr.rec
  166 +161 num:pl:dat:m1.m2.m3.n2.f:congr
  167 +162 num:pl:gen.dat.inst.loc:m1.m2.m3.f.n1.n2.p1.p2:congr
  168 +163 num:pl:gen.dat.inst.loc:m1.m2.m3.f.n2:congr
  169 +164 num:pl:gen.dat.loc:m1.m2.m3.n2.f:congr
  170 +165 num:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2:congr
  171 +166 num:pl:gen.loc:m1.m2.m3.n2.f:congr
  172 +167 num:pl:gen:n1.p1.p2:rec
  173 +168 num:pl:inst:f:congr
  174 +169 num:pl:inst:m1.m2.m3.f.n1.n2.p1.p2:congr
  175 +170 num:pl:inst:m1.m2.m3.f.n2:congr
  176 +171 num:pl:inst:m1.m2.m3.n2.f:congr
  177 +172 num:pl:inst:m1.m2.m3.n2:congr
  178 +173 num:pl:inst:n1.p1.p2:rec
  179 +174 num:pl:nom.acc.voc:f:congr
  180 +175 num:pl:nom.acc.voc:m1:rec
  181 +176 num:pl:nom.acc.voc:m2.m3.f.n1.n2.p1.p2:rec
  182 +177 num:pl:nom.acc.voc:m2.m3.f.n2:rec
  183 +178 num:pl:nom.acc.voc:m2.m3.n2.f:congr
  184 +179 num:pl:nom.acc.voc:m2.m3.n2:congr
  185 +180 num:pl:nom.acc.voc:n1.p1.p2:rec
  186 +181 num:pl:nom.acc:m1.m2.m3.f.n1.n2.p1.p2:rec
  187 +182 num:pl:nom.gen.dat.inst.acc.loc.voc:m1.m2.m3.f.n1.n2.p1.p2:rec
  188 +183 num:pl:nom.voc:m1:congr
  189 +184 num:pl:nom.voc:m1:rec
  190 +185 num:sg:nom.gen.dat.inst.acc.loc.voc:f:rec
  191 +186 num:sg:nom.gen.dat.inst.acc.loc.voc:m1.m2.m3.n1.n2:rec
  192 +187 pact:pl:acc:m1.p1:imperf.perf:aff
  193 +188 pact:pl:acc:m1.p1:imperf.perf:neg
  194 +189 pact:pl:acc:m1.p1:imperf:aff
  195 +190 pact:pl:acc:m1.p1:imperf:neg
  196 +191 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff
  197 +192 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg
  198 +193 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff
  199 +194 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg
  200 +195 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff
  201 +196 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg
  202 +197 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff
  203 +198 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg
  204 +199 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff
  205 +200 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg
  206 +201 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff
  207 +202 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg
  208 +203 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:aff
  209 +204 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:neg
  210 +205 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff
  211 +206 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:neg
  212 +207 pact:pl:nom.voc:m1.p1:imperf.perf:aff
  213 +208 pact:pl:nom.voc:m1.p1:imperf.perf:neg
  214 +209 pact:pl:nom.voc:m1.p1:imperf:aff
  215 +210 pact:pl:nom.voc:m1.p1:imperf:neg
  216 +211 pact:sg:acc.inst:f:imperf.perf:aff
  217 +212 pact:sg:acc.inst:f:imperf.perf:neg
  218 +213 pact:sg:acc.inst:f:imperf:aff
  219 +214 pact:sg:acc.inst:f:imperf:neg
  220 +215 pact:sg:acc:m1.m2:imperf.perf:aff
  221 +216 pact:sg:acc:m1.m2:imperf.perf:neg
  222 +217 pact:sg:acc:m1.m2:imperf:aff
  223 +218 pact:sg:acc:m1.m2:imperf:neg
  224 +219 pact:sg:acc:m3:imperf.perf:aff
  225 +220 pact:sg:acc:m3:imperf.perf:neg
  226 +221 pact:sg:acc:m3:imperf:aff
  227 +222 pact:sg:acc:m3:imperf:neg
  228 +223 pact:sg:dat:m1.m2.m3.n1.n2:imperf.perf:aff
  229 +224 pact:sg:dat:m1.m2.m3.n1.n2:imperf.perf:neg
  230 +225 pact:sg:dat:m1.m2.m3.n1.n2:imperf:aff
  231 +226 pact:sg:dat:m1.m2.m3.n1.n2:imperf:neg
  232 +227 pact:sg:gen.dat.loc:f:imperf.perf:aff
  233 +228 pact:sg:gen.dat.loc:f:imperf.perf:neg
  234 +229 pact:sg:gen.dat.loc:f:imperf:aff
  235 +230 pact:sg:gen.dat.loc:f:imperf:neg
  236 +231 pact:sg:gen:m1.m2.m3.n1.n2:imperf.perf:aff
  237 +232 pact:sg:gen:m1.m2.m3.n1.n2:imperf.perf:neg
  238 +233 pact:sg:gen:m1.m2.m3.n1.n2:imperf:aff
  239 +234 pact:sg:gen:m1.m2.m3.n1.n2:imperf:neg
  240 +235 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:aff
  241 +236 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:neg
  242 +237 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff
  243 +238 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf:neg
  244 +239 pact:sg:nom.acc.voc:n1.n2:imperf.perf:aff
  245 +240 pact:sg:nom.acc.voc:n1.n2:imperf.perf:neg
  246 +241 pact:sg:nom.acc.voc:n1.n2:imperf:aff
  247 +242 pact:sg:nom.acc.voc:n1.n2:imperf:neg
  248 +243 pact:sg:nom.voc:f:imperf.perf:aff
  249 +244 pact:sg:nom.voc:f:imperf.perf:neg
  250 +245 pact:sg:nom.voc:f:imperf:aff
  251 +246 pact:sg:nom.voc:f:imperf:neg
  252 +247 pact:sg:nom.voc:m1.m2.m3:imperf.perf:aff
  253 +248 pact:sg:nom.voc:m1.m2.m3:imperf.perf:neg
  254 +249 pact:sg:nom.voc:m1.m2.m3:imperf:aff
  255 +250 pact:sg:nom.voc:m1.m2.m3:imperf:neg
  256 +251 pant:perf
  257 +252 pcon:imperf
  258 +253 ppas:pl:acc:m1.p1:imperf.perf:aff
  259 +254 ppas:pl:acc:m1.p1:imperf.perf:neg
  260 +255 ppas:pl:acc:m1.p1:imperf:aff
  261 +256 ppas:pl:acc:m1.p1:imperf:neg
  262 +257 ppas:pl:acc:m1.p1:perf:aff
  263 +258 ppas:pl:acc:m1.p1:perf:neg
  264 +259 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff
  265 +260 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg
  266 +261 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff
  267 +262 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg
  268 +263 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff
  269 +264 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg
  270 +265 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff
  271 +266 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg
  272 +267 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff
  273 +268 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg
  274 +269 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff
  275 +270 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg
  276 +271 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff
  277 +272 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg
  278 +273 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff
  279 +274 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg
  280 +275 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff
  281 +276 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg
  282 +277 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:aff
  283 +278 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:neg
  284 +279 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff
  285 +280 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:neg
  286 +281 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:aff
  287 +282 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:neg
  288 +283 ppas:pl:nom.voc:m1.p1:imperf.perf:aff
  289 +284 ppas:pl:nom.voc:m1.p1:imperf.perf:neg
  290 +285 ppas:pl:nom.voc:m1.p1:imperf:aff
  291 +286 ppas:pl:nom.voc:m1.p1:imperf:neg
  292 +287 ppas:pl:nom.voc:m1.p1:perf:aff
  293 +288 ppas:pl:nom.voc:m1.p1:perf:neg
  294 +289 ppas:sg:acc.inst:f:imperf.perf:aff
  295 +290 ppas:sg:acc.inst:f:imperf.perf:neg
  296 +291 ppas:sg:acc.inst:f:imperf:aff
  297 +292 ppas:sg:acc.inst:f:imperf:neg
  298 +293 ppas:sg:acc.inst:f:perf:aff
  299 +294 ppas:sg:acc.inst:f:perf:neg
  300 +295 ppas:sg:acc:m1.m2:imperf.perf:aff
  301 +296 ppas:sg:acc:m1.m2:imperf.perf:neg
  302 +297 ppas:sg:acc:m1.m2:imperf:aff
  303 +298 ppas:sg:acc:m1.m2:imperf:neg
  304 +299 ppas:sg:acc:m1.m2:perf:aff
  305 +300 ppas:sg:acc:m1.m2:perf:neg
  306 +301 ppas:sg:acc:m3:imperf.perf:aff
  307 +302 ppas:sg:acc:m3:imperf.perf:neg
  308 +303 ppas:sg:acc:m3:imperf:aff
  309 +304 ppas:sg:acc:m3:imperf:neg
  310 +305 ppas:sg:acc:m3:perf:aff
  311 +306 ppas:sg:acc:m3:perf:neg
  312 +307 ppas:sg:dat:m1.m2.m3.n1.n2:imperf.perf:aff
  313 +308 ppas:sg:dat:m1.m2.m3.n1.n2:imperf.perf:neg
  314 +309 ppas:sg:dat:m1.m2.m3.n1.n2:imperf:aff
  315 +310 ppas:sg:dat:m1.m2.m3.n1.n2:imperf:neg
  316 +311 ppas:sg:dat:m1.m2.m3.n1.n2:perf:aff
  317 +312 ppas:sg:dat:m1.m2.m3.n1.n2:perf:neg
  318 +313 ppas:sg:gen.dat.loc:f:imperf.perf:aff
  319 +314 ppas:sg:gen.dat.loc:f:imperf.perf:neg
  320 +315 ppas:sg:gen.dat.loc:f:imperf:aff
  321 +316 ppas:sg:gen.dat.loc:f:imperf:neg
  322 +317 ppas:sg:gen.dat.loc:f:perf:aff
  323 +318 ppas:sg:gen.dat.loc:f:perf:neg
  324 +319 ppas:sg:gen:m1.m2.m3.n1.n2:imperf.perf:aff
  325 +320 ppas:sg:gen:m1.m2.m3.n1.n2:imperf.perf:neg
  326 +321 ppas:sg:gen:m1.m2.m3.n1.n2:imperf:aff
  327 +322 ppas:sg:gen:m1.m2.m3.n1.n2:imperf:neg
  328 +323 ppas:sg:gen:m1.m2.m3.n1.n2:perf:aff
  329 +324 ppas:sg:gen:m1.m2.m3.n1.n2:perf:neg
  330 +325 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:aff
  331 +326 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:neg
  332 +327 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff
  333 +328 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf:neg
  334 +329 ppas:sg:inst.loc:m1.m2.m3.n1.n2:perf:aff
  335 +330 ppas:sg:inst.loc:m1.m2.m3.n1.n2:perf:neg
  336 +331 ppas:sg:nom.acc.voc:n1.n2:imperf.perf:aff
  337 +332 ppas:sg:nom.acc.voc:n1.n2:imperf.perf:neg
  338 +333 ppas:sg:nom.acc.voc:n1.n2:imperf:aff
  339 +334 ppas:sg:nom.acc.voc:n1.n2:imperf:neg
  340 +335 ppas:sg:nom.acc.voc:n1.n2:perf:aff
  341 +336 ppas:sg:nom.acc.voc:n1.n2:perf:neg
  342 +337 ppas:sg:nom.voc:f:imperf.perf:aff
  343 +338 ppas:sg:nom.voc:f:imperf.perf:neg
  344 +339 ppas:sg:nom.voc:f:imperf:aff
  345 +340 ppas:sg:nom.voc:f:imperf:neg
  346 +341 ppas:sg:nom.voc:f:perf:aff
  347 +342 ppas:sg:nom.voc:f:perf:neg
  348 +343 ppas:sg:nom.voc:m1.m2.m3:imperf.perf:aff
  349 +344 ppas:sg:nom.voc:m1.m2.m3:imperf.perf:neg
  350 +345 ppas:sg:nom.voc:m1.m2.m3:imperf:aff
  351 +346 ppas:sg:nom.voc:m1.m2.m3:imperf:neg
  352 +347 ppas:sg:nom.voc:m1.m2.m3:perf:aff
  353 +348 ppas:sg:nom.voc:m1.m2.m3:perf:neg
  354 +349 ppron12:pl:acc:_:pri
  355 +350 ppron12:pl:acc:_:sec
  356 +351 ppron12:pl:dat:_:pri
  357 +352 ppron12:pl:dat:_:sec
  358 +353 ppron12:pl:gen:_:pri
  359 +354 ppron12:pl:gen:_:sec
  360 +355 ppron12:pl:inst:_:pri
  361 +356 ppron12:pl:inst:_:sec
  362 +357 ppron12:pl:loc:_:pri
  363 +358 ppron12:pl:loc:_:sec
  364 +359 ppron12:pl:nom:_:pri
  365 +360 ppron12:pl:nom:_:sec
  366 +361 ppron12:pl:voc:_:pri
  367 +362 ppron12:pl:voc:_:sec
  368 +363 ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:akc
  369 +364 ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:nakc
  370 +365 ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:akc
  371 +366 ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:nakc
  372 +367 ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:akc
  373 +368 ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:nakc
  374 +369 ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:akc
  375 +370 ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:nakc
  376 +371 ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:akc
  377 +372 ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:nakc
  378 +373 ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:akc
  379 +374 ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:nakc
  380 +375 ppron12:sg:inst:m1.m2.m3.f.n1.n2:pri
  381 +376 ppron12:sg:inst:m1.m2.m3.f.n1.n2:sec
  382 +377 ppron12:sg:loc:m1.m2.m3.f.n1.n2:pri
  383 +378 ppron12:sg:loc:m1.m2.m3.f.n1.n2:sec
  384 +379 ppron12:sg:nom:m1.m2.m3.f.n1.n2:pri
  385 +380 ppron12:sg:nom:m1.m2.m3.f.n1.n2:sec
  386 +381 ppron12:sg:voc:m1.m2.m3.f.n1.n2:pri
  387 +382 ppron12:sg:voc:m1.m2.m3.f.n1.n2:sec
  388 +383 ppron3:pl:acc:m1.p1:ter:_:npraep
  389 +384 ppron3:pl:acc:m1.p1:ter:_:praep
  390 +385 ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:_:npraep
  391 +386 ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:_:praep
  392 +387 ppron3:pl:dat:_:ter:_:npraep
  393 +388 ppron3:pl:dat:_:ter:_:praep
  394 +389 ppron3:pl:gen:_:ter:_:npraep
  395 +390 ppron3:pl:gen:_:ter:_:praep
  396 +391 ppron3:pl:inst:_:ter:_:_
  397 +392 ppron3:pl:loc:_:ter:_:_
  398 +393 ppron3:pl:nom:m1.p1:ter:_:_
  399 +394 ppron3:pl:nom:m2.m3.f.n1.n2.p2.p3:ter:_:_
  400 +395 ppron3:sg:acc:f:ter:_:npraep
  401 +396 ppron3:sg:acc:f:ter:_:praep
  402 +397 ppron3:sg:acc:m1.m2.m3:ter:akc:npraep
  403 +398 ppron3:sg:acc:m1.m2.m3:ter:akc:praep
  404 +399 ppron3:sg:acc:m1.m2.m3:ter:nakc:npraep
  405 +400 ppron3:sg:acc:m1.m2.m3:ter:nakc:praep
  406 +401 ppron3:sg:acc:n1.n2:ter:_:npraep
  407 +402 ppron3:sg:acc:n1.n2:ter:_:praep
  408 +403 ppron3:sg:dat:f:ter:_:npraep
  409 +404 ppron3:sg:dat:f:ter:_:praep
  410 +405 ppron3:sg:dat:m1.m2.m3:ter:_:praep
  411 +406 ppron3:sg:dat:m1.m2.m3:ter:akc:npraep
  412 +407 ppron3:sg:dat:m1.m2.m3:ter:nakc:npraep
  413 +408 ppron3:sg:dat:n1.n2:ter:_:praep
  414 +409 ppron3:sg:dat:n1.n2:ter:akc:npraep
  415 +410 ppron3:sg:dat:n1.n2:ter:nakc:npraep
  416 +411 ppron3:sg:gen:f:ter:_:npraep
  417 +412 ppron3:sg:gen:f:ter:_:praep
  418 +413 ppron3:sg:gen:m1.m2.m3:ter:akc:npraep
  419 +414 ppron3:sg:gen:m1.m2.m3:ter:akc:praep
  420 +415 ppron3:sg:gen:m1.m2.m3:ter:nakc:npraep
  421 +416 ppron3:sg:gen:m1.m2.m3:ter:nakc:praep
  422 +417 ppron3:sg:gen:n1.n2:ter:_:praep
  423 +418 ppron3:sg:gen:n1.n2:ter:akc:npraep
  424 +419 ppron3:sg:gen:n1.n2:ter:nakc:npraep
  425 +420 ppron3:sg:inst:f:ter:_:praep
  426 +421 ppron3:sg:inst:m1.m2.m3:ter:_:_
  427 +422 ppron3:sg:inst:n1.n2:ter:_:_
  428 +423 ppron3:sg:loc:f:ter:_:_
  429 +424 ppron3:sg:loc:m1.m2.m3:ter:_:_
  430 +425 ppron3:sg:loc:n1.n2:ter:_:_
  431 +426 ppron3:sg:nom:f:ter:_:_
  432 +427 ppron3:sg:nom:m1.m2.m3:ter:_:_
  433 +428 ppron3:sg:nom:n1.n2:ter:_:_
  434 +429 praet:pl:m1.p1:imperf
  435 +430 praet:pl:m1.p1:imperf.perf
  436 +431 praet:pl:m1.p1:perf
  437 +432 praet:pl:m2.m3.f.n1.n2.p2.p3:imperf
  438 +433 praet:pl:m2.m3.f.n1.n2.p2.p3:imperf.perf
  439 +434 praet:pl:m2.m3.f.n1.n2.p2.p3:perf
  440 +435 praet:sg:f:imperf
  441 +436 praet:sg:f:imperf.perf
  442 +437 praet:sg:f:perf
  443 +438 praet:sg:m1.m2.m3:imperf
  444 +439 praet:sg:m1.m2.m3:imperf.perf
  445 +440 praet:sg:m1.m2.m3:imperf:agl
  446 +441 praet:sg:m1.m2.m3:imperf:nagl
  447 +442 praet:sg:m1.m2.m3:perf
  448 +443 praet:sg:m1.m2.m3:perf:agl
  449 +444 praet:sg:m1.m2.m3:perf:nagl
  450 +445 praet:sg:n1.n2:imperf
  451 +446 praet:sg:n1.n2:imperf.perf
  452 +447 praet:sg:n1.n2:perf
  453 +448 pred
  454 +449 prep:acc
  455 +450 prep:acc:nwok
  456 +451 prep:acc:wok
  457 +452 prep:dat
  458 +453 prep:gen
  459 +454 prep:gen:nwok
  460 +455 prep:gen:wok
  461 +456 prep:inst
  462 +457 prep:inst:nwok
  463 +458 prep:inst:wok
  464 +459 prep:loc
  465 +460 prep:loc:nwok
  466 +461 prep:loc:wok
  467 +462 prep:nom
  468 +463 qub
  469 +464 subst:pl:acc:f
  470 +465 subst:pl:acc:m1
  471 +466 subst:pl:acc:m2
  472 +467 subst:pl:acc:m3
  473 +468 subst:pl:acc:n1
  474 +469 subst:pl:acc:n2
  475 +470 subst:pl:acc:p1
  476 +471 subst:pl:acc:p2
  477 +472 subst:pl:acc:p3
  478 +473 subst:pl:dat:f
  479 +474 subst:pl:dat:m1
  480 +475 subst:pl:dat:m2
  481 +476 subst:pl:dat:m3
  482 +477 subst:pl:dat:n1
  483 +478 subst:pl:dat:n2
  484 +479 subst:pl:dat:p1
  485 +480 subst:pl:dat:p2
  486 +481 subst:pl:dat:p3
  487 +482 subst:pl:gen:f
  488 +483 subst:pl:gen:m1
  489 +484 subst:pl:gen:m2
  490 +485 subst:pl:gen:m3
  491 +486 subst:pl:gen:n1
  492 +487 subst:pl:gen:n2
  493 +488 subst:pl:gen:p1
  494 +489 subst:pl:gen:p2
  495 +490 subst:pl:gen:p3
  496 +491 subst:pl:inst:f
  497 +492 subst:pl:inst:m1
  498 +493 subst:pl:inst:m2
  499 +494 subst:pl:inst:m3
  500 +495 subst:pl:inst:n1
  501 +496 subst:pl:inst:n2
  502 +497 subst:pl:inst:p1
  503 +498 subst:pl:inst:p2
  504 +499 subst:pl:inst:p3
  505 +500 subst:pl:loc:f
  506 +501 subst:pl:loc:m1
  507 +502 subst:pl:loc:m2
  508 +503 subst:pl:loc:m3
  509 +504 subst:pl:loc:n1
  510 +505 subst:pl:loc:n2
  511 +506 subst:pl:loc:p1
  512 +507 subst:pl:loc:p2
  513 +508 subst:pl:loc:p3
  514 +509 subst:pl:nom:f
  515 +510 subst:pl:nom:m1
  516 +511 subst:pl:nom:m2
  517 +512 subst:pl:nom:m3
  518 +513 subst:pl:nom:n1
  519 +514 subst:pl:nom:n2
  520 +515 subst:pl:nom:p1
  521 +516 subst:pl:nom:p2
  522 +517 subst:pl:nom:p3
  523 +518 subst:pl:voc:f
  524 +519 subst:pl:voc:m1
  525 +520 subst:pl:voc:m2
  526 +521 subst:pl:voc:m3
  527 +522 subst:pl:voc:n1
  528 +523 subst:pl:voc:n2
  529 +524 subst:pl:voc:p1
  530 +525 subst:pl:voc:p2
  531 +526 subst:pl:voc:p3
  532 +527 subst:sg:acc:f
  533 +528 subst:sg:acc:m1
  534 +529 subst:sg:acc:m2
  535 +530 subst:sg:acc:m3
  536 +531 subst:sg:acc:n1
  537 +532 subst:sg:acc:n2
  538 +533 subst:sg:dat:f
  539 +534 subst:sg:dat:m1
  540 +535 subst:sg:dat:m2
  541 +536 subst:sg:dat:m3
  542 +537 subst:sg:dat:n1
  543 +538 subst:sg:dat:n2
  544 +539 subst:sg:gen:f
  545 +540 subst:sg:gen:m1
  546 +541 subst:sg:gen:m2
  547 +542 subst:sg:gen:m3
  548 +543 subst:sg:gen:n1
  549 +544 subst:sg:gen:n2
  550 +545 subst:sg:inst:f
  551 +546 subst:sg:inst:m1
  552 +547 subst:sg:inst:m2
  553 +548 subst:sg:inst:m3
  554 +549 subst:sg:inst:n1
  555 +550 subst:sg:inst:n2
  556 +551 subst:sg:loc:f
  557 +552 subst:sg:loc:m1
  558 +553 subst:sg:loc:m2
  559 +554 subst:sg:loc:m3
  560 +555 subst:sg:loc:n1
  561 +556 subst:sg:loc:n2
  562 +557 subst:sg:nom:f
  563 +558 subst:sg:nom:m1
  564 +559 subst:sg:nom:m2
  565 +560 subst:sg:nom:m3
  566 +561 subst:sg:nom:n1
  567 +562 subst:sg:nom:n2
  568 +563 subst:sg:voc:f
  569 +564 subst:sg:voc:m1
  570 +565 subst:sg:voc:m2
  571 +566 subst:sg:voc:m3
  572 +567 subst:sg:voc:n1
  573 +568 subst:sg:voc:n2
  574 +569 winien:pl:m1.p1:imperf
  575 +570 winien:pl:m2.m3.f.n1.n2.p2.p3:imperf
  576 +571 winien:sg:f:imperf
  577 +572 winien:sg:m1.m2.m3:imperf
  578 +573 winien:sg:n1.n2:imperf
  579 +
  580 +[NAMES]
  581 +
  582 +0
  583 +1 etnonim
  584 +2 geograficzna
  585 +3 imię
  586 +4 nazwisko
  587 +5 określenie dodatkowe
  588 +6 organizacja
  589 +7 osoba
  590 +8 pospolita
  591 +9 własna
  592 +10 wydarzenie
  593 +11 wytwór
  594 +
... ...
fsabuilder/morfeuszbuilder/segrules/test/preprocessorTest.py 0 → 100644
  1 +'''
  2 +Created on 18 lut 2014
  3 +
  4 +@author: mlenart
  5 +'''
  6 +import unittest
  7 +import codecs
  8 +import os
  9 +
  10 +from morfeuszbuilder.segrules import preprocessor
  11 +from morfeuszbuilder.utils import configFile
  12 +
  13 +
class Test(unittest.TestCase):
    """Smoke test for the segmentation-rules preprocessor."""

    def testPreprocess(self):
        """Run the preprocessor over the 'combinations' section of segmenty.dat.

        The test makes no assertions; it only prints every (line number, line)
        pair yielded by the preprocessor so the output can be inspected by hand.
        """
        dataDir = os.path.dirname(__file__)
        rulesPath = os.path.join(dataDir, 'segmenty.dat')
        knownSections = ['options', 'combinations', 'tags', 'lexemes']
        config = configFile.ConfigFile(rulesPath, knownSections)
        combinationLines = config.enumerateLinesInSection('combinations')
        # NOTE(review): the second argument is presumably the list of active
        # #ifdef definitions - confirm against preprocessor.preprocess.
        activeDefs = ['extra', 'superextra']
        for num, text in preprocessor.preprocess(combinationLines, activeDefs):
            print (num, text)
  23 +
  24 +
if __name__ == "__main__":
    # To run a single test instead of the whole module, uncomment:
    #import sys;sys.argv = ['', 'Test.testPreprocess']
    unittest.main()
0 28 \ No newline at end of file
... ...
fsabuilder/morfeuszbuilder/segrules/test/segmenty.dat 0 → 100644
  1 +[options]
  2 +aggl=permissive strict isolated
  3 +praet=split composite
  4 +
  5 +[combinations]
  6 +(dupa|dupa)
  7 +#define wsz_interp (interp|kropka|dywiz)*
  8 +
  9 +#define moze_interp(segmenty) wsz_interp segmenty wsz_interp
  10 +
  11 +# Segmenty występujące samodzielnie:
  12 +#
  13 +# domyślny typ segmentu samodzielnego:
  14 +moze_interp(samodz)
  15 +
  16 +# segment samotny, który nie dopuszcza nawet znaku interpunkcyjnego po
  17 +# sobie
  18 +samotny
  19 +
  20 +# przeszlik pojedynczy w formie nieaglutynacyjnej, np. „gniótł”:
  21 +moze_interp(praet_sg_na)
  22 +
  23 +# przeszlik pojedynczy w formie niezróżnicowanej aglutynacyjnie, np. „czytał”:
  24 +moze_interp(praet_sg)
  25 +
  26 +# przeszlik mnogi, np. „czytali”:
  27 +moze_interp(praet_pl)
  28 +
  29 +# partykuła „by”:
  30 +moze_interp(by)
  31 +
  32 +# inne segmenty, które dopuszczają po sobie aglutynant,
  33 +# np. „powinna”, „czyżby”:
  34 +moze_interp(z_aglt)
  35 +
  36 +# forma przymiotnikowa (dopuszcza adja):
  37 +moze_interp(adj)
  38 +
  39 +# dywiz (jako samodzielny segment jest tylko błędnym użyciem w funkcji
  40 +# myślnika, ale trzeba to dopuścić):
  41 +dywiz
  42 +
  43 +#ifdef isolated
  44 +adja
  45 +#endif
  46 +
  47 +
  48 +# Połączenia z aglutynantami:
  49 +#
  50 +#ifdef split
  51 +# Czas przeszły:
  52 +# np. „gniotł·am”
  53 +moze_interp( praet_sg_agl aglsg )
  54 +# np. „czytał·em”
  55 +moze_interp(praet_sg aglsg)
  56 +# np. „czytali·ście”
  57 +moze_interp(praet_pl aglpl)
  58 +
  59 +# Tryb warunkowy:
  60 +# np. „gniótł·by”
  61 +moze_interp(praet_sg_na by)
  62 +# np. „czytało·by”
  63 +moze_interp(praet_sg by)
  64 +# np. „gnietli·by”
  65 +moze_interp(praet_pl by)
  66 +# np. „gniótł·by·ś”
  67 +moze_interp(praet_sg_na by aglsg)
  68 +# np. „czytał·by·m”
  69 +moze_interp(praet_sg by aglsg)
  70 +# np. „gnietli·by·śmy”
  71 +moze_interp(praet_pl by aglpl)
  72 +#else
  73 +moze_interp(praetcond)
  74 +#endif
  75 +# np. „by·ś”
  76 +moze_interp(by aglsg)
  77 +# np. „by·ście”
  78 +moze_interp(by aglpl)
  79 +
  80 +# np. „gdyby·m”
  81 +moze_interp(z_aglt aglsg)
  82 +# np. „gdyby·ście”
  83 +moze_interp(z_aglt aglpl)
  84 +
  85 +# To jest dużo za dużo, ale tytułem eksperymentu:
  86 +#ifdef permissive
  87 +moze_interp(samodz aglsg)
  88 +moze_interp(samodz aglpl)
  89 +#endif
  90 +
  91 +# Złożone formy przymiotnikowe
  92 +# np. „biało·-·czerwony”
  93 +moze_interp( (adja dywiz)+ adj )
  94 +# poniższe załatwione przez + powyżej:
  95 +# # np. „niebiesko·-·biało·-·czerwona”
  96 +# adja dywiz adja dywiz adj interp?
  97 +# # itd. (zatrzymujemy się pragmatycznie na 5 członach)
  98 +# adja dywiz adja dywiz adja dywiz adj interp?
  99 +# adja dywiz adja dywiz adja dywiz adja dywiz adj interp?
  100 +
  101 +# Stopień najwyższy:
  102 +# np. „naj·zieleńszy”, „naj·mądrzej”
  103 +moze_interp( naj> adj_sup )
  104 +
  105 +# Formy „zanegowane” gerundiów i imiesłowów:
  106 +# np. „nie·czytanie”, „nie·przeczytany”, „nie·czytający”:
  107 +moze_interp( nie > negat )
  108 +
  109 +# Przyimki akceptujące krótką formę „-ń”
  110 +moze_interp(z_on_agl)
  111 +# np. „do·ń”
  112 +moze_interp(z_on_agl on_agl)
  113 +
  114 +# Liczba zapisana jako ciąg cyfr:
  115 +moze_interp( dig>* dig )
  116 +
  117 +# Formacje prefiksalne
  118 +#### trzeba wydzielić odpowiednie samodze!
  119 +# rzeczownikowe i przymiotnikowe
  120 +# np. „euro·sodoma”, „e-·papieros”, „euro·sodomski”, „bez·argumentowy”
  121 +moze_interp( prefs samodz )
  122 +# czasownikowe np. „po·nakapywać”
  123 +moze_interp( prefv samodz )
  124 +
  125 +# Apozycje z dywizem
  126 +# np. „kobieta-prezydent”
  127 +moze_interp( samodz dywiz samodz )
  128 +# poniższe do sprawdzenia, najwyraźniej obecne w tekstach, skoro wprowadziliśmy:
  129 +# ?
  130 +adj dywiz adj
  131 +# ?
  132 +adj dywiz samodz
  133 +# ?
  134 +samodz dywiz adj
  135 +
  136 +
  137 +[tags]
  138 +naj naj
  139 +nie nie
  140 +prefs prefs
  141 +prefv prefv
  142 +dig dig
  143 +adja adja
  144 +adj adj:%:pos
  145 +adj_sup adj:%:sup
  146 +adj_sup adv:sup
  147 +negat ger:%:neg
  148 +negat pact:%:neg
  149 +negat ppas:%:neg
  150 +on_agl ppron3:sg:gen.acc:m1.m2.m3:ter:nakc:praep
  151 +z_on_agl prep:%
  152 +samotny brev:pun
  153 +samotny brev:npun
  154 +samotny intrj
  155 +interp interp
  156 +aglsg aglt:sg:%
  157 +aglpl aglt:pl:%
  158 +praetcond cond:%
  159 +praetcond praet:%:pri:%
  160 +praetcond praet:%:sec:%
  161 +praetcond praet:%:ter:%
  162 +praet_sg_agl praet:sg:%:agl
  163 +praet_sg_na praet:sg:%:nagl
  164 +praet_sg praet:sg:%
  165 +praet_pl praet:pl:%
  166 +praet_sg winien:sg:%
  167 +praet_pl winien:pl:%
  168 +samodz %
  169 +
  170 +[lexemes]
  171 +z_aglt aby:comp
  172 +z_aglt bowiem:comp
  173 +by by:qub
  174 +z_aglt by:comp
  175 +z_aglt cóż:subst
  176 +z_aglt czemu:adv
  177 +z_aglt czyżby:qub
  178 +z_aglt choćby:comp
  179 +z_aglt chociażby:comp
  180 +z_aglt dlaczego:adv
  181 +z_aglt dopóki:comp
  182 +z_aglt dopóty:conj
  183 +z_aglt gdyby:comp
  184 +z_aglt gdzie:qub
  185 +z_aglt gdzie:adv
  186 +z_aglt jakby:comp
  187 +z_aglt jakoby:comp
  188 +z_aglt kiedy:adv
  189 +z_aglt kiedy:comp
  190 +z_aglt tylko:qub
  191 +z_aglt żeby:comp
  192 +dywiz -:interp
  193 +kropka .:interp
... ...
fsabuilder/morfeuszbuilder/tagset/__init__.py 0 → 100644
fsabuilder/morfeuszbuilder/tagset/segtypes.py 0 → 100644
  1 +'''
  2 +Created on 17 lut 2014
  3 +
  4 +@author: mlenart
  5 +'''
  6 +import re
  7 +
class Segtypes(object):
    """Assigns numeric segment types to (lemma, tag) pairs.

    Segment types are declared in the ``[tags]`` and ``[lexemes]`` sections of
    a segmentation-rules file.  Every data line in those sections has the form
    ``<segment type><TAB><pattern>``.  Each distinct segment type name gets the
    next free number (starting at 0) and every pattern is appended to
    ``patternsList`` in reading order.

    ``lexeme2Segnum`` returns the segment number of the FIRST pattern that
    matches, so the order in which ``readTags``/``readLexemes`` are called
    matters: a catch-all tag pattern such as ``%`` shadows every pattern
    appended after it.
    """

    # "[name]" optionally followed by a "# comment"; nothing else may follow.
    _HEADER_RE = re.compile(r'\s*\[([^\]]*)\]\s*(#.*)?$')
    _SEGTYPE_RE = re.compile(r'[a-z0-9_]+$')
    _TAG_PATTERN_RE = re.compile(r'[a-z0-9_.:%]+$')
    # The lemma may be any text (e.g. "-" or "."); the POS follows the last ":".
    _LEXEME_RE = re.compile(r'.+:[a-z0-9_]+$')

    def __init__(self, tagset, segrulesFile):
        """Store the tagset and the rules file; no parsing happens here."""
        self.tagset = tagset
        self.segrulesConfigFile = segrulesFile
        # segment type name -> segment number
        self.segtype2Segnum = {}
        # SegtypePattern objects, in the order they were read
        self.patternsList = []

    def readTags(self, lines):
        """Read the ``[tags]`` section from an iterable of raw file lines.

        Raises SegtypesException when a data line is malformed.
        """
        for lineNum, segtype, pattern in self._entriesInSection(lines, 'tags'):
            self._validate(
                u'Pattern must contain only ":", "%", "." and lowercase alphanumeric letters',
                lineNum,
                self._TAG_PATTERN_RE.match(pattern))
            segnum = self._segnumFor(segtype)
            self.patternsList.append(SegtypePattern(None, pattern, segnum))

    def readLexemes(self, lines):
        """Read the ``[lexemes]`` section from an iterable of raw file lines.

        Each pattern has the form ``lemma:pos``.  Two patterns are registered
        per entry so that both a bare ``pos`` tag (e.g. ``qub``) and a tag with
        further attributes (``pos:...``) are recognised.

        Raises SegtypesException when a data line is malformed.
        """
        for lineNum, segtype, pattern in self._entriesInSection(lines, 'lexemes'):
            self._validate(
                u'Pattern must contain lemma and POS',
                lineNum,
                self._LEXEME_RE.match(pattern))
            segnum = self._segnumFor(segtype)
            # Split on the LAST colon so a lemma may itself contain ":".
            lemma, pos = pattern.rsplit(':', 1)
            self.patternsList.append(SegtypePattern(lemma, pos, segnum))
            self.patternsList.append(SegtypePattern(lemma, pos + ':%', segnum))

    def lexeme2Segnum(self, lemma, tag):
        """Return the segment number of the first pattern matching (lemma, tag).

        Raises SegtypesException when no pattern matches.
        """
        for p in self.patternsList:
            res = p.tryToMatch(lemma, tag)
            if res >= 0:
                return res
        raise SegtypesException('Cannot find segment type for given tag: %s' % tag)

    def _entriesInSection(self, lines, sectionName):
        """Yield (lineNum, segtype, pattern) for data lines of one section.

        Line numbers are 1-based positions within ``lines``.  Blank lines and
        lines starting with ``#`` are skipped.
        """
        inSection = False
        for lineNum, line in enumerate(lines, start=1):
            header = self._getHeaderValue(line, lineNum)
            if header == sectionName:
                inSection = True
            elif header:
                inSection = False
            elif inSection:
                stripped = line.strip()
                if not stripped or stripped.startswith('#'):
                    continue
                fields = stripped.split('\t')
                self._validate(
                    u'Expected segment type and pattern separated by a single tab',
                    lineNum,
                    len(fields) == 2)
                segtype, pattern = fields
                self._validate(
                    u'Segment type must be a lowercase alphanumeric with optional underscores',
                    lineNum,
                    self._SEGTYPE_RE.match(segtype))
                yield lineNum, segtype, pattern

    def _segnumFor(self, segtype):
        """Return the number of ``segtype``, allocating the next one if new."""
        if segtype not in self.segtype2Segnum:
            self.segtype2Segnum[segtype] = len(self.segtype2Segnum)
        return self.segtype2Segnum[segtype]

    def _getHeaderValue(self, line, lineNum):
        """Return the section name if ``line`` is a ``[section]`` header, else None."""
        m = self._HEADER_RE.match(line)
        if m:
            return m.group(1)
        return None

    def _validate(self, msg, lineNum, cond):
        """Raise SegtypesException carrying ``msg`` unless ``cond`` is truthy."""
        if not cond:
            raise SegtypesException(u'line %d: %s' % (lineNum, msg))
  81 +
class SegtypePattern(object):
    """A single (lemma, tag pattern) -> segment number rule.

    In ``pattern`` the character ``%`` stands for any (possibly empty) run of
    characters; every other character is matched literally.  The pattern must
    cover the whole tag.  A ``lemma`` of None means "any lemma".
    """

    def __init__(self, lemma, pattern, segnum):
        self.lemma = lemma
        self.pattern = pattern
        self.segnum = segnum

    def tryToMatch(self, lemma, tag):
        """Return ``segnum`` if (lemma, tag) matches this rule, otherwise -1."""
        if self.lemma is not None and self.lemma != lemma:
            return -1
        # Escape the literal pieces so that "." (as in "gen.acc") is not a
        # regex wildcard, and anchor the end so "naj" does not match "najx".
        literalParts = [re.escape(part) for part in self.pattern.split('%')]
        regex = '.*'.join(literalParts) + r'\Z'
        if re.match(regex, tag):
            return self.segnum
        return -1
  95 +
class SegtypesException(Exception):
    """Raised when the segment-type definitions are invalid or do not match."""

    def __init__(self, msg):
        # Initialise the base class so that ``args`` (used by repr and
        # pickling) carries the message.
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return u'Error in segment rules: %s' % self.msg
... ...
fsabuilder/morfeuszbuilder/tagset/tagset.py 0 → 100644
  1 +'''
  2 +Created on 17 lut 2014
  3 +
  4 +@author: mlenart
  5 +'''
  6 +
  7 +import codecs
  8 +
class Tagset(object):
    """Tag and name numbering loaded from a tagset definition file.

    The file consists of a ``[TAGS]`` and a ``[NAMES]`` section.  Every data
    line is ``<number><TAB><text>``; the text may be empty (the tab is still
    required).  Empty lines and lines starting with ``#`` are ignored.
    """

    TAGS = 1
    NAMES = 2
    SEP = '\t'

    def __init__(self, filename, encoding='utf8'):
        """Parse ``filename`` (decoded with ``encoding``) and build the mappings.

        Raises ValueError when the file content is malformed.
        """
        self.tag2tagnum = {}
        self.name2namenum = {}
        self._doInit(filename, encoding)
        # Reverse mapping for getTag4Tagnum.
        self.tagnum2tag = dict((num, tag) for tag, num in self.tag2tagnum.items())

    def _doInit(self, filename, encoding):
        """Fill ``tag2tagnum`` and ``name2namenum`` from the file."""
        target = None
        with codecs.open(filename, 'r', encoding) as f:
            for lineNum, line in enumerate(f, start=1):
                # Strip only the line terminator: a trailing tab is significant
                # because the text after it may legitimately be empty.
                line = line.rstrip('\r\n')
                if line == u'[TAGS]':
                    target = self.tag2tagnum
                elif line == u'[NAMES]':
                    target = self.name2namenum
                elif line.strip() and not line.startswith(u'#'):
                    if target is None:
                        raise ValueError(
                            '%s:%d - entry outside of [TAGS]/[NAMES] section'
                            % (filename, lineNum))
                    fields = line.split(Tagset.SEP)
                    if len(fields) < 2:
                        raise ValueError(
                            '%s:%d - expected "<number><TAB><text>"'
                            % (filename, lineNum))
                    tagNum, tag = fields[0], fields[1]
                    if tag in target:
                        raise ValueError(
                            '%s:%d - duplicate entry' % (filename, lineNum))
                    target[tag] = int(tagNum)

    def getTag4Tagnum(self, tagnum):
        """Return the tag string registered under ``tagnum`` (KeyError if unknown)."""
        return self.tagnum2tag[tagnum]
0 41 \ No newline at end of file
... ...
fsabuilder/morfeuszbuilder/utils/configFile.py 0 → 100644
  1 +'''
  2 +Created on 18 lut 2014
  3 +
  4 +@author: mlenart
  5 +'''
  6 +
  7 +import re
  8 +import codecs
  9 +
def getHeaderValue(line, lineNum):
    """Return the section name if ``line`` is a ``[section]`` header, else None.

    A header may be surrounded by whitespace and followed by a ``#`` comment;
    any other trailing text means the line is not a header.  ``lineNum`` is
    accepted for interface compatibility and is not used.
    """
    m = re.match(r'\s*\[([^\]]*)\]\s*(#.*)?$', line)
    if m:
        return m.group(1)
    return None
  16 +
class ConfigFile(object):
    """A UTF-8 text file split into ``[section]`` blocks.

    For every section the non-blank lines are kept, stripped, together with
    their 1-based line number in the file.  Lines starting with ``#`` inside a
    section are kept as well.
    """

    def __init__(self, filename, sectionNames):
        """Parse ``filename``; only sections listed in ``sectionNames`` are allowed.

        Raises ConfigFileException on an unknown or repeated section, or on
        non-comment text placed before the first section.
        """
        self.filename = filename
        self.sectionNames = sectionNames
        # section name -> list of (lineNum, strippedLine)
        self.section2Lines = {}
        self.currSection = None
        self._parse()

    def _addSectionStart(self, sectionName, lineNum):
        """Open a new section, rejecting unknown and duplicate names."""
        if sectionName not in self.sectionNames:
            raise ConfigFileException(self.filename, lineNum, 'Invalid section: %s' % sectionName)
        if sectionName in self.section2Lines:
            raise ConfigFileException(self.filename, lineNum, 'Duplicate section: %s' % sectionName)
        self.section2Lines[sectionName] = []
        self.currSection = sectionName

    def _addLine(self, line, lineNum):
        """Append a non-blank line to the current section."""
        line = line.strip()
        if not line:
            return
        if self.currSection is None:
            # Comments are allowed before the first section and are dropped;
            # there is no section to attach them to.
            if line.startswith('#'):
                return
            raise ConfigFileException(self.filename, lineNum, 'Text outside of any section')
        self.section2Lines[self.currSection].append((lineNum, line))

    def _getHeaderValue(self, line, lineNum):
        """Return the section name if ``line`` is a ``[section]`` header, else None.

        Only an optional ``#`` comment may follow the closing bracket.
        """
        m = re.match(r'\s*\[([^\]]*)\]\s*(#.*)?$', line)
        if m:
            return m.group(1)
        return None

    def enumerateLinesInSection(self, sectionName):
        """Return the list of (lineNum, line) pairs of a section.

        Raises KeyError if the section did not occur in the file.
        """
        return self.section2Lines[sectionName]

    def _parse(self):
        """Read the file and distribute its lines among the sections."""
        with codecs.open(self.filename, 'r', 'utf8') as f:
            for lineNum, line in enumerate(f, start=1):
                header = self._getHeaderValue(line, lineNum)
                if header:
                    self._addSectionStart(header, lineNum)
                else:
                    self._addLine(line, lineNum)
  59 +
class ConfigFileException(Exception):
    """Error found while parsing a sectioned config file, tied to a file line."""

    def __init__(self, filename, lineNum, msg):
        # Initialise the base class so that ``args`` (used by repr and
        # pickling) carries the error details.
        Exception.__init__(self, filename, lineNum, msg)
        self.filename = filename
        self.lineNum = lineNum
        self.msg = msg

    def __str__(self):
        return u'%s:%d - %s' % (self.filename, self.lineNum, self.msg)
... ...