Commit 8586011643aea0cb82e1b88b61597fea2a9753e1
1 parent
40c79141
- małe porządki w budowniczym automatów
- dodanie preprocesora (w stylu C), który będzie używany przez parser reguł zlepiania segmentów

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@83 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing
7 changed files
with
137 additions
and
0 deletions
fsabuilder/morfeuszbuilder/caseconv/__init__.py renamed to fsabuilder/morfeuszbuilder/segrules/__init__.py
fsabuilder/morfeuszbuilder/segrules/preprocessor.py
0 → 100644
1 | +''' | ||
2 | +Created on 23 sty 2014 | ||
3 | + | ||
4 | +@author: mlenart | ||
5 | +''' | ||
6 | +import re | ||
7 | +from pyparsing import * | ||
8 | + | ||
# Grammar fragments for the C-style preprocessor directives.

# An identifier starts with a letter and continues with letters, digits or '_'.
identifier = Word(alphas, bodyChars=alphanums + '_')

# '#define NAME value' or '#define NAME(arg) value'. The keyword and the
# parentheses are suppressed, so the result carries NAME, the optional arg
# and the rest of the line.
define = (
    Suppress(Keyword('#define'))
    + identifier
    + Optional(Literal('(').suppress() + identifier + Literal(')').suppress())
    + restOfLine
    + LineEnd()
    + StringEnd()
)

# '#ifdef NAME' -- the keyword is suppressed, leaving the identifier.
ifdef = Suppress(Keyword('#ifdef')) + identifier + LineEnd() + StringEnd()

# '#endif' on a line of its own.
endif = Suppress(Keyword('#endif')) + LineEnd() + StringEnd()
13 | + | ||
class NonArgDefine(object):
    '''
    A macro without a parameter, as created by ``#define NAME value``.

    Attributes (set directly from the constructor arguments):
    name -- the macro identifier
    val  -- the replacement text
    '''

    def __init__(self, name, val):
        self.name = name
        self.val = val

    def hasArg(self):
        '''Return False: this kind of define takes no argument.'''
        return False

    def __str__(self):
        # Same "name value" layout as ArgDefine.__str__, minus the argument,
        # so both kinds of define print readably.
        return '%s %s' % (self.name, self.val)
22 | + | ||
class ArgDefine(object):
    '''
    A macro with a single parameter, as created by ``#define NAME(arg) value``.

    Attributes (set directly from the constructor arguments):
    name -- the macro identifier
    arg  -- the name of the formal parameter
    val  -- the replacement text in which ``arg`` may occur
    '''

    def __init__(self, name, arg, val):
        self.name, self.arg, self.val = name, arg, val

    def hasArg(self):
        '''Return True: this kind of define takes an argument.'''
        return True

    def __str__(self):
        # Rendered as "NAME(arg) value".
        fields = (self.name, self.arg, self.val)
        return '%s(%s) %s' % fields
35 | + | ||
class PreprocessorException(Exception):
    '''
    Error raised for input the preprocessor cannot handle.

    Attributes:
    msg  -- description of the problem
    line -- the line the problem refers to (presumably a line number;
            confirm against the callers)
    '''

    def __init__(self, msg, line):
        # Pass a readable message to Exception so str(exc) and tracebacks
        # show both the problem and where it occurred.
        super(PreprocessorException, self).__init__('line %s: %s' % (line, msg))
        self.msg = msg
        self.line = line
40 | + | ||
41 | +def _tryToSubstituteArgDefine(s, t, defines): | ||
42 | + defineName = t[0] | ||
43 | + substituteValue = t[1] | ||
44 | + if defineName in defines and defines[defineName].hasArg(): | ||
45 | + define = defines[defineName] | ||
46 | + return re.sub(r'\b%s\b' % define.arg, substituteValue, define.val) | ||
47 | + elif defineName in defines: | ||
48 | + return '%s ( %s )' % (defines[defineName].val, substituteValue) | ||
49 | + else: | ||
50 | + return ' '.join(t) | ||
51 | + | ||
52 | +def _tryToSubstituteNonArgDefine(s, t, defines): | ||
53 | + defineName = t[0] | ||
54 | + | ||
55 | + if defineName in defines and not defines[defineName].hasArg(): | ||
56 | + return defines[defineName].val | ||
57 | + else: | ||
58 | + return defineName | ||
59 | + | ||
def _processLine(line, defines):
    '''
    Expand the defines occurring in a single line.

    line    -- the text to process
    defines -- mapping from macro name to its define object

    A line that is empty or only whitespace is returned unchanged. Any other
    line is parsed as a sequence of identifiers, ``name ( ... )`` invocations
    and runs of the characters ``*|+?``; the expanded pieces are joined with
    single spaces. The whole line must match the grammar (parseAll=True).
    '''
    if not line.strip():
        return line

    def joinTokens(s, l, t):
        return ' '.join(t)

    def expandInvocation(s, l, t):
        return _tryToSubstituteArgDefine(s, t, defines)

    def expandName(s, l, t):
        return _tryToSubstituteNonArgDefine(s, t, defines)

    # The two constructs refer to each other, hence the Forward placeholders.
    expansion = Forward()
    invocation = Forward()
    # Work on a copy so the parse action is not attached to the shared
    # module-level identifier.
    name = identifier.copy()

    expansion << OneOrMore(name ^ invocation ^ Word('*|+?'))
    invocation << name + Suppress('(') + expansion + Suppress(')')

    expansion.setParseAction(joinTokens)
    invocation.setParseAction(expandInvocation)
    name.setParseAction(expandName)

    parsed = expansion.parseString(line, parseAll=True)
    return parsed[0]
76 | + | ||
def preprocess(inputLines, defs):
    '''
    Generator running a C-style preprocessor over the given lines.

    inputLines -- iterable of text lines
    defs       -- container of names regarded as defined by ``#ifdef``

    Directive handling:
    * ``#define NAME value`` and ``#define NAME(arg) value`` record a define
      (for the second form the value is expanded right away, with ``arg``
      mapped to itself);
    * ``#ifdef NAME`` / ``#endif`` open and close a conditional section.
    Directive lines are never yielded. Every other line is yielded after
    define expansion, provided all enclosing ``#ifdef`` names are in ``defs``.

    Raises PreprocessorException (with the 1-based line number) for a
    malformed ``#define`` and for an ``#endif`` without an open ``#ifdef``.
    '''
    defines = {}
    ifdefsStack = []
    for lineNum, line in enumerate(inputLines, start=1):
        if line.startswith('#define'):
            try:
                # Drop the line terminator first: pyparsing's LineEnd() is
                # expected to emit a '\n' token for it, which would upset the
                # 2-vs-3 token count below (confirm against the installed
                # pyparsing version).
                parsedDefine = list(define.parseString(line.rstrip('\r\n')))
                if len(parsedDefine) == 2:
                    name, val = parsedDefine
                    defines[name] = NonArgDefine(name, val)
                else:
                    name, arg, val = parsedDefine
                    # Map the formal argument to itself so it survives the
                    # expansion of the body and can be substituted at use time.
                    localDefines = defines.copy()
                    localDefines[arg] = NonArgDefine(arg, arg)
                    val = _processLine(val, localDefines)
                    defines[name] = ArgDefine(name, arg, val)
            except (ParseBaseException, ValueError) as ex:
                # Report the bad directive instead of silently ignoring it.
                raise PreprocessorException('invalid #define: %s' % ex, lineNum)
        elif line.startswith('#ifdef'):
            name = ifdef.parseString(line)[0]
            ifdefsStack.append(name)
        elif line.startswith('#endif'):
            if not ifdefsStack:
                raise PreprocessorException('#endif without matching #ifdef', lineNum)
            ifdefsStack.pop()
        elif all(name in defs for name in ifdefsStack):
            # all() over an empty stack is True, so lines outside any #ifdef
            # are always emitted.
            yield _processLine(line, defines)
fsabuilder/morfeuszbuilder/segrules/test.py
0 → 100644
1 | +''' | ||
2 | +Created on 24 sty 2014 | ||
3 | + | ||
4 | +@author: mlenart | ||
5 | +''' | ||
6 | + | ||
7 | +import preprocessor | ||
8 | + | ||
if __name__ == '__main__':
    # Manual smoke test: run the preprocessor over a sample containing plain
    # defines, defines with an argument (including nested use) and nested
    # #ifdef sections, then print every resulting line.
    text = '''
dupa
#define asd XXX
#define X(x) a x b
#define Y(x) X(x) c
#define B(x) X(x)
#define Z(x) Y(X(x)) d
#define AB(asd) dupa asd dupa
Y(Z(a) b X(c) Y(d))
#ifdef extra
asdfasa
#ifdef extra
asdfasdfasdfa
#endif
#ifdef superextra
aaaa asd
#endif
#endif

#ifdef superextra
asdfasdfada
#endif

AB(x)
'''
    for line in preprocessor.preprocess(text.split('\n'), ['extra', 'superextra']):
        # The parenthesised form prints the same text on Python 2 and is also
        # valid syntax on Python 3.
        print(line)
0 | \ No newline at end of file | 37 | \ No newline at end of file |
fsabuilder/morfeuszbuilder/caseconv/CaseFolding.txt renamed to fsabuilder/morfeuszbuilder/utils/caseconv/CaseFolding.txt
fsabuilder/morfeuszbuilder/caseconv/UnicodeData.txt renamed to fsabuilder/morfeuszbuilder/utils/caseconv/UnicodeData.txt
fsabuilder/morfeuszbuilder/input/__init__.py renamed to fsabuilder/morfeuszbuilder/utils/caseconv/__init__.py
fsabuilder/morfeuszbuilder/caseconv/generate.py renamed to fsabuilder/morfeuszbuilder/utils/caseconv/generate.py