Commit 8586011643aea0cb82e1b88b61597fea2a9753e1
1 parent
40c79141
- małe porządki w budowniczym automatów
- dodanie preprocesora (w stylu C), który będzie używany przez parser reguł zlepiania segmentów git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@83 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing
7 changed files
with
137 additions
and
0 deletions
fsabuilder/morfeuszbuilder/caseconv/__init__.py renamed to fsabuilder/morfeuszbuilder/segrules/__init__.py
fsabuilder/morfeuszbuilder/segrules/preprocessor.py
0 → 100644
1 | +''' | |
2 | +Created on 23 sty 2014 | |
3 | + | |
4 | +@author: mlenart | |
5 | +''' | |
6 | +import re | |
7 | +from pyparsing import * | |
8 | + | |
# A macro / rule name: starts with a letter, continues with letters,
# digits or underscores.
identifier = Word(alphas, alphanums + '_')

# '#define NAME VALUE' or '#define NAME(ARG) VALUE'.  The keyword and the
# parentheses are suppressed, so the parse result is [NAME, VALUE] or
# [NAME, ARG, VALUE] (plus whatever LineEnd contributes).
define = (
    Suppress(Keyword('#define'))
    + identifier
    + Optional(Suppress('(') + identifier + Suppress(')'))
    + restOfLine
    + LineEnd()
    + StringEnd()
)

# '#ifdef NAME' -- the parse result starts with NAME.
ifdef = (
    Suppress(Keyword('#ifdef'))
    + identifier
    + LineEnd()
    + StringEnd()
)

# '#endif' on a line of its own.
endif = (
    Suppress(Keyword('#endif'))
    + LineEnd()
    + StringEnd()
)
13 | + | |
class NonArgDefine(object):
    '''
    A macro without a parameter, i.e. one created by ``#define NAME VALUE``.

    Attributes:
        name -- the macro name
        val  -- the text that replaces each occurrence of the name
    '''

    def __init__(self, name, val):
        self.name = name
        self.val = val

    def hasArg(self):
        '''Return False -- this kind of define takes no argument.'''
        return False

    def __str__(self):
        # Mirrors ArgDefine.__str__ so that both kinds of define can be
        # printed uniformly while debugging.
        return '%s %s' % (self.name, self.val)
22 | + | |
class ArgDefine(object):
    '''
    A macro taking a single parameter, i.e. ``#define NAME(ARG) VALUE``.

    Attributes:
        name -- the macro name
        arg  -- the name of the formal parameter
        val  -- the macro body in which the parameter gets substituted
    '''

    def __init__(self, name, arg, val):
        self.name, self.arg, self.val = name, arg, val

    def hasArg(self):
        '''Return True -- this kind of define always takes an argument.'''
        return True

    def __str__(self):
        parts = (self.name, self.arg, self.val)
        return '%s(%s) %s' % parts
35 | + | |
class PreprocessorException(Exception):
    '''
    Error raised for invalid preprocessor input.

    Attributes:
        msg  -- human readable description of the problem
        line -- the location of the problem, as supplied by the caller
    '''

    def __init__(self, msg, line):
        # Hand a complete message to Exception so that str(exc) and
        # tracebacks are informative instead of empty.
        Exception.__init__(self, '%s (line: %s)' % (msg, line))
        self.msg = msg
        self.line = line
40 | + | |
41 | +def _tryToSubstituteArgDefine(s, t, defines): | |
42 | + defineName = t[0] | |
43 | + substituteValue = t[1] | |
44 | + if defineName in defines and defines[defineName].hasArg(): | |
45 | + define = defines[defineName] | |
46 | + return re.sub(r'\b%s\b' % define.arg, substituteValue, define.val) | |
47 | + elif defineName in defines: | |
48 | + return '%s ( %s )' % (defines[defineName].val, substituteValue) | |
49 | + else: | |
50 | + return ' '.join(t) | |
51 | + | |
52 | +def _tryToSubstituteNonArgDefine(s, t, defines): | |
53 | + defineName = t[0] | |
54 | + | |
55 | + if defineName in defines and not defines[defineName].hasArg(): | |
56 | + return defines[defineName].val | |
57 | + else: | |
58 | + return defineName | |
59 | + | |
60 | +def _processLine(line, defines): | |
61 | + if line.strip(): | |
62 | + | |
63 | + rule = Forward() | |
64 | + defineInstance = Forward() | |
65 | + localId = identifier.copy() | |
66 | + | |
67 | + rule << OneOrMore(localId ^ defineInstance ^ Word('*|+?')) | |
68 | + defineInstance << localId + Suppress('(') + rule + Suppress(')') | |
69 | + | |
70 | + rule.setParseAction(lambda s, l, t: ' '.join(t)) | |
71 | + defineInstance.setParseAction(lambda s, l, t: _tryToSubstituteArgDefine(s, t, defines)) | |
72 | + localId.setParseAction(lambda s, l, t: _tryToSubstituteNonArgDefine(s, t, defines)) | |
73 | + return rule.parseString(line, parseAll=True)[0] | |
74 | + else: | |
75 | + return line | |
76 | + | |
def preprocess(inputLines, defs):
    '''
    Run a C-like preprocessor over the given lines (generator).

    inputLines -- iterable of lines, with or without their line terminator
    defs       -- container of names regarded as defined for ``#ifdef``

    Handled directives (recognised only at the very start of a line):
      #define NAME VALUE       -- define a macro without an argument
      #define NAME(ARG) VALUE  -- define a macro with one argument; defines
                                  already known are expanded inside VALUE
      #ifdef NAME / #endif     -- conditional section, may be nested

    Every other line is yielded with its defines expanded, provided that all
    enclosing ``#ifdef`` names are present in defs.  Note that ``#define``
    directives are processed even inside an inactive ``#ifdef`` section.

    Raises PreprocessorException (carrying the 1-based line number) for a
    malformed ``#define`` / ``#ifdef``, an ``#endif`` without a matching
    ``#ifdef`` and an ``#ifdef`` that is never closed.
    '''
    defines = {}
    ifdefsStack = []
    for lineNum, line in enumerate(inputLines, start=1):
        # LineEnd() emits an extra '\n' token when the terminator is still
        # attached, which would break the token-count dispatch below.
        directive = line.rstrip('\r\n')
        if line.startswith('#define'):
            try:
                parsedDefine = list(define.parseString(directive))
            except ParseBaseException as ex:
                raise PreprocessorException('invalid #define: %s' % ex, lineNum)
            if len(parsedDefine) == 2:
                name, val = parsedDefine
                defines[name] = NonArgDefine(name, val)
            else:
                name, arg, val = parsedDefine
                # Shield the formal parameter from expansion: inside the
                # body it maps to itself even if a define of the same name
                # already exists.
                localDefines = defines.copy()
                localDefines[arg] = NonArgDefine(arg, arg)
                try:
                    val = _processLine(val, localDefines)
                except ParseBaseException as ex:
                    raise PreprocessorException(
                        'invalid body of #define %s: %s' % (name, ex), lineNum)
                defines[name] = ArgDefine(name, arg, val)
        elif line.startswith('#ifdef'):
            try:
                name = ifdef.parseString(directive)[0]
            except ParseBaseException as ex:
                raise PreprocessorException('invalid #ifdef: %s' % ex, lineNum)
            ifdefsStack.append(name)
        elif line.startswith('#endif'):
            if not ifdefsStack:
                raise PreprocessorException('#endif without matching #ifdef', lineNum)
            ifdefsStack.pop()
        elif all(name in defs for name in ifdefsStack):
            # all() of an empty stack is True, so lines outside of any
            # #ifdef are always emitted.
            yield _processLine(line, defines)

    if ifdefsStack:
        raise PreprocessorException(
            'missing #endif for #ifdef %s' % ifdefsStack[-1], lineNum)
... | ... |
fsabuilder/morfeuszbuilder/segrules/test.py
0 → 100644
1 | +''' | |
2 | +Created on 24 sty 2014 | |
3 | + | |
4 | +@author: mlenart | |
5 | +''' | |
6 | + | |
7 | +import preprocessor | |
8 | + | |
if __name__ == '__main__':
    # Manual smoke test: feed a sample with nested defines and nested
    # #ifdef sections through the preprocessor and print the result.
    text = '''
dupa
#define asd XXX
#define X(x) a x b
#define Y(x) X(x) c
#define B(x) X(x)
#define Z(x) Y(X(x)) d
#define AB(asd) dupa asd dupa
Y(Z(a) b X(c) Y(d))
#ifdef extra
asdfasa
#ifdef extra
asdfasdfasdfa
#endif
#ifdef superextra
aaaa asd
#endif
#endif

#ifdef superextra
asdfasdfada
#endif

AB(x)
'''
    for line in preprocessor.preprocess(text.split('\n'), ['extra', 'superextra']):
        # The parenthesised single-argument form prints identically under
        # Python 2 and is also valid Python 3.
        print(line)
0 | 37 | \ No newline at end of file |
... | ... |
fsabuilder/morfeuszbuilder/caseconv/CaseFolding.txt renamed to fsabuilder/morfeuszbuilder/utils/caseconv/CaseFolding.txt
fsabuilder/morfeuszbuilder/caseconv/UnicodeData.txt renamed to fsabuilder/morfeuszbuilder/utils/caseconv/UnicodeData.txt
fsabuilder/morfeuszbuilder/input/__init__.py renamed to fsabuilder/morfeuszbuilder/utils/caseconv/__init__.py
fsabuilder/morfeuszbuilder/caseconv/generate.py renamed to fsabuilder/morfeuszbuilder/utils/caseconv/generate.py