preprocessor.py
3.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# -*- coding:utf-8 -*-
'''
Created on 23 Jan 2014
@author: mlenart
'''
import re
from pyparsing import *
from morfeuszbuilder.utils import exceptions
# An identifier starts with a letter; the following characters may be
# alphanumerics or any of '_', '>', '*', '+', '!'.
identifier = Word(alphas, bodyChars=alphanums + u'_>*+!')

# '#define NAME <rest of line>' or '#define NAME(ARG) <rest of line>'.
# The keyword and the parentheses are suppressed from the parse results.
define = (
    Keyword('#define').suppress()
    + identifier
    + Optional(Suppress('(') + identifier + Suppress(')'))
    + restOfLine
    + LineEnd()
    + StringEnd()
)

# '#ifdef NAME' -- nothing else may follow the name.
ifdef = (
    Keyword('#ifdef').suppress()
    + identifier
    + LineEnd()
    + StringEnd()
)

# '#endif' alone on its line.
endif = (
    Keyword('#endif').suppress()
    + LineEnd()
    + StringEnd()
)
class NonArgDefine(object):
    """A '#define' macro that takes no argument.

    Attributes:
        name: the macro name.
        val: the replacement text stored for the macro.
    """

    def __init__(self, name, val):
        self.name = name
        self.val = val

    def hasArg(self):
        """Return False: this kind of define carries no argument."""
        return False
class ArgDefine(object):
    """A '#define' macro that takes a single argument.

    Attributes:
        name: the macro name.
        arg: the name of the formal argument.
        val: the macro body, in which ``arg`` is later substituted.
    """

    def __init__(self, name, arg, val):
        self.name = name
        self.arg = arg
        self.val = val

    def hasArg(self):
        """Return True: this kind of define carries an argument."""
        return True

    def __str__(self):
        """Render the define as 'name(arg) val'."""
        return '%s(%s) %s' % (self.name, self.arg, self.val)
def _tryToSubstituteArgDefine(s, t, defines):
    """Expand a 'NAME ( ARGUMENT )' occurrence using the known defines.

    Parse-action helper: ``t[0]`` is the name that precedes the parentheses
    and ``t[1]`` is the text found between them.

    Args:
        s: the string being parsed (unused).
        t: the matched tokens, ``[name, argument_text]``.
        defines: mapping of define name to a define object exposing
            ``hasArg()`` and ``val`` (plus ``arg`` when ``hasArg()`` is true).

    Returns:
        * for a define that takes an argument: its body with every
          stand-alone occurrence of the formal argument replaced by
          ``t[1]``;
        * for a define without an argument: ``'<val> ( <argument> )'``;
        * for an unknown name: the tokens joined with single spaces.
    """
    defineName = t[0]
    substituteValue = t[1]

    macro = defines.get(defineName)
    if macro is None:
        return ' '.join(t)
    if not macro.hasArg():
        return '%s ( %s )' % (macro.val, substituteValue)

    # The argument name is matched literally (identifiers may contain regex
    # metacharacters such as '*' and '+'), and only where it is not glued to
    # a neighbouring word character. For names made of word characters this
    # is the same as wrapping the name in word-boundary assertions.
    pattern = r'(?<!\w)%s(?!\w)' % re.escape(macro.arg)
    # A callable replacement inserts the text verbatim; a plain string would
    # be treated as a template in which backslashes are interpreted.
    return re.sub(pattern, lambda _match: substituteValue, macro.val)
def _tryToSubstituteNonArgDefine(s, t, defines):
    """Replace a bare identifier with the value of a no-argument define.

    Args:
        s: the string being parsed (unused).
        t: the matched tokens; only ``t[0]``, the identifier, is used.
        defines: mapping of define name to a define object exposing
            ``hasArg()`` and ``val``.

    Returns:
        The define's ``val`` when the identifier names a define that takes
        no argument; otherwise the identifier itself, unchanged.
    """
    defineName = t[0]
    macro = defines.get(defineName)
    if macro is not None and not macro.hasArg():
        return macro.val
    return defineName
def _processLine(lineNum, line, defines):
    """Expand all define occurrences in a single line.

    Args:
        lineNum: number of the line in the input (currently not used in the
            error message).
        line: the text to process.
        defines: mapping of define name to define object.

    Returns:
        ``line`` itself when it is empty or whitespace-only; otherwise the
        expanded tokens joined with single spaces.

    Raises:
        exceptions.SegtypesException: when the line cannot be parsed; the
            message shows the line with a caret under the failing column.
    """
    if not line.strip():
        return line

    # The grammar is built per call because the parse actions close over
    # the ``defines`` passed in.
    rule = Forward()
    defineInstance = Forward()
    localId = identifier.copy()
    rule << OneOrMore(
        defineInstance
        ^ localId
        ^ Word('*|+?>')
        ^ (Literal('(') + rule + Literal(')'))
    )
    defineInstance << localId + Suppress('(') + rule + Suppress(')')

    rule.setParseAction(lambda s, l, t: ' '.join(t))
    defineInstance.setParseAction(
        lambda s, l, t: _tryToSubstituteArgDefine(s, t, defines))
    localId.setParseAction(
        lambda s, l, t: _tryToSubstituteNonArgDefine(s, t, defines))

    try:
        return rule.parseString(line, parseAll=True)[0]
    except ParseException as ex:
        msg = u'Preprocessing of segmentation rules failed.\n'
        msg += line + '\n'
        # ex.col is 1-based, so col - 1 spaces put the caret under the
        # offending character.
        msg += (ex.col - 1) * ' ' + '^\n'
        msg += ex.msg
        raise exceptions.SegtypesException(msg)
def preprocess(inputLines, defs):
    """Run the '#define' / '#ifdef' / '#endif' preprocessor over the input.

    Args:
        inputLines: iterable of ``(lineNum, line)`` pairs.
        defs: container of names regarded as defined for '#ifdef' tests
            (only membership, ``name in defs``, is used).

    Yields:
        ``(lineNum, line)`` pairs:
        * any other line starting with '#' is passed through unchanged,
          whatever the current '#ifdef' state;
        * ordinary lines are yielded with defines expanded, but only when
          every name on the open '#ifdef' stack is in ``defs``.
        Directive lines themselves ('#define', '#ifdef', '#endif') are
        consumed and never yielded.

    Note:
        '#define' lines are recorded even inside an '#ifdef' section whose
        condition does not hold.
    """
    defines = {}
    ifdefsStack = []
    for lineNum, line in inputLines:
        if line.startswith('#define'):
            parsedDefine = list(define.parseString(line))
            if len(parsedDefine) == 2:
                name, val = parsedDefine
                defines[name] = NonArgDefine(name, val)
            else:
                name, arg, val = parsedDefine
                # Shadow the formal argument with a define that maps to
                # itself, so expanding the body leaves the argument name in
                # place for later substitution.
                localDefines = defines.copy()
                localDefines[arg] = NonArgDefine(arg, arg)
                val = _processLine(lineNum, val, localDefines)
                defines[name] = ArgDefine(name, arg, val)
        elif line.startswith('#ifdef'):
            ifdefsStack.append(ifdef.parseString(line)[0])
        elif line.startswith('#endif'):
            # NOTE(review): an '#endif' without an open '#ifdef' raises
            # IndexError here; the line is not validated against the
            # 'endif' grammar.
            ifdefsStack.pop()
        elif line.startswith('#'):
            yield lineNum, line
        elif all(name in defs for name in ifdefsStack):
            # all() over an empty stack is True, so lines outside any
            # '#ifdef' are always emitted.
            yield lineNum, _processLine(lineNum, line, defines)