Commit 8586011643aea0cb82e1b88b61597fea2a9753e1

Authored by Michał Lenart
1 parent 40c79141

- małe porządki w budowniczym automatów

- dodanie preprocesora (w stylu C), który będzie używany przez parser reguł zlepiania segmentów

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@83 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
fsabuilder/morfeuszbuilder/caseconv/__init__.py renamed to fsabuilder/morfeuszbuilder/segrules/__init__.py
fsabuilder/morfeuszbuilder/segrules/preprocessor.py 0 → 100644
  1 +'''
  2 +Created on 23 sty 2014
  3 +
  4 +@author: mlenart
  5 +'''
  6 +import re
  7 +from pyparsing import *
  8 +
# Grammar of the preprocessor directives.  Every directive has to fill a whole
# input line, hence the trailing LineEnd() + StringEnd() in each expression.
#
# LineEnd() is suppressed on purpose: when the line still carries its newline
# (e.g. it was read straight from a file) LineEnd() emits a '\n' token, and
# that extra token would make "#define NAME value" indistinguishable from the
# three-token "#define NAME(arg) value" form for code counting parsed tokens.

# A name: a letter followed by letters, digits or underscores.
identifier = Word(alphas, bodyChars=alphanums+'_')

# "#define NAME value" -> [NAME, value]
# "#define NAME(arg) value" -> [NAME, arg, value]
define = (Keyword('#define').suppress()
          + identifier
          + Optional(Suppress('(') + identifier + Suppress(')'))
          + restOfLine
          + LineEnd().suppress()
          + StringEnd())

# "#ifdef NAME" -> [NAME]
ifdef = Keyword('#ifdef').suppress() + identifier + LineEnd().suppress() + StringEnd()

# "#endif" -> no tokens
endif = Keyword('#endif').suppress() + LineEnd().suppress() + StringEnd()
  13 +
class NonArgDefine(object):
    '''
    A define without an argument: a name together with its replacement value.
    '''

    def __init__(self, name, val):
        # name -- identifier under which the define is known
        # val  -- text the define stands for
        self.name = name
        self.val = val

    def hasArg(self):
        '''Tell whether the define takes an argument; never true for this class.'''
        return False

    def __str__(self):
        # Counterpart of ArgDefine.__str__, so both kinds of define print readably.
        return '%s %s' % (self.name, self.val)
  22 +
class ArgDefine(object):
    '''
    A define taking one argument: a name, the argument's name and the
    replacement value in which that argument may occur.
    '''

    def __init__(self, name, arg, val):
        self.name, self.arg, self.val = name, arg, val

    def hasArg(self):
        '''Tell whether the define takes an argument; always true for this class.'''
        return True

    def __str__(self):
        # Rendered as "NAME(arg) value".
        fields = (self.name, self.arg, self.val)
        return '%s(%s) %s' % fields
  35 +
class PreprocessorException(Exception):
    '''
    Preprocessor error tied to a particular line of the input.

    msg  -- description of the problem
    line -- the line the problem refers to, exactly as supplied by the raiser
    '''

    def __init__(self, msg, line):
        # Hand both values to Exception so ``args`` stays (msg, line), and
        # keep them as attributes so handlers can report them separately.
        Exception.__init__(self, msg, line)
        self.msg = msg
        self.line = line

    def __str__(self):
        return 'line %s: %s' % (self.line, self.msg)
  40 +
  41 +def _tryToSubstituteArgDefine(s, t, defines):
  42 + defineName = t[0]
  43 + substituteValue = t[1]
  44 + if defineName in defines and defines[defineName].hasArg():
  45 + define = defines[defineName]
  46 + return re.sub(r'\b%s\b' % define.arg, substituteValue, define.val)
  47 + elif defineName in defines:
  48 + return '%s ( %s )' % (defines[defineName].val, substituteValue)
  49 + else:
  50 + return ' '.join(t)
  51 +
  52 +def _tryToSubstituteNonArgDefine(s, t, defines):
  53 + defineName = t[0]
  54 +
  55 + if defineName in defines and not defines[defineName].hasArg():
  56 + return defines[defineName].val
  57 + else:
  58 + return defineName
  59 +
def _processLine(line, defines):
    '''
    Expand the defines occurring in one line.

    line    -- text to process
    defines -- dict mapping define names to define objects

    A line consisting only of whitespace is returned untouched.  Any other
    line has to match the grammar built below in its entirety
    (parseAll=True); the first token of the parse result is returned.
    '''
    if not line.strip():
        return line

    def joinTokens(s, l, t):
        return ' '.join(t)

    def expandArgDefine(s, l, t):
        return _tryToSubstituteArgDefine(s, t, defines)

    def expandNonArgDefine(s, l, t):
        return _tryToSubstituteNonArgDefine(s, t, defines)

    # Work on a copy, so the parse action attached below stays off the
    # module-level ``identifier``.
    name = identifier.copy()
    expression = Forward()
    call = Forward()

    # expression: a sequence of names, NAME ( expression ) groups and
    # runs of the characters * | + ?
    expression << OneOrMore(name ^ call ^ Word('*|+?'))
    call << name + Suppress('(') + expression + Suppress(')')

    expression.setParseAction(joinTokens)
    call.setParseAction(expandArgDefine)
    name.setParseAction(expandNonArgDefine)

    parsed = expression.parseString(line, parseAll=True)
    return parsed[0]
  76 +
def preprocess(inputLines, defs):
    '''
    Generator running the preprocessor over a sequence of lines.

    inputLines -- iterable of lines
    defs       -- collection of names regarded as defined by "#ifdef"

    Lines starting with #define, #ifdef or #endif are consumed.  Any other
    line is yielded, with its defines expanded, when every enclosing #ifdef
    names something present in defs; otherwise it is dropped.

    Raises PreprocessorException (with the 1-based line number) for a
    #define that cannot be processed and for an #endif that has no
    matching #ifdef.
    '''
    defines = {}
    ifdefsStack = []
    for lineNum, line in enumerate(inputLines, start=1):
        if line.startswith('#define'):
            # NOTE: a #define is recorded even when it sits inside an
            # #ifdef block that is currently inactive.
            try:
                parsedDefine = list(define.parseString(line))
                if len(parsedDefine) == 2:
                    name, val = parsedDefine
                    defines[name] = NonArgDefine(name, val)
                else:
                    name, arg, val = parsedDefine
                    # Expand the body right away, with the argument mapped to
                    # itself so that it comes through the expansion unchanged
                    # even if a define of the same name exists.
                    localDefines = defines.copy()
                    localDefines[arg] = NonArgDefine(arg, arg)
                    val = _processLine(val, localDefines)
                    defines[name] = ArgDefine(name, arg, val)
            except (ParseBaseException, ValueError) as err:
                # Report the broken directive instead of silently dropping it;
                # a lost define would corrupt every later line that uses it.
                raise PreprocessorException('invalid #define: %s' % err, lineNum)
        elif line.startswith('#ifdef'):
            name = ifdef.parseString(line)[0]
            ifdefsStack.append(name)
        elif line.startswith('#endif'):
            if not ifdefsStack:
                raise PreprocessorException('#endif without matching #ifdef', lineNum)
            ifdefsStack.pop()
        elif all(ifdefName in defs for ifdefName in ifdefsStack):
            # all() of an empty stack is True, i.e. top-level lines always pass.
            yield _processLine(line, defines)
... ...
fsabuilder/morfeuszbuilder/segrules/test.py 0 → 100644
  1 +'''
  2 +Created on 24 sty 2014
  3 +
  4 +@author: mlenart
  5 +'''
  6 +
  7 +import preprocessor
  8 +
if __name__ == '__main__':
    # Manual smoke run: feeds a sample with nested defines and nested #ifdef
    # blocks through the preprocessor and prints whatever comes out.
    text = '''
dupa
#define asd XXX
#define X(x) a x b
#define Y(x) X(x) c
#define B(x) X(x)
#define Z(x) Y(X(x)) d
#define AB(asd) dupa asd dupa
Y(Z(a) b X(c) Y(d))
#ifdef extra
asdfasa
#ifdef extra
asdfasdfasdfa
#endif
#ifdef superextra
aaaa asd
#endif
#endif

#ifdef superextra
asdfasdfada
#endif

AB(x)
'''
    for line in preprocessor.preprocess(text.split('\n'), ['extra', 'superextra']):
        # The parenthesised single-argument form prints the same under the
        # Python 2 print statement and the Python 3 print function.
        print(line)
0 37 \ No newline at end of file
... ...
fsabuilder/morfeuszbuilder/caseconv/CaseFolding.txt renamed to fsabuilder/morfeuszbuilder/utils/caseconv/CaseFolding.txt
fsabuilder/morfeuszbuilder/caseconv/UnicodeData.txt renamed to fsabuilder/morfeuszbuilder/utils/caseconv/UnicodeData.txt
fsabuilder/morfeuszbuilder/input/__init__.py renamed to fsabuilder/morfeuszbuilder/utils/caseconv/__init__.py
fsabuilder/morfeuszbuilder/caseconv/generate.py renamed to fsabuilder/morfeuszbuilder/utils/caseconv/generate.py