Commit 8d5a878e6650c4130784b81026b903e2ffd965c8

Authored by Michał Lenart
1 parent 1c1bf677

- praca nad budowaniem automatu dla zlepiacza segmentów

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@86 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
fsabuilder/buildfsa.spec 0 → 100644
  1 +# -*- mode: python -*-
# PyInstaller build specification for the `buildfsa` command-line tool.
# Produces a one-directory bundle: the EXE holds only the bootloader and
# scripts (exclude_binaries=True), COLLECT gathers everything else next to it.
a = Analysis(['fsa/buildfsa.py'],
             # SPECPATH is injected by PyInstaller and holds the directory of
             # this spec file; using it instead of a hard-coded home directory
             # keeps the build working on any checkout location.
             pathex=[SPECPATH],
             hiddenimports=[],
             hookspath=None,
             runtime_hooks=None)
pyz = PYZ(a.pure)
exe = EXE(pyz,
          a.scripts,
          exclude_binaries=True,
          name='buildfsa',
          debug=False,
          strip=None,
          upx=True,
          console=True)
coll = COLLECT(exe,
               a.binaries,
               a.zipfiles,
               a.datas,
               strip=None,
               upx=True,
               name='buildfsa')
... ...
fsabuilder/morfeuszbuilder/fsa/fsa.py
... ... @@ -14,11 +14,12 @@ class FSA(object):
14 14 '''
15 15  
16 16  
17   - def __init__(self, encoder, tagset=None, encodeData=True):
18   - self.encodeWord = encoder.encodeWord
  17 + def __init__(self, encoder, tagset=None, encodeData=True, encodeWords=True):
  18 + self.encodeWord = encoder.encodeWord if encodeWords else lambda x: x
19 19 self.encodeData = encoder.encodeData if encodeData else lambda x: x
20 20 self.decodeData = encoder.decodeData if encodeData else lambda x: x
21 21 self.encodedPrevWord = None
  22 +
22 23 self.tagset = tagset
23 24 self.initialState = state.State()
24 25 self.register = register.Register()
... ...
fsabuilder/morfeuszbuilder/fsa/state.py
... ... @@ -9,7 +9,7 @@ class State(object):
9 9 A state in an automaton
10 10 '''
11 11  
12   - def __init__(self):
  12 + def __init__(self, additionalData=None):
13 13 self.transitionsMap = {}
14 14 self.freq = 0
15 15 self.encodedData = None
... ... @@ -17,6 +17,7 @@ class State(object):
17 17 self.offset = None
18 18 self.label2Freq = {}
19 19 self.serializeAsArray = False
  20 + self.additionalData = additionalData
20 21  
21 22 @property
22 23 def transitionsNum(self):
... ...
fsabuilder/morfeuszbuilder/segrules/preprocessor.py
... ... @@ -34,11 +34,6 @@ class ArgDefine(object):
34 34 def __str__(self):
35 35 return '%s(%s) %s' % (self.name, self.arg, self.val)
36 36  
37   -class PreprocessorException(Exception):
38   -
39   - def __init__(self, msg, line):
40   - pass
41   -
42 37 def _tryToSubstituteArgDefine(s, t, defines):
43 38 defineName = t[0]
44 39 substituteValue = t[1]
... ...
fsabuilder/morfeuszbuilder/segrules/rules.py
... ... @@ -4,6 +4,8 @@ Created on 24 sty 2014
4 4 @author: mlenart
5 5 '''
6 6  
  7 +from morfeuszbuilder.segrules.rulesNFA import RulesNFAState
  8 +
7 9 class SegmentRule(object):
8 10 '''
9 11 classdocs
... ... @@ -14,46 +16,91 @@ class SegmentRule(object):
14 16 '''
15 17 Constructor
16 18 '''
  19 +
  20 + def addToNFA(self, fsa):
  21 + raise NotImplementedError()
  22 +
  23 + def _doAddToNFA(self, startStates, endState):
  24 + raise NotImplementedError()
17 25  
18 26 class TagRule(SegmentRule):
19 27  
20   - def __init__(self, tagType, line):
21   - self.tagType = tagType
22   - self.line = line
  28 + def __init__(self, segnum):
  29 + self.segnum = segnum
  30 +
  31 + def addToNFA(self, fsa):
  32 + endState = RulesNFAState(final=True)
  33 + self._doAddToNFA(fsa.initialState, endState)
  34 +
  35 + def _doAddToNFA(self, startState, endState):
  36 + startState.addTransition(self.segnum, endState)
23 37  
24 38 class UnaryRule(SegmentRule):
25 39  
26   - def __init__(self, child, line):
  40 + def __init__(self, child):
27 41 self.child = child
28   - self.line = line
29 42  
30 43 class ComplexRule(SegmentRule):
31 44  
32   - def __init__(self, children, line):
  45 + def __init__(self, children):
33 46 self.children = children
34   - self.line = line
  47 +
  48 + def addToNFA(self, fsa):
  49 + endState = RulesNFAState(final=True)
  50 + self._doAddToNFA(fsa.initialState, endState)
35 51  
36 52 class ConcatRule(ComplexRule):
37 53  
38   - def __init__(self, children, line):
39   - super(ConcatRule, self).__init__(children, line)
  54 + def __init__(self, children):
  55 + super(ConcatRule, self).__init__(children)
  56 +
  57 + def _doAddToNFA(self, startState, endState):
  58 + currStartState = startState
  59 + for child in self.children[:-1]:
  60 + currEndState = RulesNFAState()
  61 + child._doAddToNFA(currStartState, currEndState)
  62 + nextStartState = RulesNFAState()
  63 + currEndState.addTransition(None, nextStartState)
  64 + currStartState = nextStartState
  65 + lastChild = self.children[-1]
  66 + lastChild._doAddToNFA(currStartState, endState)
40 67  
41 68 class OrRule(ComplexRule):
42 69  
43   - def __init__(self, children, line):
44   - super(OrRule, self).__init__(children, line)
  70 + def __init__(self, children):
  71 + super(OrRule, self).__init__(children)
  72 +
  73 + def _doAddToNFA(self, startState, endState):
  74 + for child in self.children:
  75 + intermStartState = RulesNFAState()
  76 + intermEndState = RulesNFAState()
  77 + startState.addTransition(None, intermStartState)
  78 + child._doAddToNFA(intermStartState, intermEndState)
  79 + intermEndState.addTransition(None, endState)
45 80  
46 81 class ZeroOrMoreRule(UnaryRule):
47 82  
48   - def __init__(self, child, line):
49   - super(ZeroOrMoreRule, self).__init__(child, line)
50   -
51   -class OneOrMoreRule(UnaryRule):
  83 + def __init__(self, child):
  84 + super(ZeroOrMoreRule, self).__init__(child)
  85 +
  86 + def addToNFA(self, fsa):
  87 + raise ValueError()
52 88  
53   - def __init__(self, child, line):
54   - super(OneOrMoreRule, self).__init__(child, line)
  89 + def _doAddToNFA(self, startState, endState):
  90 + intermStartState = RulesNFAState()
  91 + intermEndState = RulesNFAState()
  92 +
  93 + startState.addTransition(None, intermStartState)
  94 + startState.addTransition(None, endState)
  95 + self.child._doAddToNFA(intermStartState, intermEndState)
  96 + intermEndState.addTransition(None, endState)
  97 + endState.addTransition(None, intermStartState)
55 98  
56 99 class IgnoreOrthRule(UnaryRule):
57 100  
58   - def __init__(self, child, line):
59   - super(IgnoreOrthRule, self).__init__(child, line)
  101 + def __init__(self, child):
  102 + super(IgnoreOrthRule, self).__init__(child)
  103 +
  104 + def _doAddToNFA(self, startState, endState):
  105 + startState.addTransition(self.child.segnum, endState, ignoreOrth=True)
  106 +
... ...
fsabuilder/morfeuszbuilder/segrules/rulesNFA.py 0 → 100644
  1 +'''
  2 +Created on 24 sty 2014
  3 +
  4 +@author: mlenart
  5 +'''
  6 +
  7 +from morfeuszbuilder.fsa import fsa, state, encode
  8 +
class RulesNFAState(object):
    '''
    A single state of the segment-rules NFA.

    transitionsMap maps a (label, ignoreOrth) pair to the set of states
    reachable through that transition.
    '''

    def __init__(self, initial=False, final=False):
        self.transitionsMap = {}
        self.initial = initial
        self.final = final

    def addTransition(self, label, targetState, ignoreOrth=False):
        '''
        Add a transition from this state to targetState.

        label -- transition label; None is allowed only when ignoreOrth
                 is false
        targetState -- state the transition leads to
        ignoreOrth -- flag stored together with the label in the map key
        '''
        # A transition without a label must not carry the ignoreOrth flag.
        assert label is not None or not ignoreOrth
        key = (label, ignoreOrth)
        targets = self.transitionsMap.get(key)
        if targets is None:
            targets = self.transitionsMap[key] = set()
        targets.add(targetState)
  20 +
class RulesNFA(object):
    '''
    Nondeterministic automaton of segment rules.

    Rules attach their transitions starting from initialState;
    convertToDFA() turns the result into an fsa.FSA.
    '''

    def __init__(self, key2Def=None):
        '''
        key2Def -- accepted for call compatibility; it is not used by this
                   class yet.
        '''
        self.initialState = RulesNFAState(initial=True)

    def _doConvertState(self, dfaState, nfaStates):
        '''
        Recursively create the DFA states reachable from dfaState, which
        stands for the given set of NFA states.
        '''
        # NOTE(review): _groupOutputByLabels is not defined anywhere in this
        # module as of this revision - it has to be added before this runs.
        # NOTE(review): a fresh DFA state is created for every visited subset
        # and nothing remembers already converted subsets, so an NFA with
        # cycles would presumably recurse without end - confirm.
        grouped = self._groupOutputByLabels(nfaStates)
        for label, (nextIgnoreOrth, nextNFAStates) in grouped.items():
            nextDFAState = state.State(additionalData=nextIgnoreOrth)
            dfaState.setTransition(label, nextDFAState)
            dfaState.encodedData = bytearray()
            self._doConvertState(nextDFAState, nextNFAStates)

    def convertToDFA(self):
        '''
        Build and return the deterministic automaton for this NFA.
        '''
        # No encoder is supplied, so both word and data encoding have to be
        # switched off; otherwise FSA.__init__ dereferences the None encoder.
        dfa = fsa.FSA(encoder=None, encodeData=False, encodeWords=False)
        # NOTE(review): RulesNFAState does not define getClosure() in this
        # revision - it has to be added before this runs.
        startStates = self.initialState.getClosure()
        assert not any(s.final for s in startStates)
        dfa.initialState = state.State(additionalData=False)
        self._doConvertState(dfa.initialState, startStates)
        return dfa
  39 +
  40 +
0 41 \ No newline at end of file
... ...
fsabuilder/morfeuszbuilder/segrules/rulesParser.py
1 1  
2 2 from pyparsing import *
  3 +ParserElement.enablePackrat()
3 4 from morfeuszbuilder.tagset import segtypes
4   -from morfeuszbuilder.utils import configFile
5   -from morfeuszbuilder.segrules import preprocessor
  5 +from morfeuszbuilder.utils import configFile, exceptions
  6 +from morfeuszbuilder.segrules import preprocessor, rules
6 7 import codecs
7 8 import re
8 9  
9 10 import itertools
10 11 import logging
11   -import segsfsa
12   -
13   -# header = Suppress('[') + Word(alphas, bodyChars=alphanums+'_') + Suppress(']')
14   -# define = Keyword('#define').suppress() + identifier + Optional(Suppress('(') + identifier + Suppress(')')) + restOfLine + LineEnd() + StringEnd()
15   -# ifdef = Keyword('#ifdef').suppress() + identifier + LineEnd() + StringEnd()
16   -# endif = Keyword('#endif').suppress() + LineEnd() + StringEnd()
17   -
18   -def doprint(toks):
19   - print toks
  12 +from morfeuszbuilder.segrules import rulesNFA
20 13  
21 14 class RulesParser(object):
22 15  
... ... @@ -31,7 +24,7 @@ class RulesParser(object):
31 24 key, defs = lineToParse.parseString(line)
32 25 res[key] = tuple(defs)
33 26 except Exception as ex:
34   - raise configFile.ConfigFileException(segtypesConfigFile.filename, lineNum, u'Error in [options] section: %s' % str(ex))
  27 + raise exceptions.ConfigFileException(segtypesConfigFile.filename, lineNum, u'Error in [options] section: %s' % str(ex))
35 28 return res
36 29  
37 30 def parse(self, filename):
... ... @@ -48,12 +41,12 @@ class RulesParser(object):
48 41  
49 42 for defs in itertools.product(*key2Defs.values()):
50 43 key2Def = dict([(def2Key[define], define) for define in defs])
51   - fsa = segsfsa.SegmentsFSA(key2Def)
  44 + nfa = rulesNFA.RulesNFA(key2Def)
52 45 combinationEnumeratedLines = segtypesConfigFile.enumerateLinesInSection('combinations')
53 46 combinationEnumeratedLines = list(preprocessor.preprocess(combinationEnumeratedLines, defs))
54 47 for rule in self._doParse(combinationEnumeratedLines, segtypesHelper):
55   - fsa.addSegmentRule(rule)
56   - res.append(fsa)
  48 + rule.addToNFA(nfa)
  49 + res.append(nfa)
57 50 return res
58 51  
59 52 def _doParse(self, combinationEnumeratedLines, segtypesHelper):
... ... @@ -61,6 +54,12 @@ class RulesParser(object):
61 54 if not line.startswith('#'):
62 55 yield self._doParseOneLine(lineNum, line, segtypesHelper)
63 56  
  57 + def _createNewTagRule(self, segtype, lineNum, line, segtypesHelper):
  58 + if not segtypesHelper.hasSegtype(segtype):
  59 + raise exceptions.ConfigFileException(segtypesHelper.filename, lineNum, u'%s - invalid segment type: %s' % (line, segtype))
  60 + else:
  61 + return rules.TagRule(segtypesHelper.getSegnum4Segtype(segtype))
  62 +
64 63 def _doParseOneLine(self, lineNum, line, segtypesHelper):
65 64 rule = Forward()
66 65 tagRule = Word(alphanums+'_')
... ... @@ -74,9 +73,21 @@ class RulesParser(object):
74 73 complexRule = unaryRule ^ oneOfRule
75 74 concatRule = OneOrMore(complexRule)
76 75 rule << concatRule
  76 +
  77 + tagRule.setParseAction(lambda string, loc, toks: self._createNewTagRule(toks[0], lineNum, line, segtypesHelper))
  78 + ignoreOrthRule.setParseAction(lambda string, loc, toks: rules.IgnoreOrthRule(toks[0]))
  79 +# parenRule.setParseAction(lambda string, loc, toks: toks[0])
  80 + zeroOrMoreRule.setParseAction(lambda string, loc, toks: rules.ZeroOrMoreRule(toks[0]))
  81 + oneOrMoreRule.setParseAction(lambda string, loc, toks: rules.ConcatRule([toks[0], rules.ZeroOrMoreRule(toks[0])]))
  82 + oneOfRule.setParseAction(lambda string, loc, toks: rules.OrRule(toks))
  83 + concatRule.setParseAction(lambda string, loc, toks: toks[0] if len(toks) == 1 else rules.ConcatRule(toks))
  84 +
  85 +
77 86 # rule << tagRule ^ ignoreOrthRule ^ zeroOrMoreRule ^ oneOrMoreRule ^ orRule ^ concatRule ^ parenRule
78 87  
79 88 # tagRule.setParseAction(lambda s,l,toks: doprint(toks))
80 89 # print lineNum, line
81   - parsedLine = rule.parseString(line, parseAll=True)
  90 + parsedRule = rule.parseString(line, parseAll=True)[0]
  91 + print parsedRule
  92 + return parsedRule
82 93 # print parsedLine
... ...
fsabuilder/morfeuszbuilder/segrules/segsfsa.py deleted
1   -'''
2   -Created on 24 sty 2014
3   -
4   -@author: mlenart
5   -'''
6   -
7   -class SegmentsFSAState(object):
8   -
9   - def __init__(self):
10   - self.transitionsMap = {}
11   -
12   - def addSegmentRule(self, segmentRule):
13   - pass
14   -
15   -class SegmentsFSA(object):
16   -
17   - def __init__(self, key2Def={}):
18   - self.initialState = SegmentsFSAState()
19   -
20   - def addSegmentRule(self, segmentRule):
21   - self.initialState.addSegmentRule(segmentRule)
22   -
23   - def serialize(self):
24   - res = bytearray()
25   - return res
26   -
27   -
28 0 \ No newline at end of file
fsabuilder/morfeuszbuilder/segrules/test/parserTest.py
... ... @@ -9,9 +9,11 @@ from morfeuszbuilder.segrules import rulesParser
9 9 from morfeuszbuilder.tagset import tagset
10 10  
11 11 class Test(unittest.TestCase):
  12 + print 'do test'
12 13 t = tagset.Tagset(os.path.join(os.path.dirname(__file__), 'polimorf.tagset'))
13 14 parser = rulesParser.RulesParser(t)
14 15 parser.parse(os.path.join(os.path.dirname(__file__), 'segmenty.dat'))
  16 + print 'done'
15 17  
16 18 if __name__ == "__main__":
17 19 unittest.main()
... ...
fsabuilder/morfeuszbuilder/segrules/test/segmenty.dat
... ... @@ -3,7 +3,6 @@ aggl=permissive strict isolated
3 3 praet=split composite
4 4  
5 5 [combinations]
6   -(dupa|dupa)
7 6 #define wsz_interp (interp|kropka|dywiz)*
8 7  
9 8 #define moze_interp(segmenty) wsz_interp segmenty wsz_interp
... ...
fsabuilder/morfeuszbuilder/tagset/segtypes.py
... ... @@ -4,80 +4,85 @@ Created on 17 lut 2014
4 4 @author: mlenart
5 5 '''
6 6 import re
  7 +from morfeuszbuilder.utils import exceptions
7 8  
8 9 class Segtypes(object):
9 10  
10   - def __init__(self, tagset, segrulesFile):
  11 + def __init__(self, tagset, segrulesConfigFile):
11 12  
12 13 self.tagset = tagset
13 14  
14   - self.segrulesConfigFile = segrulesFile
  15 + self.filename = segrulesConfigFile.filename
15 16  
16 17 self.segtype2Segnum = {}
17 18 self.patternsList = []
  19 + self._readLexemes(segrulesConfigFile)
  20 + self._readTags(segrulesConfigFile)
  21 +
  22 + def _validate(self, msg, lineNum, cond):
  23 + if not cond:
  24 + raise exceptions.ConfigFileException(self.filename, lineNum, msg)
  25 +
  26 + def _readTags(self, segrulesConfigFile):
  27 + for lineNum, line in segrulesConfigFile.enumerateLinesInSection('tags'):
  28 + print lineNum, line
  29 + splitLine = re.split(r'\s+', line.strip())
  30 + self._validate(
  31 + u'Line in [tags] section must contain exactly two fields - segment type and tag pattern',
  32 + lineNum,
  33 + len(splitLine) == 2)
  34 + segtype, pattern = splitLine
  35 + self._validate(
  36 + u'Segment type must be a lowercase alphanumeric with optional underscores',
  37 + lineNum,
  38 + re.match(r'[a-z_]+', segtype))
  39 + self._validate(
  40 + u'Pattern must contain only ":", "%", "." and lowercase alphanumeric letters',
  41 + lineNum,
  42 + re.match(r'[a-z_\.\:\%]+', pattern))
  43 +
  44 + if segtype in self.segtype2Segnum:
  45 + segnum = self.segtype2Segnum[segtype]
  46 + else:
  47 + segnum = len(self.segtype2Segnum)
  48 + self.segtype2Segnum[segtype] = segnum
  49 +
  50 + self.patternsList.append(SegtypePattern(None, pattern, segnum))
  51 +
  52 + def _readLexemes(self, segrulesConfigFile):
  53 + for lineNum, line in segrulesConfigFile.enumerateLinesInSection('lexemes'):
  54 + segtype, pattern = line.strip().split('\t')
  55 + self._validate(
  56 + u'Segment type must be a lowercase alphanumeric with optional underscores',
  57 + lineNum,
  58 + re.match(r'[a-z_]+', segtype))
  59 + self._validate(
  60 + u'Pattern must contain lemma and POS',
  61 + lineNum,
  62 + re.match(r'.+\:[a-z_]+', pattern, re.U))
  63 +
  64 + if segtype in self.segtype2Segnum:
  65 + segnum = self.segtype2Segnum[segtype]
  66 + else:
  67 + segnum = len(self.segtype2Segnum)
  68 + self.segtype2Segnum[segtype] = segnum
  69 +
  70 + lemma, pos = pattern.split(':')
  71 +
  72 + self.patternsList.append(SegtypePattern(lemma, pos + ':%', segnum))
18 73  
19   - def readTags(self, lines):
20   - inTags = False
21   - for lineNum, line in enumerate(lines, start=1):
22   - header = self._getHeaderValue(line, lineNum)
23   - if header == 'tags':
24   - inTags = True
25   - elif header:
26   - inTags = False
27   - elif inTags:
28   - segtype, pattern = line.strip().split('\t')
29   - self._validate(
30   - u'Segment type must be a lowercase alphanumeric with optional underscores',
31   - lineNum,
32   - re.match(r'[a-z_]+', segtype))
33   - self._validate(
34   - u'Pattern must contain only ":", "%", "." and lowercase alphanumeric letters',
35   - lineNum,
36   - re.match(r'[a-z_\.\:\%]+', pattern))
37   -
38   - if segtype in self.segtype2Segnum:
39   - segnum = self.segtype2Segnum[segtype]
40   - else:
41   - segnum = len(self.segtype2Segnum)
42   - self.segtype2Segnum[segtype] = segnum
43   -
44   - self.patternsList.append(SegtypePattern(None, pattern, segnum))
  74 + def hasSegtype(self, segTypeString):
  75 + return segTypeString in self.segtype2Segnum
45 76  
46   - def readLexemes(self, lines):
47   - inLexemes = False
48   - for lineNum, line in enumerate(lines, start=1):
49   - header = self._getHeaderValue(line, lineNum)
50   - if header == 'lexemes':
51   - inLexemes = True
52   - elif header:
53   - inLexemes = False
54   - elif inLexemes:
55   - segtype, pattern = line.strip().split('\t')
56   - self._validate(
57   - u'Segment type must be a lowercase alphanumeric with optional underscores',
58   - lineNum,
59   - re.match(r'[a-z_]+', segtype))
60   - self._validate(
61   - u'Pattern must contain lemma and POS',
62   - lineNum,
63   - re.match(r'\w+\:[a-z_]+', pattern, re.U))
64   -
65   - if segtype in self.segtype2Segnum:
66   - segnum = self.segtype2Segnum[segtype]
67   - else:
68   - segnum = len(self.segtype2Segnum)
69   - self.segtype2Segnum[segtype] = segnum
70   -
71   - lemma, pos = pattern.split(':')
72   -
73   - self.patternsList.append(SegtypePattern(lemma, pos + ':%', segnum))
  77 + def getSegnum4Segtype(self, segTypeString):
  78 + return self.segtype2Segnum[segTypeString]
74 79  
75 80 def lexeme2Segnum(self, lemma, tag):
76 81 for p in self.patternsList:
77 82 res = p.tryToMatch(lemma, tag)
78 83 if res >= 0:
79 84 return res
80   - raise SegtypesException('Cannot find segment type for given tag: %s' % tag)
  85 + return None
81 86  
82 87 class SegtypePattern(object):
83 88  
... ... @@ -92,11 +97,3 @@ class SegtypePattern(object):
92 97 return self.segnum
93 98 else:
94 99 return -1
95   -
96   -class SegtypesException(Exception):
97   -
98   - def __init__(self, msg):
99   - self.msg = msg
100   -
101   - def __str__(self):
102   - return u'Error in segment rules: %s' % self.msg
... ...
fsabuilder/morfeuszbuilder/tagset/tagset.py
... ... @@ -12,10 +12,11 @@ class Tagset(object):
12 12 NAMES = 2
13 13 SEP = '\t'
14 14  
15   - def __init__(self, filename, encoding='utf8'):
  15 + def __init__(self, filename=None, encoding='utf8'):
16 16 self.tag2tagnum = {}
17 17 self.name2namenum = {}
18   - self._doInit(filename, encoding)
  18 + if filename:
  19 + self._doInit(filename, encoding)
19 20 self.tagnum2tag = dict(map(lambda (k, v): (v, k), self.tag2tagnum.iteritems()))
20 21  
21 22 def _doInit(self, filename, encoding):
... ... @@ -37,4 +38,4 @@ class Tagset(object):
37 38 res[tag] = int(tagNum)
38 39  
39 40 def getTag4Tagnum(self, tagnum):
40   - return self.tagnum2tag[tagnum]
41 41 \ No newline at end of file
  42 + return self.tagnum2tag[tagnum]
... ...
fsabuilder/morfeuszbuilder/utils/configFile.py
... ... @@ -6,6 +6,7 @@ Created on 18 lut 2014
6 6  
7 7 import re
8 8 import codecs
  9 +import exceptions
9 10  
10 11 def getHeaderValue(line, lineNum):
11 12 m = re.match(ur'\s*\[(.*?)\]\s*(\#.*)?', line)
... ... @@ -25,9 +26,9 @@ class ConfigFile(object):
25 26  
26 27 def _addSectionStart(self, sectionName, lineNum):
27 28 if not sectionName in self.sectionNames:
28   - raise ConfigFileException(self.filename, lineNum, 'Invalid section: %s' % sectionName)
  29 + raise exceptions.ConfigFileException(self.filename, lineNum, 'Invalid section: %s' % sectionName)
29 30 if sectionName in self.section2Lines:
30   - raise ConfigFileException(self.filename, lineNum, 'Duplicate section: %s' % sectionName)
  31 + raise exceptions.ConfigFileException(self.filename, lineNum, 'Duplicate section: %s' % sectionName)
31 32 self.section2Lines[sectionName] = []
32 33 self.currSection = sectionName
33 34  
... ... @@ -35,7 +36,7 @@ class ConfigFile(object):
35 36 line = line.strip()
36 37 if line:
37 38 if self.currSection is None and not line.startswith('#'):
38   - raise ConfigFileException(self.filename, lineNum, 'Text outside of any section')
  39 + raise exceptions.ConfigFileException(self.filename, lineNum, 'Text outside of any section')
39 40 self.section2Lines[self.currSection].append((lineNum, line))
40 41  
41 42 def _getHeaderValue(self, line, lineNum):
... ... @@ -56,13 +57,3 @@ class ConfigFile(object):
56 57 self._addSectionStart(header, lineNum)
57 58 else:
58 59 self._addLine(line, lineNum)
59   -
60   -class ConfigFileException(Exception):
61   -
62   - def __init__(self, filename, lineNum, msg):
63   - self.filename = filename
64   - self.lineNum = lineNum
65   - self.msg = msg
66   -
67   - def __str__(self):
68   - return u'%s:%d - %s' % (self.filename, self.lineNum, self.msg)
... ...
fsabuilder/morfeuszbuilder/utils/exceptions.py 0 → 100644
  1 +'''
  2 +Created on Feb 19, 2014
  3 +
  4 +@author: lennyn
  5 +'''
  6 +
class FSABuilderException(Exception):
    '''
    Base class of the errors raised while building the FSA files.

    msg -- description of the failure, kept on the instance as ``msg``
    '''

    def __init__(self, msg):
        self.msg = msg

    def __str__(self):
        # %-formatting (with an explicit 1-tuple) renders any msg object;
        # plain "+" concatenation raised TypeError for non-string messages.
        return 'Failed to create FSA files: %s' % (self.msg,)


class SegtypesException(FSABuilderException):
    '''
    Error in the segment rules.

    Uses the constructor inherited from FSABuilderException (single msg
    argument).
    '''

    def __str__(self):
        return u'Error in segment rules: %s' % (self.msg,)


class ConfigFileException(FSABuilderException):
    '''
    Error tied to a specific line of a configuration file.

    filename -- name of the offending file
    lineNum -- number of the offending line (formatted with %d)
    msg -- description of the problem
    '''

    def __init__(self, filename, lineNum, msg):
        self.filename = filename
        self.lineNum = lineNum
        self.msg = msg

    def __str__(self):
        return u'%s:%d - %s' % (self.filename, self.lineNum, self.msg)
... ...