From 8d5a878e6650c4130784b81026b903e2ffd965c8 Mon Sep 17 00:00:00 2001
From: Michał Lenart <michall@ipipan.waw.pl>
Date: Wed, 19 Feb 2014 22:06:53 +0000
Subject: [PATCH] - praca nad budowaniem automatu dla zlepiacza segmentów
---
fsabuilder/buildfsa.spec | 22 ++++++++++++++++++++++
fsabuilder/morfeuszbuilder/fsa/fsa.py | 5 +++--
fsabuilder/morfeuszbuilder/fsa/state.py | 3 ++-
fsabuilder/morfeuszbuilder/segrules/preprocessor.py | 5 -----
fsabuilder/morfeuszbuilder/segrules/rules.py | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
fsabuilder/morfeuszbuilder/segrules/rulesNFA.py | 40 ++++++++++++++++++++++++++++++++++++++++
fsabuilder/morfeuszbuilder/segrules/rulesParser.py | 43 +++++++++++++++++++++++++++----------------
fsabuilder/morfeuszbuilder/segrules/segsfsa.py | 27 ---------------------------
fsabuilder/morfeuszbuilder/segrules/test/parserTest.py | 2 ++
fsabuilder/morfeuszbuilder/segrules/test/segmenty.dat | 1 -
fsabuilder/morfeuszbuilder/tagset/segtypes.py | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------------------------------------------
fsabuilder/morfeuszbuilder/tagset/tagset.py | 7 ++++---
fsabuilder/morfeuszbuilder/utils/configFile.py | 17 ++++-------------
fsabuilder/morfeuszbuilder/utils/exceptions.py | 34 ++++++++++++++++++++++++++++++++++
14 files changed, 266 insertions(+), 152 deletions(-)
create mode 100644 fsabuilder/buildfsa.spec
create mode 100644 fsabuilder/morfeuszbuilder/segrules/rulesNFA.py
delete mode 100644 fsabuilder/morfeuszbuilder/segrules/segsfsa.py
create mode 100644 fsabuilder/morfeuszbuilder/utils/exceptions.py
diff --git a/fsabuilder/buildfsa.spec b/fsabuilder/buildfsa.spec
new file mode 100644
index 0000000..be8cd20
--- /dev/null
+++ b/fsabuilder/buildfsa.spec
@@ -0,0 +1,22 @@
+# -*- mode: python -*-
+a = Analysis(['fsa/buildfsa.py'],
+ pathex=['/home/lennyn/xxx/morfeusz/fsabuilder'],
+ hiddenimports=[],
+ hookspath=None,
+ runtime_hooks=None)
+pyz = PYZ(a.pure)
+exe = EXE(pyz,
+ a.scripts,
+ exclude_binaries=True,
+ name='buildfsa',
+ debug=False,
+ strip=None,
+ upx=True,
+ console=True )
+coll = COLLECT(exe,
+ a.binaries,
+ a.zipfiles,
+ a.datas,
+ strip=None,
+ upx=True,
+ name='buildfsa')
diff --git a/fsabuilder/morfeuszbuilder/fsa/fsa.py b/fsabuilder/morfeuszbuilder/fsa/fsa.py
index 83b0837..7f94fc0 100644
--- a/fsabuilder/morfeuszbuilder/fsa/fsa.py
+++ b/fsabuilder/morfeuszbuilder/fsa/fsa.py
@@ -14,11 +14,12 @@ class FSA(object):
'''
- def __init__(self, encoder, tagset=None, encodeData=True):
- self.encodeWord = encoder.encodeWord
+ def __init__(self, encoder, tagset=None, encodeData=True, encodeWords=True):
+ self.encodeWord = encoder.encodeWord if encodeWords else lambda x: x
self.encodeData = encoder.encodeData if encodeData else lambda x: x
self.decodeData = encoder.decodeData if encodeData else lambda x: x
self.encodedPrevWord = None
+
self.tagset = tagset
self.initialState = state.State()
self.register = register.Register()
diff --git a/fsabuilder/morfeuszbuilder/fsa/state.py b/fsabuilder/morfeuszbuilder/fsa/state.py
index 66873e9..1ae33ea 100644
--- a/fsabuilder/morfeuszbuilder/fsa/state.py
+++ b/fsabuilder/morfeuszbuilder/fsa/state.py
@@ -9,7 +9,7 @@ class State(object):
A state in an automaton
'''
- def __init__(self):
+ def __init__(self, additionalData=None):
self.transitionsMap = {}
self.freq = 0
self.encodedData = None
@@ -17,6 +17,7 @@ class State(object):
self.offset = None
self.label2Freq = {}
self.serializeAsArray = False
+ self.additionalData = additionalData
@property
def transitionsNum(self):
diff --git a/fsabuilder/morfeuszbuilder/segrules/preprocessor.py b/fsabuilder/morfeuszbuilder/segrules/preprocessor.py
index b48005b..1e3250b 100644
--- a/fsabuilder/morfeuszbuilder/segrules/preprocessor.py
+++ b/fsabuilder/morfeuszbuilder/segrules/preprocessor.py
@@ -34,11 +34,6 @@ class ArgDefine(object):
def __str__(self):
return '%s(%s) %s' % (self.name, self.arg, self.val)
-class PreprocessorException(Exception):
-
- def __init__(self, msg, line):
- pass
-
def _tryToSubstituteArgDefine(s, t, defines):
defineName = t[0]
substituteValue = t[1]
diff --git a/fsabuilder/morfeuszbuilder/segrules/rules.py b/fsabuilder/morfeuszbuilder/segrules/rules.py
index a929c19..1376a9c 100644
--- a/fsabuilder/morfeuszbuilder/segrules/rules.py
+++ b/fsabuilder/morfeuszbuilder/segrules/rules.py
@@ -4,6 +4,8 @@ Created on 24 sty 2014
@author: mlenart
'''
+from morfeuszbuilder.segrules.rulesNFA import RulesNFAState
+
class SegmentRule(object):
'''
classdocs
@@ -14,46 +16,91 @@ class SegmentRule(object):
'''
Constructor
'''
+
+ def addToNFA(self, fsa):
+ raise NotImplementedError()
+
+ def _doAddToNFA(self, startStates, endState):
+ raise NotImplementedError()
class TagRule(SegmentRule):
- def __init__(self, tagType, line):
- self.tagType = tagType
- self.line = line
+ def __init__(self, segnum):
+ self.segnum = segnum
+
+ def addToNFA(self, fsa):
+ endState = RulesNFAState(final=True)
+ self._doAddToNFA(fsa.initialState, endState)
+
+ def _doAddToNFA(self, startState, endState):
+ startState.addTransition(self.segnum, endState)
class UnaryRule(SegmentRule):
- def __init__(self, child, line):
+ def __init__(self, child):
self.child = child
- self.line = line
class ComplexRule(SegmentRule):
- def __init__(self, children, line):
+ def __init__(self, children):
self.children = children
- self.line = line
+
+ def addToNFA(self, fsa):
+ endState = RulesNFAState(final=True)
+ self._doAddToNFA(fsa.initialState, endState)
class ConcatRule(ComplexRule):
- def __init__(self, children, line):
- super(ConcatRule, self).__init__(children, line)
+ def __init__(self, children):
+ super(ConcatRule, self).__init__(children)
+
+ def _doAddToNFA(self, startState, endState):
+ currStartState = startState
+ for child in self.children[:-1]:
+ currEndState = RulesNFAState()
+ child._doAddToNFA(currStartState, currEndState)
+ nextStartState = RulesNFAState()
+ currEndState.addTransition(None, nextStartState)
+ currStartState = nextStartState
+ lastChild = self.children[-1]
+ lastChild._doAddToNFA(currStartState, endState)
class OrRule(ComplexRule):
- def __init__(self, children, line):
- super(OrRule, self).__init__(children, line)
+ def __init__(self, children):
+ super(OrRule, self).__init__(children)
+
+ def _doAddToNFA(self, startState, endState):
+ for child in self.children:
+ intermStartState = RulesNFAState()
+ intermEndState = RulesNFAState()
+ startState.addTransition(None, intermStartState)
+ child._doAddToNFA(intermStartState, intermEndState)
+ intermEndState.addTransition(None, endState)
class ZeroOrMoreRule(UnaryRule):
- def __init__(self, child, line):
- super(ZeroOrMoreRule, self).__init__(child, line)
-
-class OneOrMoreRule(UnaryRule):
+ def __init__(self, child):
+ super(ZeroOrMoreRule, self).__init__(child)
+
+ def addToNFA(self, fsa):
+ raise ValueError()
- def __init__(self, child, line):
- super(OneOrMoreRule, self).__init__(child, line)
+ def _doAddToNFA(self, startState, endState):
+ intermStartState = RulesNFAState()
+ intermEndState = RulesNFAState()
+
+ startState.addTransition(None, intermStartState)
+ startState.addTransition(None, endState)
+ self.child._doAddToNFA(intermStartState, intermEndState)
+ intermEndState.addTransition(None, endState)
+ endState.addTransition(None, intermStartState)
class IgnoreOrthRule(UnaryRule):
- def __init__(self, child, line):
- super(IgnoreOrthRule, self).__init__(child, line)
+ def __init__(self, child):
+ super(IgnoreOrthRule, self).__init__(child)
+
+ def _doAddToNFA(self, startState, endState):
+ startState.addTransition(self.child.segnum, endState, ignoreOrth=True)
+
diff --git a/fsabuilder/morfeuszbuilder/segrules/rulesNFA.py b/fsabuilder/morfeuszbuilder/segrules/rulesNFA.py
new file mode 100644
index 0000000..56c59ce
--- /dev/null
+++ b/fsabuilder/morfeuszbuilder/segrules/rulesNFA.py
@@ -0,0 +1,40 @@
+'''
+Created on 24 sty 2014
+
+@author: mlenart
+'''
+
+from morfeuszbuilder.fsa import fsa, state, encode
+
+class RulesNFAState(object):
+
+ def __init__(self, initial=False, final=False):
+ self.transitionsMap = {}
+ self.initial = initial
+ self.final = final
+
+ def addTransition(self, label, targetState, ignoreOrth=False):
+ assert not ignoreOrth or label is not None
+ self.transitionsMap.setdefault((label, ignoreOrth), set())
+ self.transitionsMap[(label, ignoreOrth)].add(targetState)
+
+class RulesNFA(object):
+
+ def __init__(self, key2Def={}):
+ self.initialState = RulesNFAState(initial=True)
+
+ def _doConvertState(self, dfaState, nfaStates):
+ for label, (nextIgnoreOrth, nextNFAStates) in self._groupOutputByLabels(nfaStates).iteritems():
+ nextDFAState = state.State(additionalData=nextIgnoreOrth)
+ dfaState.setTransition(label, nextDFAState)
+ dfaState.encodedData = bytearray()
+ self._doConvertState(nextDFAState, nextNFAStates)
+
+ def convertToDFA(self):
+ dfa = fsa.FSA(encoder=None, encodeWords=False)
+ startStates = self.initialState.getClosure()
+ assert not any(filter(lambda s: s.final, startStates))
+ dfa.initialState = state.State(additionalData=False)
+ self._doConvertState(dfa.initialState, startStates)
+
+
\ No newline at end of file
diff --git a/fsabuilder/morfeuszbuilder/segrules/rulesParser.py b/fsabuilder/morfeuszbuilder/segrules/rulesParser.py
index 22d97b4..398e6a6 100644
--- a/fsabuilder/morfeuszbuilder/segrules/rulesParser.py
+++ b/fsabuilder/morfeuszbuilder/segrules/rulesParser.py
@@ -1,22 +1,15 @@
from pyparsing import *
+ParserElement.enablePackrat()
from morfeuszbuilder.tagset import segtypes
-from morfeuszbuilder.utils import configFile
-from morfeuszbuilder.segrules import preprocessor
+from morfeuszbuilder.utils import configFile, exceptions
+from morfeuszbuilder.segrules import preprocessor, rules
import codecs
import re
import itertools
import logging
-import segsfsa
-
-# header = Suppress('[') + Word(alphas, bodyChars=alphanums+'_') + Suppress(']')
-# define = Keyword('#define').suppress() + identifier + Optional(Suppress('(') + identifier + Suppress(')')) + restOfLine + LineEnd() + StringEnd()
-# ifdef = Keyword('#ifdef').suppress() + identifier + LineEnd() + StringEnd()
-# endif = Keyword('#endif').suppress() + LineEnd() + StringEnd()
-
-def doprint(toks):
- print toks
+from morfeuszbuilder.segrules import rulesNFA
class RulesParser(object):
@@ -31,7 +24,7 @@ class RulesParser(object):
key, defs = lineToParse.parseString(line)
res[key] = tuple(defs)
except Exception as ex:
- raise configFile.ConfigFileException(segtypesConfigFile.filename, lineNum, u'Error in [options] section: %s' % str(ex))
+ raise exceptions.ConfigFileException(segtypesConfigFile.filename, lineNum, u'Error in [options] section: %s' % str(ex))
return res
def parse(self, filename):
@@ -48,12 +41,12 @@ class RulesParser(object):
for defs in itertools.product(*key2Defs.values()):
key2Def = dict([(def2Key[define], define) for define in defs])
- fsa = segsfsa.SegmentsFSA(key2Def)
+ nfa = rulesNFA.RulesNFA(key2Def)
combinationEnumeratedLines = segtypesConfigFile.enumerateLinesInSection('combinations')
combinationEnumeratedLines = list(preprocessor.preprocess(combinationEnumeratedLines, defs))
for rule in self._doParse(combinationEnumeratedLines, segtypesHelper):
- fsa.addSegmentRule(rule)
- res.append(fsa)
+ rule.addToNFA(nfa)
+ res.append(nfa)
return res
def _doParse(self, combinationEnumeratedLines, segtypesHelper):
@@ -61,6 +54,12 @@ class RulesParser(object):
if not line.startswith('#'):
yield self._doParseOneLine(lineNum, line, segtypesHelper)
+ def _createNewTagRule(self, segtype, lineNum, line, segtypesHelper):
+ if not segtypesHelper.hasSegtype(segtype):
+ raise exceptions.ConfigFileException(segtypesHelper.filename, lineNum, u'%s - invalid segment type: %s' % (line, segtype))
+ else:
+ return rules.TagRule(segtypesHelper.getSegnum4Segtype(segtype))
+
def _doParseOneLine(self, lineNum, line, segtypesHelper):
rule = Forward()
tagRule = Word(alphanums+'_')
@@ -74,9 +73,21 @@ class RulesParser(object):
complexRule = unaryRule ^ oneOfRule
concatRule = OneOrMore(complexRule)
rule << concatRule
+
+ tagRule.setParseAction(lambda string, loc, toks: self._createNewTagRule(toks[0], lineNum, line, segtypesHelper))
+ ignoreOrthRule.setParseAction(lambda string, loc, toks: rules.IgnoreOrthRule(toks[0]))
+# parenRule.setParseAction(lambda string, loc, toks: toks[0])
+ zeroOrMoreRule.setParseAction(lambda string, loc, toks: rules.ZeroOrMoreRule(toks[0]))
+ oneOrMoreRule.setParseAction(lambda string, loc, toks: rules.ConcatRule([toks[0], rules.ZeroOrMoreRule(toks[0])]))
+ oneOfRule.setParseAction(lambda string, loc, toks: rules.OrRule(toks))
+ concatRule.setParseAction(lambda string, loc, toks: toks[0] if len(toks) == 1 else rules.ConcatRule(toks))
+
+
# rule << tagRule ^ ignoreOrthRule ^ zeroOrMoreRule ^ oneOrMoreRule ^ orRule ^ concatRule ^ parenRule
# tagRule.setParseAction(lambda s,l,toks: doprint(toks))
# print lineNum, line
- parsedLine = rule.parseString(line, parseAll=True)
+ parsedRule = rule.parseString(line, parseAll=True)[0]
+ print parsedRule
+ return parsedRule
# print parsedLine
diff --git a/fsabuilder/morfeuszbuilder/segrules/segsfsa.py b/fsabuilder/morfeuszbuilder/segrules/segsfsa.py
deleted file mode 100644
index f060472..0000000
--- a/fsabuilder/morfeuszbuilder/segrules/segsfsa.py
+++ /dev/null
@@ -1,27 +0,0 @@
-'''
-Created on 24 sty 2014
-
-@author: mlenart
-'''
-
-class SegmentsFSAState(object):
-
- def __init__(self):
- self.transitionsMap = {}
-
- def addSegmentRule(self, segmentRule):
- pass
-
-class SegmentsFSA(object):
-
- def __init__(self, key2Def={}):
- self.initialState = SegmentsFSAState()
-
- def addSegmentRule(self, segmentRule):
- self.initialState.addSegmentRule(segmentRule)
-
- def serialize(self):
- res = bytearray()
- return res
-
-
\ No newline at end of file
diff --git a/fsabuilder/morfeuszbuilder/segrules/test/parserTest.py b/fsabuilder/morfeuszbuilder/segrules/test/parserTest.py
index 5b92392..f74556d 100644
--- a/fsabuilder/morfeuszbuilder/segrules/test/parserTest.py
+++ b/fsabuilder/morfeuszbuilder/segrules/test/parserTest.py
@@ -9,9 +9,11 @@ from morfeuszbuilder.segrules import rulesParser
from morfeuszbuilder.tagset import tagset
class Test(unittest.TestCase):
+ print 'do test'
t = tagset.Tagset(os.path.join(os.path.dirname(__file__), 'polimorf.tagset'))
parser = rulesParser.RulesParser(t)
parser.parse(os.path.join(os.path.dirname(__file__), 'segmenty.dat'))
+ print 'done'
if __name__ == "__main__":
unittest.main()
diff --git a/fsabuilder/morfeuszbuilder/segrules/test/segmenty.dat b/fsabuilder/morfeuszbuilder/segrules/test/segmenty.dat
index 7f1e14e..b55cbef 100644
--- a/fsabuilder/morfeuszbuilder/segrules/test/segmenty.dat
+++ b/fsabuilder/morfeuszbuilder/segrules/test/segmenty.dat
@@ -3,7 +3,6 @@ aggl=permissive strict isolated
praet=split composite
[combinations]
-(dupa|dupa)
#define wsz_interp (interp|kropka|dywiz)*
#define moze_interp(segmenty) wsz_interp segmenty wsz_interp
diff --git a/fsabuilder/morfeuszbuilder/tagset/segtypes.py b/fsabuilder/morfeuszbuilder/tagset/segtypes.py
index 254491e..24652a5 100644
--- a/fsabuilder/morfeuszbuilder/tagset/segtypes.py
+++ b/fsabuilder/morfeuszbuilder/tagset/segtypes.py
@@ -4,80 +4,85 @@ Created on 17 lut 2014
@author: mlenart
'''
import re
+from morfeuszbuilder.utils import exceptions
class Segtypes(object):
- def __init__(self, tagset, segrulesFile):
+ def __init__(self, tagset, segrulesConfigFile):
self.tagset = tagset
- self.segrulesConfigFile = segrulesFile
+ self.filename = segrulesConfigFile.filename
self.segtype2Segnum = {}
self.patternsList = []
+ self._readLexemes(segrulesConfigFile)
+ self._readTags(segrulesConfigFile)
+
+ def _validate(self, msg, lineNum, cond):
+ if not cond:
+ raise exceptions.ConfigFileException(self.filename, lineNum, msg)
+
+ def _readTags(self, segrulesConfigFile):
+ for lineNum, line in segrulesConfigFile.enumerateLinesInSection('tags'):
+ print lineNum, line
+ splitLine = re.split(r'\s+', line.strip())
+ self._validate(
+ u'Line in [tags] section must contain exactly two fields - segment type and tag pattern',
+ lineNum,
+ len(splitLine) == 2)
+ segtype, pattern = splitLine
+ self._validate(
+ u'Segment type must be a lowercase alphanumeric with optional underscores',
+ lineNum,
+ re.match(r'[a-z_]+', segtype))
+ self._validate(
+ u'Pattern must contain only ":", "%", "." and lowercase alphanumeric letters',
+ lineNum,
+ re.match(r'[a-z_\.\:\%]+', pattern))
+
+ if segtype in self.segtype2Segnum:
+ segnum = self.segtype2Segnum[segtype]
+ else:
+ segnum = len(self.segtype2Segnum)
+ self.segtype2Segnum[segtype] = segnum
+
+ self.patternsList.append(SegtypePattern(None, pattern, segnum))
+
+ def _readLexemes(self, segrulesConfigFile):
+ for lineNum, line in segrulesConfigFile.enumerateLinesInSection('lexemes'):
+ segtype, pattern = line.strip().split('\t')
+ self._validate(
+ u'Segment type must be a lowercase alphanumeric with optional underscores',
+ lineNum,
+ re.match(r'[a-z_]+', segtype))
+ self._validate(
+ u'Pattern must contain lemma and POS',
+ lineNum,
+ re.match(r'.+\:[a-z_]+', pattern, re.U))
+
+ if segtype in self.segtype2Segnum:
+ segnum = self.segtype2Segnum[segtype]
+ else:
+ segnum = len(self.segtype2Segnum)
+ self.segtype2Segnum[segtype] = segnum
+
+ lemma, pos = pattern.split(':')
+
+ self.patternsList.append(SegtypePattern(lemma, pos + ':%', segnum))
- def readTags(self, lines):
- inTags = False
- for lineNum, line in enumerate(lines, start=1):
- header = self._getHeaderValue(line, lineNum)
- if header == 'tags':
- inTags = True
- elif header:
- inTags = False
- elif inTags:
- segtype, pattern = line.strip().split('\t')
- self._validate(
- u'Segment type must be a lowercase alphanumeric with optional underscores',
- lineNum,
- re.match(r'[a-z_]+', segtype))
- self._validate(
- u'Pattern must contain only ":", "%", "." and lowercase alphanumeric letters',
- lineNum,
- re.match(r'[a-z_\.\:\%]+', pattern))
-
- if segtype in self.segtype2Segnum:
- segnum = self.segtype2Segnum[segtype]
- else:
- segnum = len(self.segtype2Segnum)
- self.segtype2Segnum[segtype] = segnum
-
- self.patternsList.append(SegtypePattern(None, pattern, segnum))
+ def hasSegtype(self, segTypeString):
+ return segTypeString in self.segtype2Segnum
- def readLexemes(self, lines):
- inLexemes = False
- for lineNum, line in enumerate(lines, start=1):
- header = self._getHeaderValue(line, lineNum)
- if header == 'lexemes':
- inLexemes = True
- elif header:
- inLexemes = False
- elif inLexemes:
- segtype, pattern = line.strip().split('\t')
- self._validate(
- u'Segment type must be a lowercase alphanumeric with optional underscores',
- lineNum,
- re.match(r'[a-z_]+', segtype))
- self._validate(
- u'Pattern must contain lemma and POS',
- lineNum,
- re.match(r'\w+\:[a-z_]+', pattern, re.U))
-
- if segtype in self.segtype2Segnum:
- segnum = self.segtype2Segnum[segtype]
- else:
- segnum = len(self.segtype2Segnum)
- self.segtype2Segnum[segtype] = segnum
-
- lemma, pos = pattern.split(':')
-
- self.patternsList.append(SegtypePattern(lemma, pos + ':%', segnum))
+ def getSegnum4Segtype(self, segTypeString):
+ return self.segtype2Segnum[segTypeString]
def lexeme2Segnum(self, lemma, tag):
for p in self.patternsList:
res = p.tryToMatch(lemma, tag)
if res >= 0:
return res
- raise SegtypesException('Cannot find segment type for given tag: %s' % tag)
+ return None
class SegtypePattern(object):
@@ -92,11 +97,3 @@ class SegtypePattern(object):
return self.segnum
else:
return -1
-
-class SegtypesException(Exception):
-
- def __init__(self, msg):
- self.msg = msg
-
- def __str__(self):
- return u'Error in segment rules: %s' % self.msg
diff --git a/fsabuilder/morfeuszbuilder/tagset/tagset.py b/fsabuilder/morfeuszbuilder/tagset/tagset.py
index 2599918..cde6fb2 100644
--- a/fsabuilder/morfeuszbuilder/tagset/tagset.py
+++ b/fsabuilder/morfeuszbuilder/tagset/tagset.py
@@ -12,10 +12,11 @@ class Tagset(object):
NAMES = 2
SEP = '\t'
- def __init__(self, filename, encoding='utf8'):
+ def __init__(self, filename=None, encoding='utf8'):
self.tag2tagnum = {}
self.name2namenum = {}
- self._doInit(filename, encoding)
+ if filename:
+ self._doInit(filename, encoding)
self.tagnum2tag = dict(map(lambda (k, v): (v, k), self.tag2tagnum.iteritems()))
def _doInit(self, filename, encoding):
@@ -37,4 +38,4 @@ class Tagset(object):
res[tag] = int(tagNum)
def getTag4Tagnum(self, tagnum):
- return self.tagnum2tag[tagnum]
\ No newline at end of file
+ return self.tagnum2tag[tagnum]
diff --git a/fsabuilder/morfeuszbuilder/utils/configFile.py b/fsabuilder/morfeuszbuilder/utils/configFile.py
index 53e29fb..2e4c4af 100644
--- a/fsabuilder/morfeuszbuilder/utils/configFile.py
+++ b/fsabuilder/morfeuszbuilder/utils/configFile.py
@@ -6,6 +6,7 @@ Created on 18 lut 2014
import re
import codecs
+import exceptions
def getHeaderValue(line, lineNum):
m = re.match(ur'\s*\[(.*?)\]\s*(\#.*)?', line)
@@ -25,9 +26,9 @@ class ConfigFile(object):
def _addSectionStart(self, sectionName, lineNum):
if not sectionName in self.sectionNames:
- raise ConfigFileException(self.filename, lineNum, 'Invalid section: %s' % sectionName)
+ raise exceptions.ConfigFileException(self.filename, lineNum, 'Invalid section: %s' % sectionName)
if sectionName in self.section2Lines:
- raise ConfigFileException(self.filename, lineNum, 'Duplicate section: %s' % sectionName)
+ raise exceptions.ConfigFileException(self.filename, lineNum, 'Duplicate section: %s' % sectionName)
self.section2Lines[sectionName] = []
self.currSection = sectionName
@@ -35,7 +36,7 @@ class ConfigFile(object):
line = line.strip()
if line:
if self.currSection is None and not line.startswith('#'):
- raise ConfigFileException(self.filename, lineNum, 'Text outside of any section')
+ raise exceptions.ConfigFileException(self.filename, lineNum, 'Text outside of any section')
self.section2Lines[self.currSection].append((lineNum, line))
def _getHeaderValue(self, line, lineNum):
@@ -56,13 +57,3 @@ class ConfigFile(object):
self._addSectionStart(header, lineNum)
else:
self._addLine(line, lineNum)
-
-class ConfigFileException(Exception):
-
- def __init__(self, filename, lineNum, msg):
- self.filename = filename
- self.lineNum = lineNum
- self.msg = msg
-
- def __str__(self):
- return u'%s:%d - %s' % (self.filename, self.lineNum, self.msg)
diff --git a/fsabuilder/morfeuszbuilder/utils/exceptions.py b/fsabuilder/morfeuszbuilder/utils/exceptions.py
new file mode 100644
index 0000000..494eef2
--- /dev/null
+++ b/fsabuilder/morfeuszbuilder/utils/exceptions.py
@@ -0,0 +1,34 @@
+'''
+Created on Feb 19, 2014
+
+@author: lennyn
+'''
+
+class FSABuilderException(Exception):
+ '''
+ Base class for all errors raised while building FSA files
+ '''
+
+ def __init__(self, msg):
+ self.msg = msg
+
+ def __str__(self):
+ return 'Failed to create FSA files: ' + self.msg
+
+class SegtypesException(FSABuilderException):
+
+ def __init__(self, msg):
+ self.msg = msg
+
+ def __str__(self):
+ return u'Error in segment rules: %s' % self.msg
+
+class ConfigFileException(FSABuilderException):
+
+ def __init__(self, filename, lineNum, msg):
+ self.filename = filename
+ self.lineNum = lineNum
+ self.msg = msg
+
+ def __str__(self):
+ return u'%s:%d - %s' % (self.filename, self.lineNum, self.msg)
--
libgit2 0.22.2