Commit 3fa5c3388953574a4614746fd748465ff9656f25

Authored by Michał Lenart
1 parent bbe81abc

dodane łączenie identycznych wierszy w jeden

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@142 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
fsabuilder/morfeuszbuilder/fsa/convertinput.py
... ... @@ -121,15 +121,17 @@ class PolimorfConverter4Generator(object):
121 121 return sorted(inputLines, key=lambda line: (self.encoder.word2SortKey(line.split(' ')[1].decode('utf8')), line))
122 122  
123 123 def _reallyParseLines(self, inputLines):
  124 + prevLine = None
124 125 for line in inputLines:
125 126 line = line.decode(self.inputEncoding).strip(u'\n')
126   - if line:
  127 + if line and line != prevLine:
127 128 orth, base, tagnum, namenum, typenum, homonymId = line.split(u' ')
128 129 # print orth.encode('utf8'), base.encode('utf8'), homonymId
129 130 tagnum = int(tagnum)
130 131 namenum = int(namenum)
131 132 typenum = int(typenum)
132 133 yield (base, Interpretation4Generator(orth, base, tagnum, namenum, typenum, homonymId))
  134 + prevLine = line
133 135  
134 136 def convert(self, inputLines):
135 137 return _mergeEntries(self._reallyParseLines(self._sortLines(self._partiallyParseLines(inputLines))), lowercase=False)
... ...