Commit 524cea05b1987df16e8706a666715964c105ae52

Authored by Michał Lenart
1 parent bd1aba95

- poprawienie buga przy ustawianiu nowego pliku z automatem (nie ustawiał się automat do segmentacji)
- poprawienie budowania pythonowego modułu

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@118 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
CMakeLists.txt
... ... @@ -5,7 +5,7 @@ project (Morfeusz)
5 5 set (Morfeusz_VERSION_MAJOR 2)
6 6 set (Morfeusz_VERSION_MINOR 0)
7 7 set (Morfeusz_VERSION_PATCH 0)
8   -set (CMAKE_BUILD_TYPE "Debug")
  8 +set (CMAKE_BUILD_TYPE "Release")
9 9  
10 10 enable_testing()
11 11  
... ... @@ -36,7 +36,7 @@ if ("${INPUT_DICTIONARIES}" STREQUAL "")
36 36 if ("${EMPTY_INPUT_DICTIONARY}" STREQUAL "TRUE")
37 37 set (INPUT_DICTIONARIES ${PROJECT_SOURCE_DIR}/input/empty.txt)
38 38 else ()
39   - set (INPUT_DICTIONARIES "${PROJECT_SOURCE_DIR}/input/PoliMorfSmall.tab,${PROJECT_SOURCE_DIR}/input/dodatki.tab")
  39 + set (INPUT_DICTIONARIES "${PROJECT_SOURCE_DIR}/input/PoliMorf-0.6.7.tab,${PROJECT_SOURCE_DIR}/input/dodatki.tab")
40 40 endif ()
41 41 endif ()
42 42  
... ...
fsabuilder/morfeuszbuilder/tagset/segtypes.py
... ... @@ -28,6 +28,9 @@ class Segtypes(object):
28 28 self._readTags(segrulesConfigFile)
29 29 self._indexSegnums()
30 30  
  31 + print self._lemmaTagnum2Segnum
  32 + print self._tagnum2Segnum
  33 +
31 34 print self.segnum2Segtype
32 35  
33 36 def _validate(self, msg, lineNum, cond):
... ... @@ -108,7 +111,7 @@ class Segtypes(object):
108 111 self._validate(
109 112 u'Pattern must contain encodedForm and part-of-speech fields',
110 113 lineNum,
111   - re.match(r'.+\:[a-z_]+', pattern, re.U))
  114 + re.match(r'.+?\:[a-z_]+', pattern, re.U))
112 115  
113 116 if segtype in self.segtype2Segnum:
114 117 segnum = self.segtype2Segnum[segtype]
... ... @@ -146,13 +149,13 @@ class Segtypes(object):
146 149  
147 150 # index lexemes
148 151 for p in self.patternsList:
149   - if p.encodedForm:
  152 + if p.lemma:
150 153 for tag in self.tagset.getAllTags():
151 154 tagnum = self.tagset.getTagnum4Tag(tag)
152   - if not (p.encodedForm, tagnum) in self._lemmaTagnum2Segnum:
153   - segnum = p.tryToMatch(p.encodedForm, tag)
  155 + if not (p.lemma, tagnum) in self._lemmaTagnum2Segnum:
  156 + segnum = p.tryToMatch(p.lemma, tag)
154 157 if segnum != -1:
155   - self._lemmaTagnum2Segnum[(p.encodedForm, tagnum)] = segnum
  158 + self._lemmaTagnum2Segnum[(p.lemma, tagnum)] = segnum
156 159 # logging.info('indexing segment type numbers - done')
157 160 # self._debugSegnums()
158 161  
... ... @@ -171,7 +174,7 @@ class Segtypes(object):
171 174 class SegtypePattern(object):
172 175  
173 176 def __init__(self, lemma, pattern, segnum):
174   - self.encodedForm = lemma
  177 + self.lemma = lemma
175 178 self.pattern = pattern
176 179 self.segnum = segnum
177 180  
... ... @@ -181,8 +184,9 @@ class SegtypePattern(object):
181 184 patterns2Match = []
182 185 patterns2Match.append(self.pattern.replace('%', '.*'))
183 186 patterns2Match.append(re.sub(r'\:\%$', '', self.pattern).replace('%', '.*'))
184   - if (self.encodedForm is None or self.encodedForm == lemma) \
  187 + if (self.lemma is None or self.lemma == lemma) \
185 188 and any([re.match(p, tag) for p in patterns2Match]):
186 189 return self.segnum
187 190 else:
  191 +# print 'NOT match', lemma.encode('utf8') if lemma else '%', tag, self.segnum
188 192 return -1
... ...
morfeusz/Environment.cpp
... ... @@ -108,6 +108,7 @@ void Environment::setFSAFile(const std::string& filename) {
108 108 this->fsaFileStartPtr = readFile<unsigned char>(filename.c_str());
109 109 this->fsa = FSA< vector<InterpsGroup> > ::getFSA(fsaFileStartPtr, initializeDeserializer(this->processorType));
110 110 this->segrulesFSAsMap = createSegrulesFSAsMap(this->fsaFileStartPtr);
  111 + this->currSegrulesFSA = getDefaultSegrulesFSA(this->segrulesFSAsMap, this->fsaFileStartPtr);
111 112 this->isFromFile = true;
112 113 }
113 114  
... ...
morfeusz/Morfeusz.cpp
... ... @@ -123,6 +123,7 @@ void Morfeusz::doProcessOneWord(
123 123 // cerr << "accept at '" << currInput << "' type=" << (int) ig.type << endl;
124 124 set<SegrulesState> newSegrulesStates;
125 125 env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, newSegrulesStates);
  126 +// cerr << "newSegrulesStates.size() " << newSegrulesStates.size() << endl;
126 127 for (
127 128 set<SegrulesState>::iterator it = newSegrulesStates.begin();
128 129 it != newSegrulesStates.end();
... ...
morfeusz/python/CMakeLists.txt
... ... @@ -29,7 +29,7 @@ configure_file (${SETUP_PY_IN} ${SETUP_PY})
29 29 add_custom_command (OUTPUT ${SETUP_PY}
30 30 COMMAND python
31 31 ARGS setup.py build
32   - DEPENDS ${DEPS})
  32 + DEPENDS ${DEPS} ${SETUP_PY_IN})
33 33  
34 34 add_custom_target (pymorfeusz DEPENDS ${SETUP_PY} libmorfeusz)
35 35  
... ...
morfeusz/python/setup.py.in
1 1 # -*- coding: utf-8 -*-
2 2 from setuptools import setup, Extension
3 3  
4   -morfeusz = Extension('_morfeusz',
5   - libraries=['morfeusz'],
  4 +morfeusz2 = Extension('_morfeusz',
  5 + libraries=['morfeusz2'],
6 6 library_dirs=['${CMAKE_CURRENT_BINARY_DIR}/..'],
7 7 include_dirs=['${CMAKE_CURRENT_SOURCE_DIR}/..'],
8 8 sources=['${CMAKE_CURRENT_BINARY_DIR}/${SWIG_PYTHON_OUTFILE}'])
... ... @@ -14,5 +14,5 @@ if __name__ == '__main__':
14 14 description='Python bindings for Morfeusz',
15 15 version='${PYMORFEUSZ_VERSION}',
16 16 package_dir={ '': '${CMAKE_CURRENT_BINARY_DIR}'},
17   - py_modules = ['morfeusz'],
18   - ext_modules = [morfeusz])
  17 + py_modules = ['morfeusz2'],
  18 + ext_modules = [morfeusz2])
... ...
nbproject/configurations.xml
... ... @@ -93,7 +93,8 @@
93 93 </toolsSet>
94 94 <flagsDictionary>
95 95 <element flagsID="0" commonFlags="-std=c++98"/>
96   - <element flagsID="1" commonFlags="-std=c++98 -fPIC"/>
  96 + <element flagsID="1" commonFlags="-std=c++98 -O3 -fPIC"/>
  97 + <element flagsID="2" commonFlags="-std=c++98 -fPIC"/>
97 98 </flagsDictionary>
98 99 <codeAssistance>
99 100 </codeAssistance>
... ... @@ -106,12 +107,8 @@
106 107 </makeTool>
107 108 </makefileType>
108 109 <item path="../default_fsa.cpp" ex="false" tool="1" flavor2="4">
109   - <ccTool flags="1">
110   - </ccTool>
111 110 </item>
112 111 <item path="../default_synth_fsa.cpp" ex="false" tool="1" flavor2="4">
113   - <ccTool flags="1">
114   - </ccTool>
115 112 </item>
116 113 <item path="build/default_fsa.cpp" ex="false" tool="1" flavor2="4">
117 114 </item>
... ... @@ -282,6 +279,7 @@
282 279 <pElem>/usr/lib/jvm/default-java/include</pElem>
283 280 </incDir>
284 281 <preprocessorList>
  282 + <Elem>NDEBUG</Elem>
285 283 <Elem>libjmorfeusz_EXPORTS</Elem>
286 284 </preprocessorList>
287 285 </ccTool>
... ... @@ -318,7 +316,7 @@
318 316 </ccTool>
319 317 </folder>
320 318 <item path="morfeusz/Environment.cpp" ex="false" tool="1" flavor2="4">
321   - <ccTool flags="1">
  319 + <ccTool flags="2">
322 320 <incDir>
323 321 <pElem>build</pElem>
324 322 <pElem>morfeusz</pElem>
... ... @@ -330,7 +328,7 @@
330 328 </ccTool>
331 329 </item>
332 330 <item path="morfeusz/FlexionGraph.cpp" ex="false" tool="1" flavor2="4">
333   - <ccTool flags="1">
  331 + <ccTool flags="2">
334 332 <incDir>
335 333 <pElem>build</pElem>
336 334 <pElem>morfeusz</pElem>
... ... @@ -342,7 +340,7 @@
342 340 </ccTool>
343 341 </item>
344 342 <item path="morfeusz/Generator.cpp" ex="false" tool="1" flavor2="4">
345   - <ccTool flags="1">
  343 + <ccTool flags="2">
346 344 <incDir>
347 345 <pElem>build</pElem>
348 346 <pElem>morfeusz</pElem>
... ... @@ -354,7 +352,7 @@
354 352 </ccTool>
355 353 </item>
356 354 <item path="morfeusz/GeneratorDeserializer.cpp" ex="false" tool="1" flavor2="4">
357   - <ccTool flags="1">
  355 + <ccTool flags="2">
358 356 <incDir>
359 357 <pElem>build</pElem>
360 358 <pElem>morfeusz</pElem>
... ... @@ -373,12 +371,13 @@
373 371 <pElem>build/morfeusz</pElem>
374 372 </incDir>
375 373 <preprocessorList>
  374 + <Elem>NDEBUG</Elem>
376 375 <Elem>libmorfeusz_EXPORTS</Elem>
377 376 </preprocessorList>
378 377 </ccTool>
379 378 </item>
380 379 <item path="morfeusz/MorphDeserializer.cpp" ex="false" tool="1" flavor2="4">
381   - <ccTool flags="1">
  380 + <ccTool flags="2">
382 381 <incDir>
383 382 <pElem>build</pElem>
384 383 <pElem>morfeusz</pElem>
... ... @@ -390,7 +389,7 @@
390 389 </ccTool>
391 390 </item>
392 391 <item path="morfeusz/MorphInterpretation.cpp" ex="false" tool="1" flavor2="4">
393   - <ccTool flags="1">
  392 + <ccTool flags="2">
394 393 <incDir>
395 394 <pElem>build</pElem>
396 395 <pElem>morfeusz</pElem>
... ... @@ -402,7 +401,7 @@
402 401 </ccTool>
403 402 </item>
404 403 <item path="morfeusz/Tagset.cpp" ex="false" tool="1" flavor2="4">
405   - <ccTool flags="1">
  404 + <ccTool flags="2">
406 405 <incDir>
407 406 <pElem>build</pElem>
408 407 <pElem>morfeusz</pElem>
... ... @@ -414,29 +413,21 @@
414 413 </ccTool>
415 414 </item>
416 415 <item path="morfeusz/charset/CaseConverter.cpp" ex="false" tool="1" flavor2="4">
417   - <ccTool flags="1">
418   - </ccTool>
419 416 </item>
420 417 <item path="morfeusz/charset/CharsetConverter.cpp"
421 418 ex="false"
422 419 tool="1"
423 420 flavor2="4">
424   - <ccTool flags="1">
425   - </ccTool>
426 421 </item>
427 422 <item path="morfeusz/charset/caseconv.cpp" ex="false" tool="1" flavor2="4">
428   - <ccTool flags="1">
429   - </ccTool>
430 423 </item>
431 424 <item path="morfeusz/charset/conversion_tables.cpp"
432 425 ex="false"
433 426 tool="1"
434 427 flavor2="4">
435   - <ccTool flags="1">
436   - </ccTool>
437 428 </item>
438 429 <item path="morfeusz/const.cpp" ex="false" tool="1" flavor2="4">
439   - <ccTool flags="1">
  430 + <ccTool flags="2">
440 431 <incDir>
441 432 <pElem>build</pElem>
442 433 <pElem>morfeusz</pElem>
... ... @@ -450,7 +441,7 @@
450 441 <item path="morfeusz/data/default_fsa.cpp" ex="false" tool="1" flavor2="4">
451 442 </item>
452 443 <item path="morfeusz/fsa/const.cpp" ex="false" tool="1" flavor2="4">
453   - <ccTool flags="1">
  444 + <ccTool flags="2">
454 445 <incDir>
455 446 <pElem>build</pElem>
456 447 <pElem>morfeusz</pElem>
... ... @@ -522,12 +513,8 @@
522 513 ex="false"
523 514 tool="1"
524 515 flavor2="4">
525   - <ccTool flags="1">
526   - </ccTool>
527 516 </item>
528 517 <item path="morfeusz/segrules/segrules.cpp" ex="false" tool="1" flavor2="4">
529   - <ccTool flags="1">
530   - </ccTool>
531 518 </item>
532 519 <item path="morfeusz/test_recognize_dict.cpp" ex="false" tool="1" flavor2="4">
533 520 <ccTool flags="0">
... ...