Commit 524cea05b1987df16e8706a666715964c105ae52
1 parent
bd1aba95
- poprawienie buga przy ustawianiu nowego pliku z automatem (nie ustawiał się automat do segmentacji)
- poprawienie budowania pythonowego modułu
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@118 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing 7 changed files with 33 additions and 40 deletions
CMakeLists.txt
... | ... | @@ -5,7 +5,7 @@ project (Morfeusz) |
5 | 5 | set (Morfeusz_VERSION_MAJOR 2) |
6 | 6 | set (Morfeusz_VERSION_MINOR 0) |
7 | 7 | set (Morfeusz_VERSION_PATCH 0) |
8 | -set (CMAKE_BUILD_TYPE "Debug") | |
8 | +set (CMAKE_BUILD_TYPE "Release") | |
9 | 9 | |
10 | 10 | enable_testing() |
11 | 11 | |
... | ... | @@ -36,7 +36,7 @@ if ("${INPUT_DICTIONARIES}" STREQUAL "") |
36 | 36 | if ("${EMPTY_INPUT_DICTIONARY}" STREQUAL "TRUE") |
37 | 37 | set (INPUT_DICTIONARIES ${PROJECT_SOURCE_DIR}/input/empty.txt) |
38 | 38 | else () |
39 | - set (INPUT_DICTIONARIES "${PROJECT_SOURCE_DIR}/input/PoliMorfSmall.tab,${PROJECT_SOURCE_DIR}/input/dodatki.tab") | |
39 | + set (INPUT_DICTIONARIES "${PROJECT_SOURCE_DIR}/input/PoliMorf-0.6.7.tab,${PROJECT_SOURCE_DIR}/input/dodatki.tab") | |
40 | 40 | endif () |
41 | 41 | endif () |
42 | 42 | |
... | ... |
fsabuilder/morfeuszbuilder/tagset/segtypes.py
... | ... | @@ -28,6 +28,9 @@ class Segtypes(object): |
28 | 28 | self._readTags(segrulesConfigFile) |
29 | 29 | self._indexSegnums() |
30 | 30 | |
31 | + print self._lemmaTagnum2Segnum | |
32 | + print self._tagnum2Segnum | |
33 | + | |
31 | 34 | print self.segnum2Segtype |
32 | 35 | |
33 | 36 | def _validate(self, msg, lineNum, cond): |
... | ... | @@ -108,7 +111,7 @@ class Segtypes(object): |
108 | 111 | self._validate( |
109 | 112 | u'Pattern must contain encodedForm and part-of-speech fields', |
110 | 113 | lineNum, |
111 | - re.match(r'.+\:[a-z_]+', pattern, re.U)) | |
114 | + re.match(r'.+?\:[a-z_]+', pattern, re.U)) | |
112 | 115 | |
113 | 116 | if segtype in self.segtype2Segnum: |
114 | 117 | segnum = self.segtype2Segnum[segtype] |
... | ... | @@ -146,13 +149,13 @@ class Segtypes(object): |
146 | 149 | |
147 | 150 | # index lexemes |
148 | 151 | for p in self.patternsList: |
149 | - if p.encodedForm: | |
152 | + if p.lemma: | |
150 | 153 | for tag in self.tagset.getAllTags(): |
151 | 154 | tagnum = self.tagset.getTagnum4Tag(tag) |
152 | - if not (p.encodedForm, tagnum) in self._lemmaTagnum2Segnum: | |
153 | - segnum = p.tryToMatch(p.encodedForm, tag) | |
155 | + if not (p.lemma, tagnum) in self._lemmaTagnum2Segnum: | |
156 | + segnum = p.tryToMatch(p.lemma, tag) | |
154 | 157 | if segnum != -1: |
155 | - self._lemmaTagnum2Segnum[(p.encodedForm, tagnum)] = segnum | |
158 | + self._lemmaTagnum2Segnum[(p.lemma, tagnum)] = segnum | |
156 | 159 | # logging.info('indexing segment type numbers - done') |
157 | 160 | # self._debugSegnums() |
158 | 161 | |
... | ... | @@ -171,7 +174,7 @@ class Segtypes(object): |
171 | 174 | class SegtypePattern(object): |
172 | 175 | |
173 | 176 | def __init__(self, lemma, pattern, segnum): |
174 | - self.encodedForm = lemma | |
177 | + self.lemma = lemma | |
175 | 178 | self.pattern = pattern |
176 | 179 | self.segnum = segnum |
177 | 180 | |
... | ... | @@ -181,8 +184,9 @@ class SegtypePattern(object): |
181 | 184 | patterns2Match = [] |
182 | 185 | patterns2Match.append(self.pattern.replace('%', '.*')) |
183 | 186 | patterns2Match.append(re.sub(r'\:\%$', '', self.pattern).replace('%', '.*')) |
184 | - if (self.encodedForm is None or self.encodedForm == lemma) \ | |
187 | + if (self.lemma is None or self.lemma == lemma) \ | |
185 | 188 | and any([re.match(p, tag) for p in patterns2Match]): |
186 | 189 | return self.segnum |
187 | 190 | else: |
191 | +# print 'NOT match', lemma.encode('utf8') if lemma else '%', tag, self.segnum | |
188 | 192 | return -1 |
... | ... |
morfeusz/Environment.cpp
... | ... | @@ -108,6 +108,7 @@ void Environment::setFSAFile(const std::string& filename) { |
108 | 108 | this->fsaFileStartPtr = readFile<unsigned char>(filename.c_str()); |
109 | 109 | this->fsa = FSA< vector<InterpsGroup> > ::getFSA(fsaFileStartPtr, initializeDeserializer(this->processorType)); |
110 | 110 | this->segrulesFSAsMap = createSegrulesFSAsMap(this->fsaFileStartPtr); |
111 | + this->currSegrulesFSA = getDefaultSegrulesFSA(this->segrulesFSAsMap, this->fsaFileStartPtr); | |
111 | 112 | this->isFromFile = true; |
112 | 113 | } |
113 | 114 | |
... | ... |
morfeusz/Morfeusz.cpp
... | ... | @@ -123,6 +123,7 @@ void Morfeusz::doProcessOneWord( |
123 | 123 | // cerr << "accept at '" << currInput << "' type=" << (int) ig.type << endl; |
124 | 124 | set<SegrulesState> newSegrulesStates; |
125 | 125 | env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, newSegrulesStates); |
126 | +// cerr << "newSegrulesStates.size() " << newSegrulesStates.size() << endl; | |
126 | 127 | for ( |
127 | 128 | set<SegrulesState>::iterator it = newSegrulesStates.begin(); |
128 | 129 | it != newSegrulesStates.end(); |
... | ... |
morfeusz/python/CMakeLists.txt
... | ... | @@ -29,7 +29,7 @@ configure_file (${SETUP_PY_IN} ${SETUP_PY}) |
29 | 29 | add_custom_command (OUTPUT ${SETUP_PY} |
30 | 30 | COMMAND python |
31 | 31 | ARGS setup.py build |
32 | - DEPENDS ${DEPS}) | |
32 | + DEPENDS ${DEPS} ${SETUP_PY_IN}) | |
33 | 33 | |
34 | 34 | add_custom_target (pymorfeusz DEPENDS ${SETUP_PY} libmorfeusz) |
35 | 35 | |
... | ... |
morfeusz/python/setup.py.in
1 | 1 | # -*- coding: utf-8 -*- |
2 | 2 | from setuptools import setup, Extension |
3 | 3 | |
4 | -morfeusz = Extension('_morfeusz', | |
5 | - libraries=['morfeusz'], | |
4 | +morfeusz2 = Extension('_morfeusz', | |
5 | + libraries=['morfeusz2'], | |
6 | 6 | library_dirs=['${CMAKE_CURRENT_BINARY_DIR}/..'], |
7 | 7 | include_dirs=['${CMAKE_CURRENT_SOURCE_DIR}/..'], |
8 | 8 | sources=['${CMAKE_CURRENT_BINARY_DIR}/${SWIG_PYTHON_OUTFILE}']) |
... | ... | @@ -14,5 +14,5 @@ if __name__ == '__main__': |
14 | 14 | description='Python bindings for Morfeusz', |
15 | 15 | version='${PYMORFEUSZ_VERSION}', |
16 | 16 | package_dir={ '': '${CMAKE_CURRENT_BINARY_DIR}'}, |
17 | - py_modules = ['morfeusz'], | |
18 | - ext_modules = [morfeusz]) | |
17 | + py_modules = ['morfeusz2'], | |
18 | + ext_modules = [morfeusz2]) | |
... | ... |
nbproject/configurations.xml
... | ... | @@ -93,7 +93,8 @@ |
93 | 93 | </toolsSet> |
94 | 94 | <flagsDictionary> |
95 | 95 | <element flagsID="0" commonFlags="-std=c++98"/> |
96 | - <element flagsID="1" commonFlags="-std=c++98 -fPIC"/> | |
96 | + <element flagsID="1" commonFlags="-std=c++98 -O3 -fPIC"/> | |
97 | + <element flagsID="2" commonFlags="-std=c++98 -fPIC"/> | |
97 | 98 | </flagsDictionary> |
98 | 99 | <codeAssistance> |
99 | 100 | </codeAssistance> |
... | ... | @@ -106,12 +107,8 @@ |
106 | 107 | </makeTool> |
107 | 108 | </makefileType> |
108 | 109 | <item path="../default_fsa.cpp" ex="false" tool="1" flavor2="4"> |
109 | - <ccTool flags="1"> | |
110 | - </ccTool> | |
111 | 110 | </item> |
112 | 111 | <item path="../default_synth_fsa.cpp" ex="false" tool="1" flavor2="4"> |
113 | - <ccTool flags="1"> | |
114 | - </ccTool> | |
115 | 112 | </item> |
116 | 113 | <item path="build/default_fsa.cpp" ex="false" tool="1" flavor2="4"> |
117 | 114 | </item> |
... | ... | @@ -282,6 +279,7 @@ |
282 | 279 | <pElem>/usr/lib/jvm/default-java/include</pElem> |
283 | 280 | </incDir> |
284 | 281 | <preprocessorList> |
282 | + <Elem>NDEBUG</Elem> | |
285 | 283 | <Elem>libjmorfeusz_EXPORTS</Elem> |
286 | 284 | </preprocessorList> |
287 | 285 | </ccTool> |
... | ... | @@ -318,7 +316,7 @@ |
318 | 316 | </ccTool> |
319 | 317 | </folder> |
320 | 318 | <item path="morfeusz/Environment.cpp" ex="false" tool="1" flavor2="4"> |
321 | - <ccTool flags="1"> | |
319 | + <ccTool flags="2"> | |
322 | 320 | <incDir> |
323 | 321 | <pElem>build</pElem> |
324 | 322 | <pElem>morfeusz</pElem> |
... | ... | @@ -330,7 +328,7 @@ |
330 | 328 | </ccTool> |
331 | 329 | </item> |
332 | 330 | <item path="morfeusz/FlexionGraph.cpp" ex="false" tool="1" flavor2="4"> |
333 | - <ccTool flags="1"> | |
331 | + <ccTool flags="2"> | |
334 | 332 | <incDir> |
335 | 333 | <pElem>build</pElem> |
336 | 334 | <pElem>morfeusz</pElem> |
... | ... | @@ -342,7 +340,7 @@ |
342 | 340 | </ccTool> |
343 | 341 | </item> |
344 | 342 | <item path="morfeusz/Generator.cpp" ex="false" tool="1" flavor2="4"> |
345 | - <ccTool flags="1"> | |
343 | + <ccTool flags="2"> | |
346 | 344 | <incDir> |
347 | 345 | <pElem>build</pElem> |
348 | 346 | <pElem>morfeusz</pElem> |
... | ... | @@ -354,7 +352,7 @@ |
354 | 352 | </ccTool> |
355 | 353 | </item> |
356 | 354 | <item path="morfeusz/GeneratorDeserializer.cpp" ex="false" tool="1" flavor2="4"> |
357 | - <ccTool flags="1"> | |
355 | + <ccTool flags="2"> | |
358 | 356 | <incDir> |
359 | 357 | <pElem>build</pElem> |
360 | 358 | <pElem>morfeusz</pElem> |
... | ... | @@ -373,12 +371,13 @@ |
373 | 371 | <pElem>build/morfeusz</pElem> |
374 | 372 | </incDir> |
375 | 373 | <preprocessorList> |
374 | + <Elem>NDEBUG</Elem> | |
376 | 375 | <Elem>libmorfeusz_EXPORTS</Elem> |
377 | 376 | </preprocessorList> |
378 | 377 | </ccTool> |
379 | 378 | </item> |
380 | 379 | <item path="morfeusz/MorphDeserializer.cpp" ex="false" tool="1" flavor2="4"> |
381 | - <ccTool flags="1"> | |
380 | + <ccTool flags="2"> | |
382 | 381 | <incDir> |
383 | 382 | <pElem>build</pElem> |
384 | 383 | <pElem>morfeusz</pElem> |
... | ... | @@ -390,7 +389,7 @@ |
390 | 389 | </ccTool> |
391 | 390 | </item> |
392 | 391 | <item path="morfeusz/MorphInterpretation.cpp" ex="false" tool="1" flavor2="4"> |
393 | - <ccTool flags="1"> | |
392 | + <ccTool flags="2"> | |
394 | 393 | <incDir> |
395 | 394 | <pElem>build</pElem> |
396 | 395 | <pElem>morfeusz</pElem> |
... | ... | @@ -402,7 +401,7 @@ |
402 | 401 | </ccTool> |
403 | 402 | </item> |
404 | 403 | <item path="morfeusz/Tagset.cpp" ex="false" tool="1" flavor2="4"> |
405 | - <ccTool flags="1"> | |
404 | + <ccTool flags="2"> | |
406 | 405 | <incDir> |
407 | 406 | <pElem>build</pElem> |
408 | 407 | <pElem>morfeusz</pElem> |
... | ... | @@ -414,29 +413,21 @@ |
414 | 413 | </ccTool> |
415 | 414 | </item> |
416 | 415 | <item path="morfeusz/charset/CaseConverter.cpp" ex="false" tool="1" flavor2="4"> |
417 | - <ccTool flags="1"> | |
418 | - </ccTool> | |
419 | 416 | </item> |
420 | 417 | <item path="morfeusz/charset/CharsetConverter.cpp" |
421 | 418 | ex="false" |
422 | 419 | tool="1" |
423 | 420 | flavor2="4"> |
424 | - <ccTool flags="1"> | |
425 | - </ccTool> | |
426 | 421 | </item> |
427 | 422 | <item path="morfeusz/charset/caseconv.cpp" ex="false" tool="1" flavor2="4"> |
428 | - <ccTool flags="1"> | |
429 | - </ccTool> | |
430 | 423 | </item> |
431 | 424 | <item path="morfeusz/charset/conversion_tables.cpp" |
432 | 425 | ex="false" |
433 | 426 | tool="1" |
434 | 427 | flavor2="4"> |
435 | - <ccTool flags="1"> | |
436 | - </ccTool> | |
437 | 428 | </item> |
438 | 429 | <item path="morfeusz/const.cpp" ex="false" tool="1" flavor2="4"> |
439 | - <ccTool flags="1"> | |
430 | + <ccTool flags="2"> | |
440 | 431 | <incDir> |
441 | 432 | <pElem>build</pElem> |
442 | 433 | <pElem>morfeusz</pElem> |
... | ... | @@ -450,7 +441,7 @@ |
450 | 441 | <item path="morfeusz/data/default_fsa.cpp" ex="false" tool="1" flavor2="4"> |
451 | 442 | </item> |
452 | 443 | <item path="morfeusz/fsa/const.cpp" ex="false" tool="1" flavor2="4"> |
453 | - <ccTool flags="1"> | |
444 | + <ccTool flags="2"> | |
454 | 445 | <incDir> |
455 | 446 | <pElem>build</pElem> |
456 | 447 | <pElem>morfeusz</pElem> |
... | ... | @@ -522,12 +513,8 @@ |
522 | 513 | ex="false" |
523 | 514 | tool="1" |
524 | 515 | flavor2="4"> |
525 | - <ccTool flags="1"> | |
526 | - </ccTool> | |
527 | 516 | </item> |
528 | 517 | <item path="morfeusz/segrules/segrules.cpp" ex="false" tool="1" flavor2="4"> |
529 | - <ccTool flags="1"> | |
530 | - </ccTool> | |
531 | 518 | </item> |
532 | 519 | <item path="morfeusz/test_recognize_dict.cpp" ex="false" tool="1" flavor2="4"> |
533 | 520 | <ccTool flags="0"> |
... | ... |