Commit 666111294fd69b02e7aba488584f620218f0efa7
1 parent
d2509b41
poprawki w obsłudze dużych liczb (i generalnie dużej liczby segmentów z ">"); poprawki w obsłudze DICT-ID
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/trunk@314 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing 7 changed files with 140 additions and 106 deletions
fsabuilder/morfeusz_builder
... | ... | @@ -184,12 +184,16 @@ def _readDictIdAndCopyright(inputFiles): |
184 | 184 | inCopyright = False |
185 | 185 | for linenum, line in enumerate(f, start=1): |
186 | 186 | if dictId is None and line.startswith(u'#!DICT-ID'): |
187 | - dictIdTag, _, dictId = line.strip().partition(u' ')[2] | |
187 | + dictIdTag, _, dictId = line.strip().partition(u' ') | |
188 | 188 | exceptions.validate( |
189 | 189 | dictIdTag == u'#!DICT-ID', |
190 | 190 | u'Dictionary ID tag must be followed by a space character and dictionary ID string') |
191 | - | |
192 | - | |
191 | + exceptions.validate( | |
192 | + len(line.split(u' ')) > 1, | |
193 | + u'%s:%d: Must provide DICT-ID' % (inputFile, linenum)) | |
194 | + exceptions.validate( | |
195 | + len(line.split(u' ')) == 2, | |
196 | + u'%s:%d: DICT-ID must not contain spaces' % (inputFile, linenum)) | |
193 | 197 | elif copyright is None and line.startswith(u'#<COPYRIGHT>'): |
194 | 198 | exceptions.validate( |
195 | 199 | line.strip() == u'#<COPYRIGHT>', |
... | ... |
fsabuilder/morfeuszbuilder/fsa/convertinput.py
... | ... | @@ -46,8 +46,10 @@ class LineParser(object): |
46 | 46 | return True |
47 | 47 | elif line and not ' ' in ''.join(line.split('\t')[:2]): |
48 | 48 | return False |
49 | + elif line.startswith(u'#!DICT-ID'): | |
50 | + return True | |
49 | 51 | else: |
50 | - logging.warn(u'Ignoring line: "%s" - contains space in text form or lemma' % line.strip().decode('utf8')) | |
52 | + logging.warn(u'Ignoring line: "%s" - contains space in text form or lemma' % (line.strip())) | |
51 | 53 | return True |
52 | 54 | |
53 | 55 | def parseLine(self, line): |
... | ... |
morfeusz/MorfeuszImpl.cpp
... | ... | @@ -55,12 +55,16 @@ namespace morfeusz { |
55 | 55 | } |
56 | 56 | |
57 | 57 | static void doShiftOrth(InterpretedChunk& from, InterpretedChunk& to) { |
58 | - to.prefixChunks.insert(to.prefixChunks.end(), from.prefixChunks.begin(), from.prefixChunks.end()); | |
59 | -// from.prefixChunks.resize(0); | |
58 | + to.prefixChunks.swap(from.prefixChunks); // from.prefixChunks are ignored anyway. Will swap them back in doUnshiftOrth | |
60 | 59 | to.prefixChunks.push_back(from); |
61 | 60 | to.textStartPtr = from.textStartPtr; |
62 | 61 | from.orthWasShifted = true; |
63 | 62 | } |
63 | + | |
64 | + static void doUnshiftOrth(InterpretedChunk& from, InterpretedChunk& to) { | |
65 | + to.prefixChunks.swap(from.prefixChunks); | |
66 | + from.prefixChunks.pop_back(); | |
67 | + } | |
64 | 68 | |
65 | 69 | static void feedStateDirectly( |
66 | 70 | const FSAType& fsa, |
... | ... | @@ -359,6 +363,7 @@ namespace morfeusz { |
359 | 363 | SegrulesState newSegrulesState; |
360 | 364 | env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, isAtWhitespace, newSegrulesState); |
361 | 365 | if (!newSegrulesState.failed) { |
366 | + | |
362 | 367 | InterpretedChunk ic( |
363 | 368 | createChunk(ig, reader, newSegrulesState.shiftOrthFromPrevious, homonymId)); |
364 | 369 | |
... | ... | @@ -386,8 +391,10 @@ namespace morfeusz { |
386 | 391 | bool caseMatches, |
387 | 392 | const SegrulesState& newSegrulesState, |
388 | 393 | InterpretedChunk& ic) const { |
394 | + bool orthShifted = false; | |
389 | 395 | if (!accum.empty() && accum.back().shiftOrth) { |
390 | 396 | doShiftOrth(accum.back(), ic); |
397 | + orthShifted = true; | |
391 | 398 | } |
392 | 399 | if (!caseMatches && options.caseHandling == CONDITIONALLY_CASE_SENSITIVE) { |
393 | 400 | notMatchingCaseSegs++; |
... | ... | @@ -407,6 +414,9 @@ namespace morfeusz { |
407 | 414 | doProcessOneWord(env, newReader, newSegrulesState); |
408 | 415 | } |
409 | 416 | accum.pop_back(); |
417 | + if (orthShifted) { | |
418 | + doUnshiftOrth(accum.back(), ic); | |
419 | + } | |
410 | 420 | if (!caseMatches && options.caseHandling == CONDITIONALLY_CASE_SENSITIVE) { |
411 | 421 | notMatchingCaseSegs--; |
412 | 422 | } |
... | ... |
nbproject/configurations.xml
... | ... | @@ -299,22 +299,16 @@ |
299 | 299 | flavor2="8"> |
300 | 300 | <ccTool> |
301 | 301 | <incDir> |
302 | - <pElem>build</pElem> | |
303 | - <pElem>morfeusz</pElem> | |
304 | - <pElem>build/morfeusz</pElem> | |
305 | - <pElem>build/fsa</pElem> | |
306 | 302 | <pElem>/usr/lib/jvm/default-java/include</pElem> |
307 | 303 | <pElem>build/morfeusz/java</pElem> |
308 | 304 | </incDir> |
309 | 305 | <preprocessorList> |
310 | - <Elem>_OPTIMIZE__=1</Elem> | |
311 | 306 | <Elem>__PIC__=2</Elem> |
312 | 307 | <Elem>__pic__=2</Elem> |
313 | 308 | <Elem>jmorfeusz_EXPORTS</Elem> |
314 | 309 | </preprocessorList> |
315 | 310 | <undefinedList> |
316 | 311 | <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem> |
317 | - <Elem>__NO_INLINE__</Elem> | |
318 | 312 | </undefinedList> |
319 | 313 | </ccTool> |
320 | 314 | </item> |
... | ... | @@ -324,20 +318,12 @@ |
324 | 318 | flavor2="4"> |
325 | 319 | <ccTool flags="1"> |
326 | 320 | <incDir> |
327 | - <pElem>build</pElem> | |
328 | - <pElem>morfeusz</pElem> | |
329 | - <pElem>build/morfeusz</pElem> | |
330 | - <pElem>build/fsa</pElem> | |
331 | 321 | <pElem>/usr/lib/perl/5.14/CORE</pElem> |
332 | 322 | <pElem>build/morfeusz/perl</pElem> |
333 | 323 | </incDir> |
334 | 324 | <preprocessorList> |
335 | - <Elem>_OPTIMIZE__=1</Elem> | |
336 | 325 | <Elem>morfeusz_perl_EXPORTS</Elem> |
337 | 326 | </preprocessorList> |
338 | - <undefinedList> | |
339 | - <Elem>__NO_INLINE__</Elem> | |
340 | - </undefinedList> | |
341 | 327 | </ccTool> |
342 | 328 | </item> |
343 | 329 | <item path="build/morfeusz/morfeuszPYTHON_wrap.cxx" |
... | ... | @@ -346,22 +332,16 @@ |
346 | 332 | flavor2="8"> |
347 | 333 | <ccTool> |
348 | 334 | <incDir> |
349 | - <pElem>build</pElem> | |
350 | - <pElem>morfeusz</pElem> | |
351 | - <pElem>build/morfeusz</pElem> | |
352 | - <pElem>build/fsa</pElem> | |
353 | 335 | <pElem>/usr/include/python2.7</pElem> |
354 | 336 | <pElem>build/morfeusz/python</pElem> |
355 | 337 | </incDir> |
356 | 338 | <preprocessorList> |
357 | - <Elem>_OPTIMIZE__=1</Elem> | |
358 | 339 | <Elem>__PIC__=2</Elem> |
359 | 340 | <Elem>__pic__=2</Elem> |
360 | 341 | <Elem>_morfeusz_EXPORTS</Elem> |
361 | 342 | </preprocessorList> |
362 | 343 | <undefinedList> |
363 | 344 | <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem> |
364 | - <Elem>__NO_INLINE__</Elem> | |
365 | 345 | </undefinedList> |
366 | 346 | </ccTool> |
367 | 347 | </item> |
... | ... | @@ -385,9 +365,6 @@ |
385 | 365 | <pElem>build/morfeusz/wrappers/perl</pElem> |
386 | 366 | </incDir> |
387 | 367 | <preprocessorList> |
388 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
389 | - <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
390 | - <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
391 | 368 | <Elem>morfeusz_perl_EXPORTS</Elem> |
392 | 369 | </preprocessorList> |
393 | 370 | </ccTool> |
... | ... | @@ -407,35 +384,15 @@ |
407 | 384 | <item path="default_fsa.cpp" ex="false" tool="1" flavor2="4"> |
408 | 385 | <ccTool> |
409 | 386 | <incDir> |
410 | - <pElem>build</pElem> | |
411 | - <pElem>morfeusz</pElem> | |
412 | - <pElem>build/morfeusz</pElem> | |
413 | - <pElem>build/fsa</pElem> | |
414 | 387 | <pElem>morfeusz/build/morfeusz</pElem> |
415 | 388 | </incDir> |
416 | - <preprocessorList> | |
417 | - <Elem>_OPTIMIZE__=1</Elem> | |
418 | - </preprocessorList> | |
419 | - <undefinedList> | |
420 | - <Elem>__NO_INLINE__</Elem> | |
421 | - </undefinedList> | |
422 | 389 | </ccTool> |
423 | 390 | </item> |
424 | 391 | <item path="default_synth_fsa.cpp" ex="false" tool="1" flavor2="4"> |
425 | 392 | <ccTool> |
426 | 393 | <incDir> |
427 | - <pElem>build</pElem> | |
428 | - <pElem>morfeusz</pElem> | |
429 | - <pElem>build/morfeusz</pElem> | |
430 | - <pElem>build/fsa</pElem> | |
431 | 394 | <pElem>morfeusz/build/morfeusz</pElem> |
432 | 395 | </incDir> |
433 | - <preprocessorList> | |
434 | - <Elem>_OPTIMIZE__=1</Elem> | |
435 | - </preprocessorList> | |
436 | - <undefinedList> | |
437 | - <Elem>__NO_INLINE__</Elem> | |
438 | - </undefinedList> | |
439 | 396 | </ccTool> |
440 | 397 | </item> |
441 | 398 | <folder path="0/c_api"> |
... | ... | @@ -446,9 +403,10 @@ |
446 | 403 | </incDir> |
447 | 404 | <preprocessorList> |
448 | 405 | <Elem>BUILDING_MORFEUSZ</Elem> |
449 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
406 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
450 | 407 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
451 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
408 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
409 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
452 | 410 | <Elem>NDEBUG</Elem> |
453 | 411 | <Elem>libmorfeusz_EXPORTS</Elem> |
454 | 412 | </preprocessorList> |
... | ... | @@ -462,9 +420,10 @@ |
462 | 420 | </incDir> |
463 | 421 | <preprocessorList> |
464 | 422 | <Elem>BUILDING_MORFEUSZ</Elem> |
465 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
423 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
466 | 424 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
467 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
425 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
426 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
468 | 427 | <Elem>NDEBUG</Elem> |
469 | 428 | <Elem>libmorfeusz_EXPORTS</Elem> |
470 | 429 | </preprocessorList> |
... | ... | @@ -478,9 +437,10 @@ |
478 | 437 | </incDir> |
479 | 438 | <preprocessorList> |
480 | 439 | <Elem>BUILDING_MORFEUSZ</Elem> |
481 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
440 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
482 | 441 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
483 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
442 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
443 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
484 | 444 | <Elem>NDEBUG</Elem> |
485 | 445 | <Elem>libmorfeusz_EXPORTS</Elem> |
486 | 446 | </preprocessorList> |
... | ... | @@ -494,9 +454,10 @@ |
494 | 454 | </incDir> |
495 | 455 | <preprocessorList> |
496 | 456 | <Elem>BUILDING_MORFEUSZ</Elem> |
497 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
457 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
498 | 458 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
499 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
459 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
460 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
500 | 461 | <Elem>NDEBUG</Elem> |
501 | 462 | <Elem>libmorfeusz_EXPORTS</Elem> |
502 | 463 | </preprocessorList> |
... | ... | @@ -510,9 +471,10 @@ |
510 | 471 | </incDir> |
511 | 472 | <preprocessorList> |
512 | 473 | <Elem>BUILDING_MORFEUSZ</Elem> |
513 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
474 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
514 | 475 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
515 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
476 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
477 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
516 | 478 | <Elem>NDEBUG</Elem> |
517 | 479 | <Elem>libmorfeusz_EXPORTS</Elem> |
518 | 480 | </preprocessorList> |
... | ... | @@ -526,9 +488,10 @@ |
526 | 488 | </incDir> |
527 | 489 | <preprocessorList> |
528 | 490 | <Elem>BUILDING_MORFEUSZ</Elem> |
529 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
491 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
530 | 492 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
531 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
493 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
494 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
532 | 495 | <Elem>NDEBUG</Elem> |
533 | 496 | <Elem>libmorfeusz_EXPORTS</Elem> |
534 | 497 | </preprocessorList> |
... | ... | @@ -542,9 +505,10 @@ |
542 | 505 | </incDir> |
543 | 506 | <preprocessorList> |
544 | 507 | <Elem>BUILDING_MORFEUSZ</Elem> |
545 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
508 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
546 | 509 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
547 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
510 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
511 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
548 | 512 | <Elem>NDEBUG</Elem> |
549 | 513 | <Elem>libmorfeusz_EXPORTS</Elem> |
550 | 514 | </preprocessorList> |
... | ... | @@ -693,18 +657,19 @@ |
693 | 657 | </incDir> |
694 | 658 | <preprocessorList> |
695 | 659 | <Elem>BUILDING_MORFEUSZ</Elem> |
696 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
660 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
697 | 661 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
662 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
663 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
698 | 664 | </preprocessorList> |
699 | 665 | </ccTool> |
700 | 666 | </folder> |
701 | 667 | <folder path="build/morfeusz/wrappers/java"> |
702 | 668 | <ccTool> |
703 | 669 | <incDir> |
704 | - <pElem>/usr/lib/jvm/default-java/include</pElem> | |
670 | + <pElem>/usr/lib/jvm/java-6-openjdk/include</pElem> | |
705 | 671 | </incDir> |
706 | 672 | <preprocessorList> |
707 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
708 | 673 | <Elem>libjmorfeusz_EXPORTS</Elem> |
709 | 674 | </preprocessorList> |
710 | 675 | </ccTool> |
... | ... | @@ -755,28 +720,30 @@ |
755 | 720 | </folder> |
756 | 721 | <folder path="morfeusz"> |
757 | 722 | <ccTool> |
723 | + <incDir> | |
724 | + <pElem>build</pElem> | |
725 | + <pElem>morfeusz</pElem> | |
726 | + <pElem>build/morfeusz</pElem> | |
727 | + <pElem>build/fsa</pElem> | |
728 | + </incDir> | |
758 | 729 | <preprocessorList> |
759 | 730 | <Elem>NDEBUG</Elem> |
731 | + <Elem>_OPTIMIZE__=1</Elem> | |
760 | 732 | <Elem>libmorfeusz_EXPORTS</Elem> |
761 | 733 | </preprocessorList> |
734 | + <undefinedList> | |
735 | + <Elem>__NO_INLINE__</Elem> | |
736 | + </undefinedList> | |
762 | 737 | </ccTool> |
763 | 738 | </folder> |
764 | 739 | <folder path="morfeusz/java"> |
765 | 740 | <ccTool> |
766 | 741 | <incDir> |
767 | - <pElem>build</pElem> | |
768 | - <pElem>morfeusz</pElem> | |
769 | - <pElem>build/morfeusz</pElem> | |
770 | - <pElem>build/fsa</pElem> | |
771 | 742 | <pElem>/usr/lib/jvm/default-java/include</pElem> |
772 | 743 | </incDir> |
773 | 744 | <preprocessorList> |
774 | - <Elem>_OPTIMIZE__=1</Elem> | |
775 | 745 | <Elem>libjmorfeusz_EXPORTS</Elem> |
776 | 746 | </preprocessorList> |
777 | - <undefinedList> | |
778 | - <Elem>__NO_INLINE__</Elem> | |
779 | - </undefinedList> | |
780 | 747 | </ccTool> |
781 | 748 | </folder> |
782 | 749 | <folder path="morfeusz/python"> |
... | ... | @@ -809,9 +776,10 @@ |
809 | 776 | </incDir> |
810 | 777 | <preprocessorList> |
811 | 778 | <Elem>BUILDING_MORFEUSZ</Elem> |
812 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
779 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
813 | 780 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
814 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
781 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
782 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
815 | 783 | <Elem>NDEBUG</Elem> |
816 | 784 | <Elem>libmorfeusz_EXPORTS</Elem> |
817 | 785 | </preprocessorList> |
... | ... | @@ -825,9 +793,10 @@ |
825 | 793 | </incDir> |
826 | 794 | <preprocessorList> |
827 | 795 | <Elem>BUILDING_MORFEUSZ</Elem> |
828 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
796 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
829 | 797 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
830 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
798 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
799 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
831 | 800 | <Elem>NDEBUG</Elem> |
832 | 801 | <Elem>libmorfeusz_EXPORTS</Elem> |
833 | 802 | </preprocessorList> |
... | ... | @@ -841,9 +810,10 @@ |
841 | 810 | </incDir> |
842 | 811 | <preprocessorList> |
843 | 812 | <Elem>BUILDING_MORFEUSZ</Elem> |
844 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
813 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
845 | 814 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
846 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
815 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
816 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
847 | 817 | <Elem>NDEBUG</Elem> |
848 | 818 | <Elem>libmorfeusz_EXPORTS</Elem> |
849 | 819 | </preprocessorList> |
... | ... | @@ -857,9 +827,10 @@ |
857 | 827 | </incDir> |
858 | 828 | <preprocessorList> |
859 | 829 | <Elem>BUILDING_MORFEUSZ</Elem> |
860 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
830 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
861 | 831 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
862 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
832 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
833 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
863 | 834 | <Elem>NDEBUG</Elem> |
864 | 835 | <Elem>libmorfeusz_EXPORTS</Elem> |
865 | 836 | </preprocessorList> |
... | ... | @@ -873,9 +844,10 @@ |
873 | 844 | </incDir> |
874 | 845 | <preprocessorList> |
875 | 846 | <Elem>BUILDING_MORFEUSZ</Elem> |
876 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
847 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
877 | 848 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
878 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
849 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
850 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
879 | 851 | <Elem>NDEBUG</Elem> |
880 | 852 | <Elem>libmorfeusz_EXPORTS</Elem> |
881 | 853 | </preprocessorList> |
... | ... | @@ -889,9 +861,10 @@ |
889 | 861 | </incDir> |
890 | 862 | <preprocessorList> |
891 | 863 | <Elem>BUILDING_MORFEUSZ</Elem> |
892 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
864 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
893 | 865 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
894 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
866 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
867 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
895 | 868 | <Elem>NDEBUG</Elem> |
896 | 869 | <Elem>libmorfeusz_EXPORTS</Elem> |
897 | 870 | </preprocessorList> |
... | ... | @@ -905,9 +878,10 @@ |
905 | 878 | </incDir> |
906 | 879 | <preprocessorList> |
907 | 880 | <Elem>BUILDING_MORFEUSZ</Elem> |
908 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
881 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
909 | 882 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
910 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
883 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
884 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
911 | 885 | <Elem>NDEBUG</Elem> |
912 | 886 | <Elem>libmorfeusz_EXPORTS</Elem> |
913 | 887 | </preprocessorList> |
... | ... | @@ -921,9 +895,10 @@ |
921 | 895 | </incDir> |
922 | 896 | <preprocessorList> |
923 | 897 | <Elem>BUILDING_MORFEUSZ</Elem> |
924 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
898 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
925 | 899 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
926 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
900 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
901 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
927 | 902 | <Elem>NDEBUG</Elem> |
928 | 903 | <Elem>libmorfeusz_EXPORTS</Elem> |
929 | 904 | </preprocessorList> |
... | ... | @@ -937,38 +912,55 @@ |
937 | 912 | </incDir> |
938 | 913 | <preprocessorList> |
939 | 914 | <Elem>BUILDING_MORFEUSZ</Elem> |
940 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
915 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
941 | 916 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
942 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
917 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
918 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
943 | 919 | <Elem>NDEBUG</Elem> |
944 | 920 | <Elem>libmorfeusz_EXPORTS</Elem> |
945 | 921 | </preprocessorList> |
946 | 922 | </ccTool> |
947 | 923 | </item> |
948 | 924 | <item path="morfeusz/c_api/ResultsManager.cpp" ex="false" tool="1" flavor2="4"> |
925 | + <ccTool flags="1"> | |
926 | + </ccTool> | |
949 | 927 | </item> |
950 | 928 | <item path="morfeusz/case/CaseConverter.cpp" ex="false" tool="1" flavor2="4"> |
929 | + <ccTool flags="1"> | |
930 | + </ccTool> | |
951 | 931 | </item> |
952 | 932 | <item path="morfeusz/case/CasePatternHelper.cpp" |
953 | 933 | ex="false" |
954 | 934 | tool="1" |
955 | 935 | flavor2="4"> |
936 | + <ccTool flags="1"> | |
937 | + </ccTool> | |
956 | 938 | </item> |
957 | 939 | <item path="morfeusz/case/caseconv.cpp" ex="false" tool="1" flavor2="4"> |
940 | + <ccTool flags="1"> | |
941 | + </ccTool> | |
958 | 942 | </item> |
959 | 943 | <item path="morfeusz/charset/CharsetConverter.cpp" |
960 | 944 | ex="false" |
961 | 945 | tool="1" |
962 | 946 | flavor2="4"> |
947 | + <ccTool flags="1"> | |
948 | + </ccTool> | |
963 | 949 | </item> |
964 | 950 | <item path="morfeusz/charset/TextReader.cpp" ex="false" tool="1" flavor2="4"> |
951 | + <ccTool flags="1"> | |
952 | + </ccTool> | |
965 | 953 | </item> |
966 | 954 | <item path="morfeusz/charset/conversion_tables.cpp" |
967 | 955 | ex="false" |
968 | 956 | tool="1" |
969 | 957 | flavor2="4"> |
958 | + <ccTool flags="1"> | |
959 | + </ccTool> | |
970 | 960 | </item> |
971 | 961 | <item path="morfeusz/cli/cli.cpp" ex="false" tool="1" flavor2="4"> |
962 | + <ccTool flags="1"> | |
963 | + </ccTool> | |
972 | 964 | </item> |
973 | 965 | <item path="morfeusz/const.cpp" ex="false" tool="1" flavor2="4"> |
974 | 966 | <ccTool flags="1"> |
... | ... | @@ -978,9 +970,10 @@ |
978 | 970 | </incDir> |
979 | 971 | <preprocessorList> |
980 | 972 | <Elem>BUILDING_MORFEUSZ</Elem> |
981 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
973 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
982 | 974 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
983 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
975 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
976 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
984 | 977 | <Elem>NDEBUG</Elem> |
985 | 978 | <Elem>libmorfeusz_EXPORTS</Elem> |
986 | 979 | </preprocessorList> |
... | ... | @@ -990,28 +983,40 @@ |
990 | 983 | ex="false" |
991 | 984 | tool="1" |
992 | 985 | flavor2="4"> |
986 | + <ccTool flags="1"> | |
987 | + </ccTool> | |
993 | 988 | </item> |
994 | 989 | <item path="morfeusz/deserialization/MorphDeserializer.cpp" |
995 | 990 | ex="false" |
996 | 991 | tool="1" |
997 | 992 | flavor2="4"> |
993 | + <ccTool flags="1"> | |
994 | + </ccTool> | |
998 | 995 | </item> |
999 | 996 | <item path="morfeusz/deserialization/morphInterps/InterpretedChunksDecoder.cpp" |
1000 | 997 | ex="false" |
1001 | 998 | tool="1" |
1002 | 999 | flavor2="4"> |
1000 | + <ccTool flags="1"> | |
1001 | + </ccTool> | |
1003 | 1002 | </item> |
1004 | 1003 | <item path="morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.cpp" |
1005 | 1004 | ex="false" |
1006 | 1005 | tool="1" |
1007 | 1006 | flavor2="4"> |
1007 | + <ccTool flags="1"> | |
1008 | + </ccTool> | |
1008 | 1009 | </item> |
1009 | 1010 | <item path="morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Generator.cpp" |
1010 | 1011 | ex="false" |
1011 | 1012 | tool="1" |
1012 | 1013 | flavor2="4"> |
1014 | + <ccTool flags="1"> | |
1015 | + </ccTool> | |
1013 | 1016 | </item> |
1014 | 1017 | <item path="morfeusz/fsa/const.cpp" ex="false" tool="1" flavor2="4"> |
1018 | + <ccTool flags="1"> | |
1019 | + </ccTool> | |
1015 | 1020 | </item> |
1016 | 1021 | <item path="morfeusz/morfeusz2_c.cpp" ex="false" tool="1" flavor2="4"> |
1017 | 1022 | <ccTool flags="1"> |
... | ... | @@ -1021,9 +1026,10 @@ |
1021 | 1026 | </incDir> |
1022 | 1027 | <preprocessorList> |
1023 | 1028 | <Elem>BUILDING_MORFEUSZ</Elem> |
1024 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
1029 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
1025 | 1030 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
1026 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
1031 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
1032 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
1027 | 1033 | <Elem>NDEBUG</Elem> |
1028 | 1034 | <Elem>libmorfeusz_EXPORTS</Elem> |
1029 | 1035 | </preprocessorList> |
... | ... | @@ -1037,9 +1043,10 @@ |
1037 | 1043 | </incDir> |
1038 | 1044 | <preprocessorList> |
1039 | 1045 | <Elem>BUILDING_MORFEUSZ</Elem> |
1040 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
1046 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
1041 | 1047 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
1042 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
1048 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
1049 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
1043 | 1050 | <Elem>NDEBUG</Elem> |
1044 | 1051 | </preprocessorList> |
1045 | 1052 | </ccTool> |
... | ... | @@ -1052,16 +1059,21 @@ |
1052 | 1059 | </incDir> |
1053 | 1060 | <preprocessorList> |
1054 | 1061 | <Elem>BUILDING_MORFEUSZ</Elem> |
1055 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
1062 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
1056 | 1063 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
1057 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
1064 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
1065 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
1058 | 1066 | <Elem>NDEBUG</Elem> |
1059 | 1067 | </preprocessorList> |
1060 | 1068 | </ccTool> |
1061 | 1069 | </item> |
1062 | 1070 | <item path="morfeusz/segrules/SegrulesFSA.cpp" ex="false" tool="1" flavor2="4"> |
1071 | + <ccTool flags="1"> | |
1072 | + </ccTool> | |
1063 | 1073 | </item> |
1064 | 1074 | <item path="morfeusz/segrules/segrules.cpp" ex="false" tool="1" flavor2="4"> |
1075 | + <ccTool flags="1"> | |
1076 | + </ccTool> | |
1065 | 1077 | </item> |
1066 | 1078 | <item path="morfeusz/test_runner.cpp" ex="false" tool="1" flavor2="4"> |
1067 | 1079 | <ccTool flags="0"> |
... | ... | @@ -1071,9 +1083,10 @@ |
1071 | 1083 | </incDir> |
1072 | 1084 | <preprocessorList> |
1073 | 1085 | <Elem>BUILDING_MORFEUSZ</Elem> |
1074 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
1086 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
1075 | 1087 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
1076 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
1088 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
1089 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
1077 | 1090 | <Elem>NDEBUG</Elem> |
1078 | 1091 | </preprocessorList> |
1079 | 1092 | </ccTool> |
... | ... | @@ -1088,9 +1101,10 @@ |
1088 | 1101 | </incDir> |
1089 | 1102 | <preprocessorList> |
1090 | 1103 | <Elem>BUILDING_MORFEUSZ</Elem> |
1091 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
1104 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
1092 | 1105 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
1093 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
1106 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
1107 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
1094 | 1108 | <Elem>NDEBUG</Elem> |
1095 | 1109 | </preprocessorList> |
1096 | 1110 | </ccTool> |
... | ... | @@ -1103,9 +1117,10 @@ |
1103 | 1117 | </incDir> |
1104 | 1118 | <preprocessorList> |
1105 | 1119 | <Elem>BUILDING_MORFEUSZ</Elem> |
1106 | - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> | |
1120 | + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> | |
1107 | 1121 | <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> |
1108 | - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> | |
1122 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> | |
1123 | + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> | |
1109 | 1124 | <Elem>NDEBUG</Elem> |
1110 | 1125 | </preprocessorList> |
1111 | 1126 | </ccTool> |
... | ... |
tests/analyzer/test_dict_copyright/dictionary.tab
tests/analyzer/test_digits/input.txt
... | ... | @@ -5,3 +5,4 @@ |
5 | 5 | 012341 |
6 | 6 | 1234seasdfa |
7 | 7 | sdfa123 |
8 | +111100002222000033330000444400005555000066660000777700008888000099990000111100002222000033330000444400005555000066660000777700008888000099990000111100002222000033330000444400005555000066660000777700008888000099990000111100002222000033330000444400005555000066660000777700008888000099990000 | |
... | ... |
tests/analyzer/test_digits/output.txt
... | ... | @@ -5,4 +5,5 @@ |
5 | 5 | [0,1,012341,012341,dig,_,_] |
6 | 6 | [0,1,1234seasdfa,1234seasdfa,ign,_,_] |
7 | 7 | [0,1,sdfa123,sdfa123,ign,_,_] |
8 | +[0,1,111100002222000033330000444400005555000066660000777700008888000099990000111100002222000033330000444400005555000066660000777700008888000099990000111100002222000033330000444400005555000066660000777700008888000099990000111100002222000033330000444400005555000066660000777700008888000099990000,111100002222000033330000444400005555000066660000777700008888000099990000111100002222000033330000444400005555000066660000777700008888000099990000111100002222000033330000444400005555000066660000777700008888000099990000111100002222000033330000444400005555000066660000777700008888000099990000,dig,_,_] | |
8 | 9 | |
... | ... |