From 666111294fd69b02e7aba488584f620218f0efa7 Mon Sep 17 00:00:00 2001 From: Michał Lenart <michall@ipipan.waw.pl> Date: Thu, 11 Sep 2014 20:47:44 +0000 Subject: [PATCH] poprawki w obsłudze dużych liczb (i generalnie dużej liczby segmentów z ">"); poprawki w obsłudze DICT-ID --- fsabuilder/morfeusz_builder | 10 +++++++--- fsabuilder/morfeuszbuilder/fsa/convertinput.py | 4 +++- morfeusz/MorfeuszImpl.cpp | 14 ++++++++++++-- nbproject/configurations.xml | 215 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------------------------------------------------------------- tests/analyzer/test_dict_copyright/dictionary.tab | 1 + tests/analyzer/test_digits/input.txt | 1 + tests/analyzer/test_digits/output.txt | 1 + 7 files changed, 140 insertions(+), 106 deletions(-) diff --git a/fsabuilder/morfeusz_builder b/fsabuilder/morfeusz_builder index feee299..1b0b352 100644 --- a/fsabuilder/morfeusz_builder +++ b/fsabuilder/morfeusz_builder @@ -184,12 +184,16 @@ def _readDictIdAndCopyright(inputFiles): inCopyright = False for linenum, line in enumerate(f, start=1): if dictId is None and line.startswith(u'#!DICT-ID'): - dictIdTag, _, dictId = line.strip().partition(u' ')[2] + dictIdTag, _, dictId = line.strip().partition(u' ') exceptions.validate( dictIdTag == u'#!DICT-ID', u'Dictionary ID tag must be followed by a space character and dictionary ID string') - - + exceptions.validate( + len(line.split(u' ')) > 1, + u'%s:%d: Must provide DICT-ID' % (inputFile, linenum)) + exceptions.validate( + len(line.split(u' ')) == 2, + u'%s:%d: DICT-ID must not contain spaces' % (inputFile, linenum)) elif copyright is None and line.startswith(u'#<COPYRIGHT>'): exceptions.validate( line.strip() == u'#<COPYRIGHT>', diff --git a/fsabuilder/morfeuszbuilder/fsa/convertinput.py b/fsabuilder/morfeuszbuilder/fsa/convertinput.py index c9de7ff..c3c4269 100644 --- a/fsabuilder/morfeuszbuilder/fsa/convertinput.py +++ b/fsabuilder/morfeuszbuilder/fsa/convertinput.py @@ -46,8 +46,10 @@ class LineParser(object): return True elif line and not ' ' in ''.join(line.split('\t')[:2]): return False + elif line.startswith(u'#!DICT-ID'): + return True else: - logging.warn(u'Ignoring line: "%s" - contains space in text form or lemma' % line.strip().decode('utf8')) + logging.warn(u'Ignoring line: "%s" - contains space in text form or lemma' % (line.strip())) return True def parseLine(self, line): diff --git a/morfeusz/MorfeuszImpl.cpp b/morfeusz/MorfeuszImpl.cpp index c56cac9..0f1dab1 100644 --- a/morfeusz/MorfeuszImpl.cpp +++ b/morfeusz/MorfeuszImpl.cpp @@ -55,12 +55,16 @@ namespace morfeusz { } static void doShiftOrth(InterpretedChunk& from, InterpretedChunk& to) { - to.prefixChunks.insert(to.prefixChunks.end(), from.prefixChunks.begin(), from.prefixChunks.end()); -// from.prefixChunks.resize(0); + to.prefixChunks.swap(from.prefixChunks); // from.prefixChunks are ignored anyway. Will swap them back in doUnshiftOrth to.prefixChunks.push_back(from); to.textStartPtr = from.textStartPtr; from.orthWasShifted = true; } + + static void doUnshiftOrth(InterpretedChunk& from, InterpretedChunk& to) { + to.prefixChunks.swap(from.prefixChunks); + from.prefixChunks.pop_back(); + } static void feedStateDirectly( const FSAType& fsa, @@ -359,6 +363,7 @@ namespace morfeusz { SegrulesState newSegrulesState; env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, isAtWhitespace, newSegrulesState); if (!newSegrulesState.failed) { + InterpretedChunk ic( createChunk(ig, reader, newSegrulesState.shiftOrthFromPrevious, homonymId)); @@ -386,8 +391,10 @@ namespace morfeusz { bool caseMatches, const SegrulesState& newSegrulesState, InterpretedChunk& ic) const { + bool orthShifted = false; if (!accum.empty() && accum.back().shiftOrth) { doShiftOrth(accum.back(), ic); + orthShifted = true; } if (!caseMatches && options.caseHandling == CONDITIONALLY_CASE_SENSITIVE) { notMatchingCaseSegs++; @@ -407,6 +414,9 @@ namespace morfeusz { doProcessOneWord(env, newReader, newSegrulesState); } accum.pop_back(); + if (orthShifted) { + doUnshiftOrth(accum.back(), ic); + } if (!caseMatches && options.caseHandling == CONDITIONALLY_CASE_SENSITIVE) { notMatchingCaseSegs--; } diff --git a/nbproject/configurations.xml b/nbproject/configurations.xml index 14706bf..c0028a7 100644 --- a/nbproject/configurations.xml +++ b/nbproject/configurations.xml @@ -299,22 +299,16 @@ flavor2="8"> <ccTool> <incDir> - <pElem>build</pElem> - <pElem>morfeusz</pElem> - <pElem>build/morfeusz</pElem> - <pElem>build/fsa</pElem> <pElem>/usr/lib/jvm/default-java/include</pElem> <pElem>build/morfeusz/java</pElem> </incDir> <preprocessorList> - <Elem>_OPTIMIZE__=1</Elem> <Elem>__PIC__=2</Elem> <Elem>__pic__=2</Elem> <Elem>jmorfeusz_EXPORTS</Elem> </preprocessorList> <undefinedList> <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem> - <Elem>__NO_INLINE__</Elem> </undefinedList> </ccTool> </item> @@ -324,20 +318,12 @@ flavor2="4"> <ccTool flags="1"> <incDir> - <pElem>build</pElem> - <pElem>morfeusz</pElem> - <pElem>build/morfeusz</pElem> - <pElem>build/fsa</pElem> <pElem>/usr/lib/perl/5.14/CORE</pElem> <pElem>build/morfeusz/perl</pElem> </incDir> <preprocessorList> - <Elem>_OPTIMIZE__=1</Elem> <Elem>morfeusz_perl_EXPORTS</Elem> </preprocessorList> - <undefinedList> - <Elem>__NO_INLINE__</Elem> - </undefinedList> </ccTool> </item> <item path="build/morfeusz/morfeuszPYTHON_wrap.cxx" @@ -346,22 +332,16 @@ flavor2="8"> <ccTool> <incDir> - <pElem>build</pElem> - <pElem>morfeusz</pElem> - <pElem>build/morfeusz</pElem> - <pElem>build/fsa</pElem> <pElem>/usr/include/python2.7</pElem> <pElem>build/morfeusz/python</pElem> </incDir> <preprocessorList> - <Elem>_OPTIMIZE__=1</Elem> <Elem>__PIC__=2</Elem> <Elem>__pic__=2</Elem> <Elem>_morfeusz_EXPORTS</Elem> </preprocessorList> <undefinedList> <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem> - <Elem>__NO_INLINE__</Elem> </undefinedList> </ccTool> </item> @@ -385,9 +365,6 @@ <pElem>build/morfeusz/wrappers/perl</pElem> </incDir> <preprocessorList> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> - <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> - <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>morfeusz_perl_EXPORTS</Elem> </preprocessorList> </ccTool> @@ -407,35 +384,15 @@ <item path="default_fsa.cpp" ex="false" tool="1" flavor2="4"> <ccTool> <incDir> - <pElem>build</pElem> - <pElem>morfeusz</pElem> - <pElem>build/morfeusz</pElem> - <pElem>build/fsa</pElem> <pElem>morfeusz/build/morfeusz</pElem> </incDir> - <preprocessorList> - <Elem>_OPTIMIZE__=1</Elem> - </preprocessorList> - <undefinedList> - <Elem>__NO_INLINE__</Elem> - </undefinedList> </ccTool> </item> <item path="default_synth_fsa.cpp" ex="false" tool="1" flavor2="4"> <ccTool> <incDir> - <pElem>build</pElem> - <pElem>morfeusz</pElem> - <pElem>build/morfeusz</pElem> - <pElem>build/fsa</pElem> <pElem>morfeusz/build/morfeusz</pElem> </incDir> - <preprocessorList> - <Elem>_OPTIMIZE__=1</Elem> - </preprocessorList> - <undefinedList> - <Elem>__NO_INLINE__</Elem> - </undefinedList> </ccTool> </item> <folder path="0/c_api"> @@ -446,9 +403,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> @@ -462,9 +420,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> @@ -478,9 +437,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> @@ -494,9 +454,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> @@ -510,9 +471,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> @@ -526,9 +488,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> @@ -542,9 +505,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> @@ -693,18 +657,19 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> </preprocessorList> </ccTool> </folder> <folder path="build/morfeusz/wrappers/java"> <ccTool> <incDir> - <pElem>/usr/lib/jvm/default-java/include</pElem> + <pElem>/usr/lib/jvm/java-6-openjdk/include</pElem> </incDir> <preprocessorList> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> <Elem>libjmorfeusz_EXPORTS</Elem> </preprocessorList> </ccTool> @@ -755,28 +720,30 @@ </folder> <folder path="morfeusz"> <ccTool> + <incDir> + <pElem>build</pElem> + <pElem>morfeusz</pElem> + <pElem>build/morfeusz</pElem> + <pElem>build/fsa</pElem> + </incDir> <preprocessorList> <Elem>NDEBUG</Elem> + <Elem>_OPTIMIZE__=1</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> + <undefinedList> + <Elem>__NO_INLINE__</Elem> + </undefinedList> </ccTool> </folder> <folder path="morfeusz/java"> <ccTool> <incDir> - <pElem>build</pElem> - <pElem>morfeusz</pElem> - <pElem>build/morfeusz</pElem> - <pElem>build/fsa</pElem> <pElem>/usr/lib/jvm/default-java/include</pElem> </incDir> <preprocessorList> - <Elem>_OPTIMIZE__=1</Elem> <Elem>libjmorfeusz_EXPORTS</Elem> </preprocessorList> - <undefinedList> - <Elem>__NO_INLINE__</Elem> - </undefinedList> </ccTool> </folder> <folder path="morfeusz/python"> @@ -809,9 +776,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> @@ -825,9 +793,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> @@ -841,9 +810,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> @@ -857,9 +827,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> @@ -873,9 +844,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> @@ -889,9 +861,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> @@ -905,9 +878,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> @@ -921,9 +895,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> @@ -937,38 +912,55 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> </ccTool> </item> <item path="morfeusz/c_api/ResultsManager.cpp" ex="false" tool="1" flavor2="4"> + <ccTool flags="1"> + </ccTool> </item> <item path="morfeusz/case/CaseConverter.cpp" ex="false" tool="1" flavor2="4"> + <ccTool flags="1"> + </ccTool> </item> <item path="morfeusz/case/CasePatternHelper.cpp" ex="false" tool="1" flavor2="4"> + <ccTool flags="1"> + </ccTool> </item> <item path="morfeusz/case/caseconv.cpp" ex="false" tool="1" flavor2="4"> + <ccTool flags="1"> + </ccTool> </item> <item path="morfeusz/charset/CharsetConverter.cpp" ex="false" tool="1" flavor2="4"> + <ccTool flags="1"> + </ccTool> </item> <item path="morfeusz/charset/TextReader.cpp" ex="false" tool="1" flavor2="4"> + <ccTool flags="1"> + </ccTool> </item> <item path="morfeusz/charset/conversion_tables.cpp" ex="false" tool="1" flavor2="4"> + <ccTool flags="1"> + </ccTool> </item> <item path="morfeusz/cli/cli.cpp" ex="false" tool="1" flavor2="4"> + <ccTool flags="1"> + </ccTool> </item> <item path="morfeusz/const.cpp" ex="false" tool="1" flavor2="4"> <ccTool flags="1"> @@ -978,9 +970,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> @@ -990,28 +983,40 @@ ex="false" tool="1" flavor2="4"> + <ccTool flags="1"> + </ccTool> </item> <item path="morfeusz/deserialization/MorphDeserializer.cpp" ex="false" tool="1" flavor2="4"> + <ccTool flags="1"> + </ccTool> </item> <item path="morfeusz/deserialization/morphInterps/InterpretedChunksDecoder.cpp" ex="false" tool="1" flavor2="4"> + <ccTool flags="1"> + </ccTool> </item> <item path="morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.cpp" ex="false" tool="1" flavor2="4"> + <ccTool flags="1"> + </ccTool> </item> <item path="morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Generator.cpp" ex="false" tool="1" flavor2="4"> + <ccTool flags="1"> + </ccTool> </item> <item path="morfeusz/fsa/const.cpp" ex="false" tool="1" flavor2="4"> + <ccTool flags="1"> + </ccTool> </item> <item path="morfeusz/morfeusz2_c.cpp" ex="false" tool="1" flavor2="4"> <ccTool flags="1"> @@ -1021,9 +1026,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> @@ -1037,9 +1043,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> </preprocessorList> </ccTool> @@ -1052,16 +1059,21 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> </preprocessorList> </ccTool> </item> <item path="morfeusz/segrules/SegrulesFSA.cpp" ex="false" tool="1" flavor2="4"> + <ccTool flags="1"> + </ccTool> </item> <item path="morfeusz/segrules/segrules.cpp" ex="false" tool="1" flavor2="4"> + <ccTool flags="1"> + </ccTool> </item> <item path="morfeusz/test_runner.cpp" ex="false" tool="1" flavor2="4"> <ccTool flags="0"> @@ -1071,9 +1083,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> </preprocessorList> </ccTool> @@ -1088,9 +1101,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> </preprocessorList> </ccTool> @@ -1103,9 +1117,10 @@ </incDir> <preprocessorList> <Elem>BUILDING_MORFEUSZ</Elem> - <Elem>MORFEUSZ2_VERSION="2.0.0_dupa"</Elem> + <Elem>MORFEUSZ2_VERSION="2.0.0_dupa-20140831"</Elem> <Elem>MORFEUSZ_DEFAULT_DICT_NAME="dupa"</Elem> - <Elem>MORFEUSZ_EMBEDDED_DEFAULT_DICT</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH1="/usr/local/share/morfeusz/dictionaries"</Elem> + <Elem>MORFEUSZ_DICTIONARY_SEARCH_PATH2="/usr/share/morfeusz/dictionaries"</Elem> <Elem>NDEBUG</Elem> </preprocessorList> </ccTool> diff --git a/tests/analyzer/test_dict_copyright/dictionary.tab b/tests/analyzer/test_dict_copyright/dictionary.tab index 41b54d5..2a8b199 100644 --- a/tests/analyzer/test_dict_copyright/dictionary.tab +++ b/tests/analyzer/test_dict_copyright/dictionary.tab @@ -1,3 +1,4 @@ +#!DICT-ID identyfikator_słownika #<COPYRIGHT> To jest testowa notka copyrightowa. #</COPYRIGHT> diff --git a/tests/analyzer/test_digits/input.txt b/tests/analyzer/test_digits/input.txt index af63b3c..dad1e6d 100644 --- a/tests/analyzer/test_digits/input.txt +++ b/tests/analyzer/test_digits/input.txt @@ -5,3 +5,4 @@ 012341 1234seasdfa sdfa123 +111100002222000033330000444400005555000066660000777700008888000099990000111100002222000033330000444400005555000066660000777700008888000099990000111100002222000033330000444400005555000066660000777700008888000099990000111100002222000033330000444400005555000066660000777700008888000099990000 diff --git a/tests/analyzer/test_digits/output.txt b/tests/analyzer/test_digits/output.txt index dd00cac..23cd74c 100644 --- a/tests/analyzer/test_digits/output.txt +++ b/tests/analyzer/test_digits/output.txt @@ -5,4 +5,5 @@ [0,1,012341,012341,dig,_,_] [0,1,1234seasdfa,1234seasdfa,ign,_,_] [0,1,sdfa123,sdfa123,ign,_,_] +[0,1,111100002222000033330000444400005555000066660000777700008888000099990000111100002222000033330000444400005555000066660000777700008888000099990000111100002222000033330000444400005555000066660000777700008888000099990000111100002222000033330000444400005555000066660000777700008888000099990000,111100002222000033330000444400005555000066660000777700008888000099990000111100002222000033330000444400005555000066660000777700008888000099990000111100002222000033330000444400005555000066660000777700008888000099990000111100002222000033330000444400005555000066660000777700008888000099990000,dig,_,_] -- libgit2 0.22.2