From b1ced679e43ea0b0a324278e09ef10fb5c09701f Mon Sep 17 00:00:00 2001 From: Michał Lenart <michall@ipipan.waw.pl> Date: Fri, 6 Jun 2014 09:04:58 +0000 Subject: [PATCH] nowsza wersja automatu (bez bitu mówiącego o tym, czy trzymać stan w tablicy) --- fsabuilder/morfeusz_builder | 6 +++--- fsabuilder/morfeuszbuilder/fsa/serializer.py | 12 ++++++------ morfeusz/Morfeusz.cpp | 1 - morfeusz/charset/TextReader.cpp | 2 +- morfeusz/charset/TextReader.hpp | 2 +- morfeusz/fsa/cfsa1_impl.hpp | 16 ++++++++++------ morfeusz/fsa/const.cpp | 2 +- nbproject/configurations.xml | 17 ++++++++++++----- 8 files changed, 34 insertions(+), 24 deletions(-) diff --git a/fsabuilder/morfeusz_builder b/fsabuilder/morfeusz_builder index 9fee487..53af796 100644 --- a/fsabuilder/morfeusz_builder +++ b/fsabuilder/morfeusz_builder @@ -258,9 +258,9 @@ if __name__ == '__main__': opts = _parseOptions() try: main(opts) -# except Exception as ex: -# print >> sys.stderr, unicode(ex).encode('utf8') -# sys.exit(1) + except Exception as ex: + print >> sys.stderr, unicode(ex).encode('utf8') + sys.exit(1) finally: pass diff --git a/fsabuilder/morfeuszbuilder/fsa/serializer.py b/fsabuilder/morfeuszbuilder/fsa/serializer.py index d78e662..44188c6 100644 --- a/fsabuilder/morfeuszbuilder/fsa/serializer.py +++ b/fsabuilder/morfeuszbuilder/fsa/serializer.py @@ -42,7 +42,7 @@ class Serializer(object): # get the Morfeusz file format version that is being encoded def getVersion(self): - return 17 + return 18 def serialize2CppFile(self, fname, isGenerator, headerFilename="data/default_fsa.hpp"): res = [] @@ -236,7 +236,7 @@ class VLengthSerializer1(Serializer): self.label2ShortLabel = None self.ACCEPTING_FLAG = 0b10000000 - self.ARRAY_FLAG = 0b01000000 +# self.ARRAY_FLAG = 0b01000000 def getImplementationCode(self): return 1 @@ -277,13 +277,13 @@ class VLengthSerializer1(Serializer): firstByte = 0 if state.isAccepting(): firstByte |= self.ACCEPTING_FLAG - if self.stateShouldBeAnArray(state): - firstByte |= self.ARRAY_FLAG - if state.transitionsNum < 63: +# if self.stateShouldBeAnArray(state): +# firstByte |= self.ARRAY_FLAG + if state.transitionsNum < 127: firstByte |= state.transitionsNum res.append(firstByte) else: - firstByte |= 63 + firstByte |= 127 res.append(firstByte) res.append(state.transitionsNum) diff --git a/morfeusz/Morfeusz.cpp b/morfeusz/Morfeusz.cpp index 5264b47..24c9fdf 100644 --- a/morfeusz/Morfeusz.cpp +++ b/morfeusz/Morfeusz.cpp @@ -223,7 +223,6 @@ void Morfeusz::doProcessOneWord( } else { assert(!newSegrulesState.sink); - // cerr << "will process " << currInput << endl; TextReader newReader(reader.getCurrPtr(), reader.getEndPtr(), env); doProcessOneWord(env, newReader, newSegrulesState); } diff --git a/morfeusz/charset/TextReader.cpp b/morfeusz/charset/TextReader.cpp index ba9bd46..d028bf8 100644 --- a/morfeusz/charset/TextReader.cpp +++ b/morfeusz/charset/TextReader.cpp @@ -1,5 +1,5 @@ /* - * File: StatefulCharsetConverter.cpp + * File: TextReader.cpp * Author: lennyn * * Created on May 28, 2014, 11:43 AM diff --git a/morfeusz/charset/TextReader.hpp b/morfeusz/charset/TextReader.hpp index 212f125..f74c10b 100644 --- a/morfeusz/charset/TextReader.hpp +++ b/morfeusz/charset/TextReader.hpp @@ -1,5 +1,5 @@ /* - * File: StatefulCharsetConverter.hpp + * File: TextReader.hpp * Author: lennyn * * Created on May 28, 2014, 11:43 AM diff --git a/morfeusz/fsa/cfsa1_impl.hpp b/morfeusz/fsa/cfsa1_impl.hpp index 7348fc1..a25b0aa 100644 --- a/morfeusz/fsa/cfsa1_impl.hpp +++ b/morfeusz/fsa/cfsa1_impl.hpp @@ -15,8 +15,8 @@ #include "../deserializationUtils.hpp" static const unsigned char CFSA1_ACCEPTING_FLAG = 128; -static const unsigned char CFSA1_ARRAY_FLAG = 64; -static const unsigned char CFSA1_TRANSITIONS_NUM_MASK = 63; +//static const unsigned char CFSA1_ARRAY_FLAG = 64; +static const unsigned char CFSA1_TRANSITIONS_NUM_MASK = 127; static const unsigned char CFSA1_OFFSET_SIZE_MASK = 3; @@ -136,14 +136,17 @@ void CompressedFSA1<T>::doProceedToNext(const char c, State<T>& state, bool init if (label == c) { found = true; break; - } else { + } + else { currPtr += td.offsetSize; } - } else { + } + else { found = true; break; } - } else { + } + else { if (td.shortLabel == 0) { currPtr++; } @@ -152,7 +155,8 @@ void CompressedFSA1<T>::doProceedToNext(const char c, State<T>& state, bool init } if (!found) { state.setNextAsSink(); - } else { + } + else { uint32_t offset; switch (td.offsetSize) { case 0: diff --git a/morfeusz/fsa/const.cpp b/morfeusz/fsa/const.cpp index ea74006..3352670 100644 --- a/morfeusz/fsa/const.cpp +++ b/morfeusz/fsa/const.cpp @@ -2,7 +2,7 @@ #include "const.hpp" extern const uint32_t MAGIC_NUMBER = 0x8fc2bc1b; -extern const uint8_t VERSION_NUM = 17; +extern const uint8_t VERSION_NUM = 18; extern const unsigned int VERSION_NUM_OFFSET = 4; extern const unsigned int IMPLEMENTATION_NUM_OFFSET = 5; diff --git a/nbproject/configurations.xml b/nbproject/configurations.xml index 51639f5..54102cd 100644 --- a/nbproject/configurations.xml +++ b/nbproject/configurations.xml @@ -103,7 +103,8 @@ <flagsDictionary> <element flagsID="0" commonFlags="-O2 -std=c++98"/> <element flagsID="1" commonFlags="-O2 -std=c++98 -fPIC"/> - <element flagsID="2" commonFlags="-std=c++98 -O3 -fPIC"/> + <element flagsID="2" commonFlags="-std=c++98 -O3"/> + <element flagsID="3" commonFlags="-std=c++98 -O3 -fPIC"/> </flagsDictionary> <codeAssistance> </codeAssistance> @@ -145,7 +146,7 @@ </ccTool> </item> <item path="build/morfeusz/java/swigJAVA.cpp" ex="false" tool="1" flavor2="4"> - <ccTool flags="2"> + <ccTool flags="3"> </ccTool> </item> <item path="build/morfeusz/morfeuszJAVA_wrap.cxx" @@ -382,14 +383,15 @@ </ccTool> </item> <item path="morfeusz/Morfeusz.cpp" ex="false" tool="1" flavor2="4"> - <ccTool flags="1"> + <ccTool flags="3"> <preprocessorList> + <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> </preprocessorList> </ccTool> </item> <item path="morfeusz/MorphDeserializer.cpp" ex="false" tool="1" flavor2="4"> - <ccTool flags="2"> + <ccTool flags="3"> <preprocessorList> <Elem>NDEBUG</Elem> <Elem>libmorfeusz_EXPORTS</Elem> @@ -425,7 +427,7 @@ ex="false" tool="1" flavor2="4"> - <ccTool flags="2"> + <ccTool flags="3"> <preprocessorList> <Elem>NDEBUG</Elem> </preprocessorList> @@ -532,6 +534,11 @@ </ccTool> </item> <item path="morfeusz/morfeusz_analyzer.cpp" ex="false" tool="1" flavor2="4"> + <ccTool flags="2"> + <preprocessorList> + <Elem>NDEBUG</Elem> + </preprocessorList> + </ccTool> </item> <item path="morfeusz/morfeusz_generator.cpp" ex="false" tool="1" flavor2="4"> </item> -- libgit2 0.22.2