Commit b1ced679e43ea0b0a324278e09ef10fb5c09701f
1 parent: c4676798
nowsza wersja automatu (bez bitu mówiącego o tym, czy trzymać stan w tablicy)
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@207 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing 8 changed files with 34 additions and 24 deletions
fsabuilder/morfeusz_builder
| @@ -258,9 +258,9 @@ if __name__ == '__main__': | @@ -258,9 +258,9 @@ if __name__ == '__main__': | ||
| 258 | opts = _parseOptions() | 258 | opts = _parseOptions() |
| 259 | try: | 259 | try: |
| 260 | main(opts) | 260 | main(opts) |
| 261 | -# except Exception as ex: | ||
| 262 | -# print >> sys.stderr, unicode(ex).encode('utf8') | ||
| 263 | -# sys.exit(1) | 261 | + except Exception as ex: |
| 262 | + print >> sys.stderr, unicode(ex).encode('utf8') | ||
| 263 | + sys.exit(1) | ||
| 264 | finally: | 264 | finally: |
| 265 | pass | 265 | pass |
| 266 | 266 |
fsabuilder/morfeuszbuilder/fsa/serializer.py
| @@ -42,7 +42,7 @@ class Serializer(object): | @@ -42,7 +42,7 @@ class Serializer(object): | ||
| 42 | 42 | ||
| 43 | # get the Morfeusz file format version that is being encoded | 43 | # get the Morfeusz file format version that is being encoded |
| 44 | def getVersion(self): | 44 | def getVersion(self): |
| 45 | - return 17 | 45 | + return 18 |
| 46 | 46 | ||
| 47 | def serialize2CppFile(self, fname, isGenerator, headerFilename="data/default_fsa.hpp"): | 47 | def serialize2CppFile(self, fname, isGenerator, headerFilename="data/default_fsa.hpp"): |
| 48 | res = [] | 48 | res = [] |
| @@ -236,7 +236,7 @@ class VLengthSerializer1(Serializer): | @@ -236,7 +236,7 @@ class VLengthSerializer1(Serializer): | ||
| 236 | self.label2ShortLabel = None | 236 | self.label2ShortLabel = None |
| 237 | 237 | ||
| 238 | self.ACCEPTING_FLAG = 0b10000000 | 238 | self.ACCEPTING_FLAG = 0b10000000 |
| 239 | - self.ARRAY_FLAG = 0b01000000 | 239 | +# self.ARRAY_FLAG = 0b01000000 |
| 240 | 240 | ||
| 241 | def getImplementationCode(self): | 241 | def getImplementationCode(self): |
| 242 | return 1 | 242 | return 1 |
| @@ -277,13 +277,13 @@ class VLengthSerializer1(Serializer): | @@ -277,13 +277,13 @@ class VLengthSerializer1(Serializer): | ||
| 277 | firstByte = 0 | 277 | firstByte = 0 |
| 278 | if state.isAccepting(): | 278 | if state.isAccepting(): |
| 279 | firstByte |= self.ACCEPTING_FLAG | 279 | firstByte |= self.ACCEPTING_FLAG |
| 280 | - if self.stateShouldBeAnArray(state): | ||
| 281 | - firstByte |= self.ARRAY_FLAG | ||
| 282 | - if state.transitionsNum < 63: | 280 | +# if self.stateShouldBeAnArray(state): |
| 281 | +# firstByte |= self.ARRAY_FLAG | ||
| 282 | + if state.transitionsNum < 127: | ||
| 283 | firstByte |= state.transitionsNum | 283 | firstByte |= state.transitionsNum |
| 284 | res.append(firstByte) | 284 | res.append(firstByte) |
| 285 | else: | 285 | else: |
| 286 | - firstByte |= 63 | 286 | + firstByte |= 127 |
| 287 | res.append(firstByte) | 287 | res.append(firstByte) |
| 288 | res.append(state.transitionsNum) | 288 | res.append(state.transitionsNum) |
| 289 | 289 |
morfeusz/Morfeusz.cpp
| @@ -223,7 +223,6 @@ void Morfeusz::doProcessOneWord( | @@ -223,7 +223,6 @@ void Morfeusz::doProcessOneWord( | ||
| 223 | } | 223 | } |
| 224 | else { | 224 | else { |
| 225 | assert(!newSegrulesState.sink); | 225 | assert(!newSegrulesState.sink); |
| 226 | - // cerr << "will process " << currInput << endl; | ||
| 227 | TextReader newReader(reader.getCurrPtr(), reader.getEndPtr(), env); | 226 | TextReader newReader(reader.getCurrPtr(), reader.getEndPtr(), env); |
| 228 | doProcessOneWord(env, newReader, newSegrulesState); | 227 | doProcessOneWord(env, newReader, newSegrulesState); |
| 229 | } | 228 | } |
morfeusz/charset/TextReader.cpp
morfeusz/charset/TextReader.hpp
morfeusz/fsa/cfsa1_impl.hpp
| @@ -15,8 +15,8 @@ | @@ -15,8 +15,8 @@ | ||
| 15 | #include "../deserializationUtils.hpp" | 15 | #include "../deserializationUtils.hpp" |
| 16 | 16 | ||
| 17 | static const unsigned char CFSA1_ACCEPTING_FLAG = 128; | 17 | static const unsigned char CFSA1_ACCEPTING_FLAG = 128; |
| 18 | -static const unsigned char CFSA1_ARRAY_FLAG = 64; | ||
| 19 | -static const unsigned char CFSA1_TRANSITIONS_NUM_MASK = 63; | 18 | +//static const unsigned char CFSA1_ARRAY_FLAG = 64; |
| 19 | +static const unsigned char CFSA1_TRANSITIONS_NUM_MASK = 127; | ||
| 20 | 20 | ||
| 21 | static const unsigned char CFSA1_OFFSET_SIZE_MASK = 3; | 21 | static const unsigned char CFSA1_OFFSET_SIZE_MASK = 3; |
| 22 | 22 | ||
| @@ -136,14 +136,17 @@ void CompressedFSA1<T>::doProceedToNext(const char c, State<T>& state, bool init | @@ -136,14 +136,17 @@ void CompressedFSA1<T>::doProceedToNext(const char c, State<T>& state, bool init | ||
| 136 | if (label == c) { | 136 | if (label == c) { |
| 137 | found = true; | 137 | found = true; |
| 138 | break; | 138 | break; |
| 139 | - } else { | 139 | + } |
| 140 | + else { | ||
| 140 | currPtr += td.offsetSize; | 141 | currPtr += td.offsetSize; |
| 141 | } | 142 | } |
| 142 | - } else { | 143 | + } |
| 144 | + else { | ||
| 143 | found = true; | 145 | found = true; |
| 144 | break; | 146 | break; |
| 145 | } | 147 | } |
| 146 | - } else { | 148 | + } |
| 149 | + else { | ||
| 147 | if (td.shortLabel == 0) { | 150 | if (td.shortLabel == 0) { |
| 148 | currPtr++; | 151 | currPtr++; |
| 149 | } | 152 | } |
| @@ -152,7 +155,8 @@ void CompressedFSA1<T>::doProceedToNext(const char c, State<T>& state, bool init | @@ -152,7 +155,8 @@ void CompressedFSA1<T>::doProceedToNext(const char c, State<T>& state, bool init | ||
| 152 | } | 155 | } |
| 153 | if (!found) { | 156 | if (!found) { |
| 154 | state.setNextAsSink(); | 157 | state.setNextAsSink(); |
| 155 | - } else { | 158 | + } |
| 159 | + else { | ||
| 156 | uint32_t offset; | 160 | uint32_t offset; |
| 157 | switch (td.offsetSize) { | 161 | switch (td.offsetSize) { |
| 158 | case 0: | 162 | case 0: |
morfeusz/fsa/const.cpp
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | #include "const.hpp" | 2 | #include "const.hpp" |
| 3 | 3 | ||
| 4 | extern const uint32_t MAGIC_NUMBER = 0x8fc2bc1b; | 4 | extern const uint32_t MAGIC_NUMBER = 0x8fc2bc1b; |
| 5 | -extern const uint8_t VERSION_NUM = 17; | 5 | +extern const uint8_t VERSION_NUM = 18; |
| 6 | 6 | ||
| 7 | extern const unsigned int VERSION_NUM_OFFSET = 4; | 7 | extern const unsigned int VERSION_NUM_OFFSET = 4; |
| 8 | extern const unsigned int IMPLEMENTATION_NUM_OFFSET = 5; | 8 | extern const unsigned int IMPLEMENTATION_NUM_OFFSET = 5; |
nbproject/configurations.xml
| @@ -103,7 +103,8 @@ | @@ -103,7 +103,8 @@ | ||
| 103 | <flagsDictionary> | 103 | <flagsDictionary> |
| 104 | <element flagsID="0" commonFlags="-O2 -std=c++98"/> | 104 | <element flagsID="0" commonFlags="-O2 -std=c++98"/> |
| 105 | <element flagsID="1" commonFlags="-O2 -std=c++98 -fPIC"/> | 105 | <element flagsID="1" commonFlags="-O2 -std=c++98 -fPIC"/> |
| 106 | - <element flagsID="2" commonFlags="-std=c++98 -O3 -fPIC"/> | 106 | + <element flagsID="2" commonFlags="-std=c++98 -O3"/> |
| 107 | + <element flagsID="3" commonFlags="-std=c++98 -O3 -fPIC"/> | ||
| 107 | </flagsDictionary> | 108 | </flagsDictionary> |
| 108 | <codeAssistance> | 109 | <codeAssistance> |
| 109 | </codeAssistance> | 110 | </codeAssistance> |
| @@ -145,7 +146,7 @@ | @@ -145,7 +146,7 @@ | ||
| 145 | </ccTool> | 146 | </ccTool> |
| 146 | </item> | 147 | </item> |
| 147 | <item path="build/morfeusz/java/swigJAVA.cpp" ex="false" tool="1" flavor2="4"> | 148 | <item path="build/morfeusz/java/swigJAVA.cpp" ex="false" tool="1" flavor2="4"> |
| 148 | - <ccTool flags="2"> | 149 | + <ccTool flags="3"> |
| 149 | </ccTool> | 150 | </ccTool> |
| 150 | </item> | 151 | </item> |
| 151 | <item path="build/morfeusz/morfeuszJAVA_wrap.cxx" | 152 | <item path="build/morfeusz/morfeuszJAVA_wrap.cxx" |
| @@ -382,14 +383,15 @@ | @@ -382,14 +383,15 @@ | ||
| 382 | </ccTool> | 383 | </ccTool> |
| 383 | </item> | 384 | </item> |
| 384 | <item path="morfeusz/Morfeusz.cpp" ex="false" tool="1" flavor2="4"> | 385 | <item path="morfeusz/Morfeusz.cpp" ex="false" tool="1" flavor2="4"> |
| 385 | - <ccTool flags="1"> | 386 | + <ccTool flags="3"> |
| 386 | <preprocessorList> | 387 | <preprocessorList> |
| 388 | + <Elem>NDEBUG</Elem> | ||
| 387 | <Elem>libmorfeusz_EXPORTS</Elem> | 389 | <Elem>libmorfeusz_EXPORTS</Elem> |
| 388 | </preprocessorList> | 390 | </preprocessorList> |
| 389 | </ccTool> | 391 | </ccTool> |
| 390 | </item> | 392 | </item> |
| 391 | <item path="morfeusz/MorphDeserializer.cpp" ex="false" tool="1" flavor2="4"> | 393 | <item path="morfeusz/MorphDeserializer.cpp" ex="false" tool="1" flavor2="4"> |
| 392 | - <ccTool flags="2"> | 394 | + <ccTool flags="3"> |
| 393 | <preprocessorList> | 395 | <preprocessorList> |
| 394 | <Elem>NDEBUG</Elem> | 396 | <Elem>NDEBUG</Elem> |
| 395 | <Elem>libmorfeusz_EXPORTS</Elem> | 397 | <Elem>libmorfeusz_EXPORTS</Elem> |
| @@ -425,7 +427,7 @@ | @@ -425,7 +427,7 @@ | ||
| 425 | ex="false" | 427 | ex="false" |
| 426 | tool="1" | 428 | tool="1" |
| 427 | flavor2="4"> | 429 | flavor2="4"> |
| 428 | - <ccTool flags="2"> | 430 | + <ccTool flags="3"> |
| 429 | <preprocessorList> | 431 | <preprocessorList> |
| 430 | <Elem>NDEBUG</Elem> | 432 | <Elem>NDEBUG</Elem> |
| 431 | </preprocessorList> | 433 | </preprocessorList> |
| @@ -532,6 +534,11 @@ | @@ -532,6 +534,11 @@ | ||
| 532 | </ccTool> | 534 | </ccTool> |
| 533 | </item> | 535 | </item> |
| 534 | <item path="morfeusz/morfeusz_analyzer.cpp" ex="false" tool="1" flavor2="4"> | 536 | <item path="morfeusz/morfeusz_analyzer.cpp" ex="false" tool="1" flavor2="4"> |
| 537 | + <ccTool flags="2"> | ||
| 538 | + <preprocessorList> | ||
| 539 | + <Elem>NDEBUG</Elem> | ||
| 540 | + </preprocessorList> | ||
| 541 | + </ccTool> | ||
| 535 | </item> | 542 | </item> |
| 536 | <item path="morfeusz/morfeusz_generator.cpp" ex="false" tool="1" flavor2="4"> | 543 | <item path="morfeusz/morfeusz_generator.cpp" ex="false" tool="1" flavor2="4"> |
| 537 | </item> | 544 | </item> |