Commit b1ced679e43ea0b0a324278e09ef10fb5c09701f
1 parent: c4676798
nowsza wersja automatu (bez bitu mówiącego o tym, czy trzymać stan w tablicy)
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@207 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing 8 changed files with 34 additions and 24 deletions
fsabuilder/morfeusz_builder
... | ... | @@ -258,9 +258,9 @@ if __name__ == '__main__': |
258 | 258 | opts = _parseOptions() |
259 | 259 | try: |
260 | 260 | main(opts) |
261 | -# except Exception as ex: | |
262 | -# print >> sys.stderr, unicode(ex).encode('utf8') | |
263 | -# sys.exit(1) | |
261 | + except Exception as ex: | |
262 | + print >> sys.stderr, unicode(ex).encode('utf8') | |
263 | + sys.exit(1) | |
264 | 264 | finally: |
265 | 265 | pass |
266 | 266 | |
... | ... |
fsabuilder/morfeuszbuilder/fsa/serializer.py
... | ... | @@ -42,7 +42,7 @@ class Serializer(object): |
42 | 42 | |
43 | 43 | # get the Morfeusz file format version that is being encoded |
44 | 44 | def getVersion(self): |
45 | - return 17 | |
45 | + return 18 | |
46 | 46 | |
47 | 47 | def serialize2CppFile(self, fname, isGenerator, headerFilename="data/default_fsa.hpp"): |
48 | 48 | res = [] |
... | ... | @@ -236,7 +236,7 @@ class VLengthSerializer1(Serializer): |
236 | 236 | self.label2ShortLabel = None |
237 | 237 | |
238 | 238 | self.ACCEPTING_FLAG = 0b10000000 |
239 | - self.ARRAY_FLAG = 0b01000000 | |
239 | +# self.ARRAY_FLAG = 0b01000000 | |
240 | 240 | |
241 | 241 | def getImplementationCode(self): |
242 | 242 | return 1 |
... | ... | @@ -277,13 +277,13 @@ class VLengthSerializer1(Serializer): |
277 | 277 | firstByte = 0 |
278 | 278 | if state.isAccepting(): |
279 | 279 | firstByte |= self.ACCEPTING_FLAG |
280 | - if self.stateShouldBeAnArray(state): | |
281 | - firstByte |= self.ARRAY_FLAG | |
282 | - if state.transitionsNum < 63: | |
280 | +# if self.stateShouldBeAnArray(state): | |
281 | +# firstByte |= self.ARRAY_FLAG | |
282 | + if state.transitionsNum < 127: | |
283 | 283 | firstByte |= state.transitionsNum |
284 | 284 | res.append(firstByte) |
285 | 285 | else: |
286 | - firstByte |= 63 | |
286 | + firstByte |= 127 | |
287 | 287 | res.append(firstByte) |
288 | 288 | res.append(state.transitionsNum) |
289 | 289 | |
... | ... |
morfeusz/Morfeusz.cpp
... | ... | @@ -223,7 +223,6 @@ void Morfeusz::doProcessOneWord( |
223 | 223 | } |
224 | 224 | else { |
225 | 225 | assert(!newSegrulesState.sink); |
226 | - // cerr << "will process " << currInput << endl; | |
227 | 226 | TextReader newReader(reader.getCurrPtr(), reader.getEndPtr(), env); |
228 | 227 | doProcessOneWord(env, newReader, newSegrulesState); |
229 | 228 | } |
... | ... |
morfeusz/charset/TextReader.cpp
morfeusz/charset/TextReader.hpp
morfeusz/fsa/cfsa1_impl.hpp
... | ... | @@ -15,8 +15,8 @@ |
15 | 15 | #include "../deserializationUtils.hpp" |
16 | 16 | |
17 | 17 | static const unsigned char CFSA1_ACCEPTING_FLAG = 128; |
18 | -static const unsigned char CFSA1_ARRAY_FLAG = 64; | |
19 | -static const unsigned char CFSA1_TRANSITIONS_NUM_MASK = 63; | |
18 | +//static const unsigned char CFSA1_ARRAY_FLAG = 64; | |
19 | +static const unsigned char CFSA1_TRANSITIONS_NUM_MASK = 127; | |
20 | 20 | |
21 | 21 | static const unsigned char CFSA1_OFFSET_SIZE_MASK = 3; |
22 | 22 | |
... | ... | @@ -136,14 +136,17 @@ void CompressedFSA1<T>::doProceedToNext(const char c, State<T>& state, bool init |
136 | 136 | if (label == c) { |
137 | 137 | found = true; |
138 | 138 | break; |
139 | - } else { | |
139 | + } | |
140 | + else { | |
140 | 141 | currPtr += td.offsetSize; |
141 | 142 | } |
142 | - } else { | |
143 | + } | |
144 | + else { | |
143 | 145 | found = true; |
144 | 146 | break; |
145 | 147 | } |
146 | - } else { | |
148 | + } | |
149 | + else { | |
147 | 150 | if (td.shortLabel == 0) { |
148 | 151 | currPtr++; |
149 | 152 | } |
... | ... | @@ -152,7 +155,8 @@ void CompressedFSA1<T>::doProceedToNext(const char c, State<T>& state, bool init |
152 | 155 | } |
153 | 156 | if (!found) { |
154 | 157 | state.setNextAsSink(); |
155 | - } else { | |
158 | + } | |
159 | + else { | |
156 | 160 | uint32_t offset; |
157 | 161 | switch (td.offsetSize) { |
158 | 162 | case 0: |
... | ... |
morfeusz/fsa/const.cpp
... | ... | @@ -2,7 +2,7 @@ |
2 | 2 | #include "const.hpp" |
3 | 3 | |
4 | 4 | extern const uint32_t MAGIC_NUMBER = 0x8fc2bc1b; |
5 | -extern const uint8_t VERSION_NUM = 17; | |
5 | +extern const uint8_t VERSION_NUM = 18; | |
6 | 6 | |
7 | 7 | extern const unsigned int VERSION_NUM_OFFSET = 4; |
8 | 8 | extern const unsigned int IMPLEMENTATION_NUM_OFFSET = 5; |
... | ... |
nbproject/configurations.xml
... | ... | @@ -103,7 +103,8 @@ |
103 | 103 | <flagsDictionary> |
104 | 104 | <element flagsID="0" commonFlags="-O2 -std=c++98"/> |
105 | 105 | <element flagsID="1" commonFlags="-O2 -std=c++98 -fPIC"/> |
106 | - <element flagsID="2" commonFlags="-std=c++98 -O3 -fPIC"/> | |
106 | + <element flagsID="2" commonFlags="-std=c++98 -O3"/> | |
107 | + <element flagsID="3" commonFlags="-std=c++98 -O3 -fPIC"/> | |
107 | 108 | </flagsDictionary> |
108 | 109 | <codeAssistance> |
109 | 110 | </codeAssistance> |
... | ... | @@ -145,7 +146,7 @@ |
145 | 146 | </ccTool> |
146 | 147 | </item> |
147 | 148 | <item path="build/morfeusz/java/swigJAVA.cpp" ex="false" tool="1" flavor2="4"> |
148 | - <ccTool flags="2"> | |
149 | + <ccTool flags="3"> | |
149 | 150 | </ccTool> |
150 | 151 | </item> |
151 | 152 | <item path="build/morfeusz/morfeuszJAVA_wrap.cxx" |
... | ... | @@ -382,14 +383,15 @@ |
382 | 383 | </ccTool> |
383 | 384 | </item> |
384 | 385 | <item path="morfeusz/Morfeusz.cpp" ex="false" tool="1" flavor2="4"> |
385 | - <ccTool flags="1"> | |
386 | + <ccTool flags="3"> | |
386 | 387 | <preprocessorList> |
388 | + <Elem>NDEBUG</Elem> | |
387 | 389 | <Elem>libmorfeusz_EXPORTS</Elem> |
388 | 390 | </preprocessorList> |
389 | 391 | </ccTool> |
390 | 392 | </item> |
391 | 393 | <item path="morfeusz/MorphDeserializer.cpp" ex="false" tool="1" flavor2="4"> |
392 | - <ccTool flags="2"> | |
394 | + <ccTool flags="3"> | |
393 | 395 | <preprocessorList> |
394 | 396 | <Elem>NDEBUG</Elem> |
395 | 397 | <Elem>libmorfeusz_EXPORTS</Elem> |
... | ... | @@ -425,7 +427,7 @@ |
425 | 427 | ex="false" |
426 | 428 | tool="1" |
427 | 429 | flavor2="4"> |
428 | - <ccTool flags="2"> | |
430 | + <ccTool flags="3"> | |
429 | 431 | <preprocessorList> |
430 | 432 | <Elem>NDEBUG</Elem> |
431 | 433 | </preprocessorList> |
... | ... | @@ -532,6 +534,11 @@ |
532 | 534 | </ccTool> |
533 | 535 | </item> |
534 | 536 | <item path="morfeusz/morfeusz_analyzer.cpp" ex="false" tool="1" flavor2="4"> |
537 | + <ccTool flags="2"> | |
538 | + <preprocessorList> | |
539 | + <Elem>NDEBUG</Elem> | |
540 | + </preprocessorList> | |
541 | + </ccTool> | |
535 | 542 | </item> |
536 | 543 | <item path="morfeusz/morfeusz_generator.cpp" ex="false" tool="1" flavor2="4"> |
537 | 544 | </item> |
... | ... |