Commit b1ced679e43ea0b0a324278e09ef10fb5c09701f

Authored by Michał Lenart
1 parent c4676798

nowsza wersja automatu (bez bitu mówiącego o tym, czy trzymać stan w tablicy)

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@207 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
fsabuilder/morfeusz_builder
... ... @@ -258,9 +258,9 @@ if __name__ == '__main__':
258 258 opts = _parseOptions()
259 259 try:
260 260 main(opts)
261   -# except Exception as ex:
262   -# print >> sys.stderr, unicode(ex).encode('utf8')
263   -# sys.exit(1)
  261 + except Exception as ex:
  262 + print >> sys.stderr, unicode(ex).encode('utf8')
  263 + sys.exit(1)
264 264 finally:
265 265 pass
266 266  
... ...
fsabuilder/morfeuszbuilder/fsa/serializer.py
... ... @@ -42,7 +42,7 @@ class Serializer(object):
42 42  
43 43 # get the Morfeusz file format version that is being encoded
44 44 def getVersion(self):
45   - return 17
  45 + return 18
46 46  
47 47 def serialize2CppFile(self, fname, isGenerator, headerFilename="data/default_fsa.hpp"):
48 48 res = []
... ... @@ -236,7 +236,7 @@ class VLengthSerializer1(Serializer):
236 236 self.label2ShortLabel = None
237 237  
238 238 self.ACCEPTING_FLAG = 0b10000000
239   - self.ARRAY_FLAG = 0b01000000
  239 +# self.ARRAY_FLAG = 0b01000000
240 240  
241 241 def getImplementationCode(self):
242 242 return 1
... ... @@ -277,13 +277,13 @@ class VLengthSerializer1(Serializer):
277 277 firstByte = 0
278 278 if state.isAccepting():
279 279 firstByte |= self.ACCEPTING_FLAG
280   - if self.stateShouldBeAnArray(state):
281   - firstByte |= self.ARRAY_FLAG
282   - if state.transitionsNum < 63:
  280 +# if self.stateShouldBeAnArray(state):
  281 +# firstByte |= self.ARRAY_FLAG
  282 + if state.transitionsNum < 127:
283 283 firstByte |= state.transitionsNum
284 284 res.append(firstByte)
285 285 else:
286   - firstByte |= 63
  286 + firstByte |= 127
287 287 res.append(firstByte)
288 288 res.append(state.transitionsNum)
289 289  
... ...
morfeusz/Morfeusz.cpp
... ... @@ -223,7 +223,6 @@ void Morfeusz::doProcessOneWord(
223 223 }
224 224 else {
225 225 assert(!newSegrulesState.sink);
226   - // cerr << "will process " << currInput << endl;
227 226 TextReader newReader(reader.getCurrPtr(), reader.getEndPtr(), env);
228 227 doProcessOneWord(env, newReader, newSegrulesState);
229 228 }
... ...
morfeusz/charset/TextReader.cpp
1 1 /*
2   - * File: StatefulCharsetConverter.cpp
  2 + * File: TextReader.cpp
3 3 * Author: lennyn
4 4 *
5 5 * Created on May 28, 2014, 11:43 AM
... ...
morfeusz/charset/TextReader.hpp
1 1 /*
2   - * File: StatefulCharsetConverter.hpp
  2 + * File: TextReader.hpp
3 3 * Author: lennyn
4 4 *
5 5 * Created on May 28, 2014, 11:43 AM
... ...
morfeusz/fsa/cfsa1_impl.hpp
... ... @@ -15,8 +15,8 @@
15 15 #include "../deserializationUtils.hpp"
16 16  
17 17 static const unsigned char CFSA1_ACCEPTING_FLAG = 128;
18   -static const unsigned char CFSA1_ARRAY_FLAG = 64;
19   -static const unsigned char CFSA1_TRANSITIONS_NUM_MASK = 63;
  18 +//static const unsigned char CFSA1_ARRAY_FLAG = 64;
  19 +static const unsigned char CFSA1_TRANSITIONS_NUM_MASK = 127;
20 20  
21 21 static const unsigned char CFSA1_OFFSET_SIZE_MASK = 3;
22 22  
... ... @@ -136,14 +136,17 @@ void CompressedFSA1<T>::doProceedToNext(const char c, State<T>& state, bool init
136 136 if (label == c) {
137 137 found = true;
138 138 break;
139   - } else {
  139 + }
  140 + else {
140 141 currPtr += td.offsetSize;
141 142 }
142   - } else {
  143 + }
  144 + else {
143 145 found = true;
144 146 break;
145 147 }
146   - } else {
  148 + }
  149 + else {
147 150 if (td.shortLabel == 0) {
148 151 currPtr++;
149 152 }
... ... @@ -152,7 +155,8 @@ void CompressedFSA1<T>::doProceedToNext(const char c, State<T>& state, bool init
152 155 }
153 156 if (!found) {
154 157 state.setNextAsSink();
155   - } else {
  158 + }
  159 + else {
156 160 uint32_t offset;
157 161 switch (td.offsetSize) {
158 162 case 0:
... ...
morfeusz/fsa/const.cpp
... ... @@ -2,7 +2,7 @@
2 2 #include "const.hpp"
3 3  
4 4 extern const uint32_t MAGIC_NUMBER = 0x8fc2bc1b;
5   -extern const uint8_t VERSION_NUM = 17;
  5 +extern const uint8_t VERSION_NUM = 18;
6 6  
7 7 extern const unsigned int VERSION_NUM_OFFSET = 4;
8 8 extern const unsigned int IMPLEMENTATION_NUM_OFFSET = 5;
... ...
nbproject/configurations.xml
... ... @@ -103,7 +103,8 @@
103 103 <flagsDictionary>
104 104 <element flagsID="0" commonFlags="-O2 -std=c++98"/>
105 105 <element flagsID="1" commonFlags="-O2 -std=c++98 -fPIC"/>
106   - <element flagsID="2" commonFlags="-std=c++98 -O3 -fPIC"/>
  106 + <element flagsID="2" commonFlags="-std=c++98 -O3"/>
  107 + <element flagsID="3" commonFlags="-std=c++98 -O3 -fPIC"/>
107 108 </flagsDictionary>
108 109 <codeAssistance>
109 110 </codeAssistance>
... ... @@ -145,7 +146,7 @@
145 146 </ccTool>
146 147 </item>
147 148 <item path="build/morfeusz/java/swigJAVA.cpp" ex="false" tool="1" flavor2="4">
148   - <ccTool flags="2">
  149 + <ccTool flags="3">
149 150 </ccTool>
150 151 </item>
151 152 <item path="build/morfeusz/morfeuszJAVA_wrap.cxx"
... ... @@ -382,14 +383,15 @@
382 383 </ccTool>
383 384 </item>
384 385 <item path="morfeusz/Morfeusz.cpp" ex="false" tool="1" flavor2="4">
385   - <ccTool flags="1">
  386 + <ccTool flags="3">
386 387 <preprocessorList>
  388 + <Elem>NDEBUG</Elem>
387 389 <Elem>libmorfeusz_EXPORTS</Elem>
388 390 </preprocessorList>
389 391 </ccTool>
390 392 </item>
391 393 <item path="morfeusz/MorphDeserializer.cpp" ex="false" tool="1" flavor2="4">
392   - <ccTool flags="2">
  394 + <ccTool flags="3">
393 395 <preprocessorList>
394 396 <Elem>NDEBUG</Elem>
395 397 <Elem>libmorfeusz_EXPORTS</Elem>
... ... @@ -425,7 +427,7 @@
425 427 ex="false"
426 428 tool="1"
427 429 flavor2="4">
428   - <ccTool flags="2">
  430 + <ccTool flags="3">
429 431 <preprocessorList>
430 432 <Elem>NDEBUG</Elem>
431 433 </preprocessorList>
... ... @@ -532,6 +534,11 @@
532 534 </ccTool>
533 535 </item>
534 536 <item path="morfeusz/morfeusz_analyzer.cpp" ex="false" tool="1" flavor2="4">
  537 + <ccTool flags="2">
  538 + <preprocessorList>
  539 + <Elem>NDEBUG</Elem>
  540 + </preprocessorList>
  541 + </ccTool>
535 542 </item>
536 543 <item path="morfeusz/morfeusz_generator.cpp" ex="false" tool="1" flavor2="4">
537 544 </item>
... ...