Commit b1ced679e43ea0b0a324278e09ef10fb5c09701f

Authored by Michał Lenart
1 parent c4676798

nowsza wersja automatu (bez bitu mówiącego o tym, czy trzymać stan w tablicy)

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@207 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
fsabuilder/morfeusz_builder
@@ -258,9 +258,9 @@ if __name__ == '__main__': @@ -258,9 +258,9 @@ if __name__ == '__main__':
258 opts = _parseOptions() 258 opts = _parseOptions()
259 try: 259 try:
260 main(opts) 260 main(opts)
261 -# except Exception as ex:  
262 -# print >> sys.stderr, unicode(ex).encode('utf8')  
263 -# sys.exit(1) 261 + except Exception as ex:
  262 + print >> sys.stderr, unicode(ex).encode('utf8')
  263 + sys.exit(1)
264 finally: 264 finally:
265 pass 265 pass
266 266
fsabuilder/morfeuszbuilder/fsa/serializer.py
@@ -42,7 +42,7 @@ class Serializer(object): @@ -42,7 +42,7 @@ class Serializer(object):
42 42
43 # get the Morfeusz file format version that is being encoded 43 # get the Morfeusz file format version that is being encoded
44 def getVersion(self): 44 def getVersion(self):
45 - return 17 45 + return 18
46 46
47 def serialize2CppFile(self, fname, isGenerator, headerFilename="data/default_fsa.hpp"): 47 def serialize2CppFile(self, fname, isGenerator, headerFilename="data/default_fsa.hpp"):
48 res = [] 48 res = []
@@ -236,7 +236,7 @@ class VLengthSerializer1(Serializer): @@ -236,7 +236,7 @@ class VLengthSerializer1(Serializer):
236 self.label2ShortLabel = None 236 self.label2ShortLabel = None
237 237
238 self.ACCEPTING_FLAG = 0b10000000 238 self.ACCEPTING_FLAG = 0b10000000
239 - self.ARRAY_FLAG = 0b01000000 239 +# self.ARRAY_FLAG = 0b01000000
240 240
241 def getImplementationCode(self): 241 def getImplementationCode(self):
242 return 1 242 return 1
@@ -277,13 +277,13 @@ class VLengthSerializer1(Serializer): @@ -277,13 +277,13 @@ class VLengthSerializer1(Serializer):
277 firstByte = 0 277 firstByte = 0
278 if state.isAccepting(): 278 if state.isAccepting():
279 firstByte |= self.ACCEPTING_FLAG 279 firstByte |= self.ACCEPTING_FLAG
280 - if self.stateShouldBeAnArray(state):  
281 - firstByte |= self.ARRAY_FLAG  
282 - if state.transitionsNum < 63: 280 +# if self.stateShouldBeAnArray(state):
  281 +# firstByte |= self.ARRAY_FLAG
  282 + if state.transitionsNum < 127:
283 firstByte |= state.transitionsNum 283 firstByte |= state.transitionsNum
284 res.append(firstByte) 284 res.append(firstByte)
285 else: 285 else:
286 - firstByte |= 63 286 + firstByte |= 127
287 res.append(firstByte) 287 res.append(firstByte)
288 res.append(state.transitionsNum) 288 res.append(state.transitionsNum)
289 289
morfeusz/Morfeusz.cpp
@@ -223,7 +223,6 @@ void Morfeusz::doProcessOneWord( @@ -223,7 +223,6 @@ void Morfeusz::doProcessOneWord(
223 } 223 }
224 else { 224 else {
225 assert(!newSegrulesState.sink); 225 assert(!newSegrulesState.sink);
226 - // cerr << "will process " << currInput << endl;  
227 TextReader newReader(reader.getCurrPtr(), reader.getEndPtr(), env); 226 TextReader newReader(reader.getCurrPtr(), reader.getEndPtr(), env);
228 doProcessOneWord(env, newReader, newSegrulesState); 227 doProcessOneWord(env, newReader, newSegrulesState);
229 } 228 }
morfeusz/charset/TextReader.cpp
1 /* 1 /*
2 - * File: StatefulCharsetConverter.cpp 2 + * File: TextReader.cpp
3 * Author: lennyn 3 * Author: lennyn
4 * 4 *
5 * Created on May 28, 2014, 11:43 AM 5 * Created on May 28, 2014, 11:43 AM
morfeusz/charset/TextReader.hpp
1 /* 1 /*
2 - * File: StatefulCharsetConverter.hpp 2 + * File: TextReader.hpp
3 * Author: lennyn 3 * Author: lennyn
4 * 4 *
5 * Created on May 28, 2014, 11:43 AM 5 * Created on May 28, 2014, 11:43 AM
morfeusz/fsa/cfsa1_impl.hpp
@@ -15,8 +15,8 @@ @@ -15,8 +15,8 @@
15 #include "../deserializationUtils.hpp" 15 #include "../deserializationUtils.hpp"
16 16
17 static const unsigned char CFSA1_ACCEPTING_FLAG = 128; 17 static const unsigned char CFSA1_ACCEPTING_FLAG = 128;
18 -static const unsigned char CFSA1_ARRAY_FLAG = 64;  
19 -static const unsigned char CFSA1_TRANSITIONS_NUM_MASK = 63; 18 +//static const unsigned char CFSA1_ARRAY_FLAG = 64;
  19 +static const unsigned char CFSA1_TRANSITIONS_NUM_MASK = 127;
20 20
21 static const unsigned char CFSA1_OFFSET_SIZE_MASK = 3; 21 static const unsigned char CFSA1_OFFSET_SIZE_MASK = 3;
22 22
@@ -136,14 +136,17 @@ void CompressedFSA1<T>::doProceedToNext(const char c, State<T>& state, bool init @@ -136,14 +136,17 @@ void CompressedFSA1<T>::doProceedToNext(const char c, State<T>& state, bool init
136 if (label == c) { 136 if (label == c) {
137 found = true; 137 found = true;
138 break; 138 break;
139 - } else { 139 + }
  140 + else {
140 currPtr += td.offsetSize; 141 currPtr += td.offsetSize;
141 } 142 }
142 - } else { 143 + }
  144 + else {
143 found = true; 145 found = true;
144 break; 146 break;
145 } 147 }
146 - } else { 148 + }
  149 + else {
147 if (td.shortLabel == 0) { 150 if (td.shortLabel == 0) {
148 currPtr++; 151 currPtr++;
149 } 152 }
@@ -152,7 +155,8 @@ void CompressedFSA1<T>::doProceedToNext(const char c, State<T>& state, bool init @@ -152,7 +155,8 @@ void CompressedFSA1<T>::doProceedToNext(const char c, State<T>& state, bool init
152 } 155 }
153 if (!found) { 156 if (!found) {
154 state.setNextAsSink(); 157 state.setNextAsSink();
155 - } else { 158 + }
  159 + else {
156 uint32_t offset; 160 uint32_t offset;
157 switch (td.offsetSize) { 161 switch (td.offsetSize) {
158 case 0: 162 case 0:
morfeusz/fsa/const.cpp
@@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
2 #include "const.hpp" 2 #include "const.hpp"
3 3
4 extern const uint32_t MAGIC_NUMBER = 0x8fc2bc1b; 4 extern const uint32_t MAGIC_NUMBER = 0x8fc2bc1b;
5 -extern const uint8_t VERSION_NUM = 17; 5 +extern const uint8_t VERSION_NUM = 18;
6 6
7 extern const unsigned int VERSION_NUM_OFFSET = 4; 7 extern const unsigned int VERSION_NUM_OFFSET = 4;
8 extern const unsigned int IMPLEMENTATION_NUM_OFFSET = 5; 8 extern const unsigned int IMPLEMENTATION_NUM_OFFSET = 5;
nbproject/configurations.xml
@@ -103,7 +103,8 @@ @@ -103,7 +103,8 @@
103 <flagsDictionary> 103 <flagsDictionary>
104 <element flagsID="0" commonFlags="-O2 -std=c++98"/> 104 <element flagsID="0" commonFlags="-O2 -std=c++98"/>
105 <element flagsID="1" commonFlags="-O2 -std=c++98 -fPIC"/> 105 <element flagsID="1" commonFlags="-O2 -std=c++98 -fPIC"/>
106 - <element flagsID="2" commonFlags="-std=c++98 -O3 -fPIC"/> 106 + <element flagsID="2" commonFlags="-std=c++98 -O3"/>
  107 + <element flagsID="3" commonFlags="-std=c++98 -O3 -fPIC"/>
107 </flagsDictionary> 108 </flagsDictionary>
108 <codeAssistance> 109 <codeAssistance>
109 </codeAssistance> 110 </codeAssistance>
@@ -145,7 +146,7 @@ @@ -145,7 +146,7 @@
145 </ccTool> 146 </ccTool>
146 </item> 147 </item>
147 <item path="build/morfeusz/java/swigJAVA.cpp" ex="false" tool="1" flavor2="4"> 148 <item path="build/morfeusz/java/swigJAVA.cpp" ex="false" tool="1" flavor2="4">
148 - <ccTool flags="2"> 149 + <ccTool flags="3">
149 </ccTool> 150 </ccTool>
150 </item> 151 </item>
151 <item path="build/morfeusz/morfeuszJAVA_wrap.cxx" 152 <item path="build/morfeusz/morfeuszJAVA_wrap.cxx"
@@ -382,14 +383,15 @@ @@ -382,14 +383,15 @@
382 </ccTool> 383 </ccTool>
383 </item> 384 </item>
384 <item path="morfeusz/Morfeusz.cpp" ex="false" tool="1" flavor2="4"> 385 <item path="morfeusz/Morfeusz.cpp" ex="false" tool="1" flavor2="4">
385 - <ccTool flags="1"> 386 + <ccTool flags="3">
386 <preprocessorList> 387 <preprocessorList>
  388 + <Elem>NDEBUG</Elem>
387 <Elem>libmorfeusz_EXPORTS</Elem> 389 <Elem>libmorfeusz_EXPORTS</Elem>
388 </preprocessorList> 390 </preprocessorList>
389 </ccTool> 391 </ccTool>
390 </item> 392 </item>
391 <item path="morfeusz/MorphDeserializer.cpp" ex="false" tool="1" flavor2="4"> 393 <item path="morfeusz/MorphDeserializer.cpp" ex="false" tool="1" flavor2="4">
392 - <ccTool flags="2"> 394 + <ccTool flags="3">
393 <preprocessorList> 395 <preprocessorList>
394 <Elem>NDEBUG</Elem> 396 <Elem>NDEBUG</Elem>
395 <Elem>libmorfeusz_EXPORTS</Elem> 397 <Elem>libmorfeusz_EXPORTS</Elem>
@@ -425,7 +427,7 @@ @@ -425,7 +427,7 @@
425 ex="false" 427 ex="false"
426 tool="1" 428 tool="1"
427 flavor2="4"> 429 flavor2="4">
428 - <ccTool flags="2"> 430 + <ccTool flags="3">
429 <preprocessorList> 431 <preprocessorList>
430 <Elem>NDEBUG</Elem> 432 <Elem>NDEBUG</Elem>
431 </preprocessorList> 433 </preprocessorList>
@@ -532,6 +534,11 @@ @@ -532,6 +534,11 @@
532 </ccTool> 534 </ccTool>
533 </item> 535 </item>
534 <item path="morfeusz/morfeusz_analyzer.cpp" ex="false" tool="1" flavor2="4"> 536 <item path="morfeusz/morfeusz_analyzer.cpp" ex="false" tool="1" flavor2="4">
  537 + <ccTool flags="2">
  538 + <preprocessorList>
  539 + <Elem>NDEBUG</Elem>
  540 + </preprocessorList>
  541 + </ccTool>
535 </item> 542 </item>
536 <item path="morfeusz/morfeusz_generator.cpp" ex="false" tool="1" flavor2="4"> 543 <item path="morfeusz/morfeusz_generator.cpp" ex="false" tool="1" flavor2="4">
537 </item> 544 </item>