Commit 4a94bb689bf0122f2270624056cb168e739d1063
1 parent: 7351ce74
prawie dorobiona kompletna obsługa ignów
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@147 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing 8 changed files with 131 additions and 24 deletions
fsabuilder/morfeuszbuilder/fsa/serializer.py
@@ -22,7 +22,7 @@ class Serializer(object): | @@ -22,7 +22,7 @@ class Serializer(object): | ||
22 | 22 | ||
23 | # get the Morfeusz file format version that is being encoded | 23 | # get the Morfeusz file format version that is being encoded |
24 | def getVersion(self): | 24 | def getVersion(self): |
25 | - return 12 | 25 | + return 13 |
26 | 26 | ||
27 | def serialize2CppFile(self, fname, generator, segmentationRulesData): | 27 | def serialize2CppFile(self, fname, generator, segmentationRulesData): |
28 | res = [] | 28 | res = [] |
fsabuilder/morfeuszbuilder/segrules/rulesManager.py
@@ -5,12 +5,14 @@ Created on 20 lut 2014 | @@ -5,12 +5,14 @@ Created on 20 lut 2014 | ||
5 | ''' | 5 | ''' |
6 | import logging | 6 | import logging |
7 | from morfeuszbuilder.utils.serializationUtils import htons, htonl | 7 | from morfeuszbuilder.utils.serializationUtils import htons, htonl |
8 | +from morfeuszbuilder.utils import serializationUtils | ||
8 | 9 | ||
9 | class RulesManager(object): | 10 | class RulesManager(object): |
10 | 11 | ||
11 | - def __init__(self, segtypes): | 12 | + def __init__(self, segtypes, separatorsList): |
12 | self.options2DFA = {} | 13 | self.options2DFA = {} |
13 | self.segtypes = segtypes | 14 | self.segtypes = segtypes |
15 | + self.separatorsList = separatorsList | ||
14 | self.defaultOptions = None | 16 | self.defaultOptions = None |
15 | 17 | ||
16 | def _options2Key(self, optionsMap): | 18 | def _options2Key(self, optionsMap): |
@@ -37,6 +39,7 @@ class RulesManager(object): | @@ -37,6 +39,7 @@ class RulesManager(object): | ||
37 | 39 | ||
38 | def serialize(self): | 40 | def serialize(self): |
39 | res = bytearray() | 41 | res = bytearray() |
42 | + res.extend(self._serializeSeparatorsList()) | ||
40 | dfasNum = len(self.options2DFA) | 43 | dfasNum = len(self.options2DFA) |
41 | assert dfasNum > 0 and dfasNum < 256 | 44 | assert dfasNum > 0 and dfasNum < 256 |
42 | res.append(dfasNum) | 45 | res.append(dfasNum) |
@@ -48,6 +51,13 @@ class RulesManager(object): | @@ -48,6 +51,13 @@ class RulesManager(object): | ||
48 | logging.info('segmentation rules size: %s bytes', len(res)) | 51 | logging.info('segmentation rules size: %s bytes', len(res)) |
49 | return res | 52 | return res |
50 | 53 | ||
54 | + def _serializeSeparatorsList(self): | ||
55 | + res = bytearray() | ||
56 | + res.extend(serializationUtils.htons(len(self.separatorsList))) | ||
57 | + for cp in sorted(self.separatorsList): | ||
58 | + res.extend(serializationUtils.htonl(cp)) | ||
59 | + return res | ||
60 | + | ||
51 | def _serializeOptionsMap(self, optionsMap): | 61 | def _serializeOptionsMap(self, optionsMap): |
52 | assert len(optionsMap) < 256 | 62 | assert len(optionsMap) < 256 |
53 | res = bytearray() | 63 | res = bytearray() |
fsabuilder/morfeuszbuilder/segrules/rulesParser.py
@@ -3,7 +3,7 @@ from pyparsing import * | @@ -3,7 +3,7 @@ from pyparsing import * | ||
3 | ParserElement.enablePackrat() | 3 | ParserElement.enablePackrat() |
4 | from morfeuszbuilder.tagset import segtypes | 4 | from morfeuszbuilder.tagset import segtypes |
5 | from morfeuszbuilder.utils import configFile, exceptions | 5 | from morfeuszbuilder.utils import configFile, exceptions |
6 | -from morfeuszbuilder.segrules import preprocessor, rules, rulesManager, pyparseString | 6 | +from morfeuszbuilder.segrules import preprocessor, rules, rulesManager, pyparseString, separatorChars |
7 | import codecs | 7 | import codecs |
8 | import re | 8 | import re |
9 | 9 | ||
@@ -34,11 +34,22 @@ class RulesParser(object): | @@ -34,11 +34,22 @@ class RulesParser(object): | ||
34 | 34 | ||
35 | def parse(self, filename): | 35 | def parse(self, filename): |
36 | 36 | ||
37 | - segtypesConfigFile = configFile.ConfigFile(filename, ['options', 'combinations', 'generator combinations', 'tags', 'lexemes', 'segment types']) | 37 | + segtypesConfigFile = configFile.ConfigFile(filename, |
38 | + [ | ||
39 | + 'options', | ||
40 | + 'combinations', | ||
41 | + 'generator combinations', | ||
42 | + 'tags', | ||
43 | + 'lexemes', | ||
44 | + 'segment types', | ||
45 | + 'separator chars']) | ||
38 | key2Defs = self._getKey2Defs(segtypesConfigFile) | 46 | key2Defs = self._getKey2Defs(segtypesConfigFile) |
39 | segtypesHelper = segtypes.Segtypes(self.tagset, segtypesConfigFile) | 47 | segtypesHelper = segtypes.Segtypes(self.tagset, segtypesConfigFile) |
48 | + separatorsList = separatorChars.parseSeparatorChars(segtypesConfigFile) \ | ||
49 | + if self.rulesType == RulesParser.PARSE4ANALYZER \ | ||
50 | + else [] | ||
40 | 51 | ||
41 | - res = rulesManager.RulesManager(segtypesHelper) | 52 | + res = rulesManager.RulesManager(segtypesHelper, separatorsList) |
42 | 53 | ||
43 | def2Key = {} | 54 | def2Key = {} |
44 | for key, defs in key2Defs.iteritems(): | 55 | for key, defs in key2Defs.iteritems(): |
input/segmenty.dat
@@ -243,6 +243,16 @@ moze_interp( dig>+ dywiz> latek ) | @@ -243,6 +243,16 @@ moze_interp( dig>+ dywiz> latek ) | ||
243 | # interpretacja znaków interpunkcyjnych | 243 | # interpretacja znaków interpunkcyjnych |
244 | # moze_interp(samodz interp) | 244 | # moze_interp(samodz interp) |
245 | 245 | ||
246 | +[separator chars] | ||
247 | +# , | ||
248 | +44 | ||
249 | + | ||
250 | +# . | ||
251 | +46 | ||
252 | + | ||
253 | +# ; | ||
254 | +59 | ||
255 | + | ||
246 | [generator combinations] | 256 | [generator combinations] |
247 | 257 | ||
248 | [segment types] | 258 | [segment types] |
morfeusz/Morfeusz.cpp
@@ -62,7 +62,8 @@ void Morfeusz::processOneWord( | @@ -62,7 +62,8 @@ void Morfeusz::processOneWord( | ||
62 | const char*& inputStart, | 62 | const char*& inputStart, |
63 | const char* inputEnd, | 63 | const char* inputEnd, |
64 | int startNodeNum, | 64 | int startNodeNum, |
65 | - std::vector<MorphInterpretation>& results) const { | 65 | + std::vector<MorphInterpretation>& results, |
66 | + bool insideIgnHandler) const { | ||
66 | while (inputStart != inputEnd | 67 | while (inputStart != inputEnd |
67 | && isEndOfWord(env.getCharsetConverter().peek(inputStart, inputEnd))) { | 68 | && isEndOfWord(env.getCharsetConverter().peek(inputStart, inputEnd))) { |
68 | env.getCharsetConverter().next(inputStart, inputEnd); | 69 | env.getCharsetConverter().next(inputStart, inputEnd); |
@@ -71,9 +72,9 @@ void Morfeusz::processOneWord( | @@ -71,9 +72,9 @@ void Morfeusz::processOneWord( | ||
71 | InflexionGraph graph; | 72 | InflexionGraph graph; |
72 | const char* currInput = inputStart; | 73 | const char* currInput = inputStart; |
73 | const SegrulesFSA& segrulesFSA = env.getCurrentSegrulesFSA(); | 74 | const SegrulesFSA& segrulesFSA = env.getCurrentSegrulesFSA(); |
74 | - | 75 | + |
75 | doProcessOneWord(env, currInput, inputEnd, segrulesFSA.initialState, accum, graph); | 76 | doProcessOneWord(env, currInput, inputEnd, segrulesFSA.initialState, accum, graph); |
76 | - | 77 | + |
77 | if (!graph.empty()) { | 78 | if (!graph.empty()) { |
78 | const InterpretedChunksDecoder& interpretedChunksDecoder = env.getInterpretedChunksDecoder(); | 79 | const InterpretedChunksDecoder& interpretedChunksDecoder = env.getInterpretedChunksDecoder(); |
79 | int srcNode = startNodeNum; | 80 | int srcNode = startNodeNum; |
@@ -87,6 +88,12 @@ void Morfeusz::processOneWord( | @@ -87,6 +88,12 @@ void Morfeusz::processOneWord( | ||
87 | srcNode++; | 88 | srcNode++; |
88 | } | 89 | } |
89 | } | 90 | } |
91 | + else if (inputStart != inputEnd | ||
92 | + && env.getProcessorType() == ANALYZER | ||
93 | + && !insideIgnHandler) { | ||
94 | + this->handleIgnChunk(env, inputStart, currInput, startNodeNum, results); | ||
95 | + // this->appendIgnotiumToResults(env, string(inputStart, currInput), startNodeNum, results); | ||
96 | + } | ||
90 | else if (inputStart != inputEnd) { | 97 | else if (inputStart != inputEnd) { |
91 | this->appendIgnotiumToResults(env, string(inputStart, currInput), startNodeNum, results); | 98 | this->appendIgnotiumToResults(env, string(inputStart, currInput), startNodeNum, results); |
92 | } | 99 | } |
@@ -113,7 +120,7 @@ static inline string debugAccum(vector<InterpretedChunk>& accum) { | @@ -113,7 +120,7 @@ static inline string debugAccum(vector<InterpretedChunk>& accum) { | ||
113 | stringstream res; | 120 | stringstream res; |
114 | for (unsigned int i = 0; i < accum.size(); i++) { | 121 | for (unsigned int i = 0; i < accum.size(); i++) { |
115 | res << debugInterpsGroup(accum[i].interpsGroup.type, accum[i].chunkStartPtr, accum[i].chunkEndPtr); | 122 | res << debugInterpsGroup(accum[i].interpsGroup.type, accum[i].chunkStartPtr, accum[i].chunkEndPtr); |
116 | -// res << "(" << (int) accum[i].interpsGroup.type << ", " << string(accum[i].chunkStartPtr, accum[i].chunkStartPtr) << "), "; | 123 | + // res << "(" << (int) accum[i].interpsGroup.type << ", " << string(accum[i].chunkStartPtr, accum[i].chunkStartPtr) << "), "; |
117 | } | 124 | } |
118 | return res.str(); | 125 | return res.str(); |
119 | } | 126 | } |
@@ -125,11 +132,11 @@ void Morfeusz::doProcessOneWord( | @@ -125,11 +132,11 @@ void Morfeusz::doProcessOneWord( | ||
125 | SegrulesState segrulesState, | 132 | SegrulesState segrulesState, |
126 | vector<InterpretedChunk>& accum, | 133 | vector<InterpretedChunk>& accum, |
127 | InflexionGraph& graph) const { | 134 | InflexionGraph& graph) const { |
128 | -// if (this->options.debug) { | ||
129 | -// cerr << "----------" << endl; | ||
130 | -// cerr << "PROCESS: '" << inputData << "', already recognized: " << debugAccum(accum) << endl; | ||
131 | -// } | ||
132 | -// cerr << "doAnalyzeOneWord " << inputData << endl; | 135 | + // if (this->options.debug) { |
136 | + // cerr << "----------" << endl; | ||
137 | + // cerr << "PROCESS: '" << inputData << "', already recognized: " << debugAccum(accum) << endl; | ||
138 | + // } | ||
139 | + // cerr << "doAnalyzeOneWord " << inputData << endl; | ||
133 | const char* inputStart = inputData; | 140 | const char* inputStart = inputData; |
134 | const char* currInput = inputData; | 141 | const char* currInput = inputData; |
135 | uint32_t codepoint = inputData == inputEnd ? 0 : env.getCharsetConverter().next(currInput, inputEnd); | 142 | uint32_t codepoint = inputData == inputEnd ? 0 : env.getCharsetConverter().next(currInput, inputEnd); |
@@ -140,8 +147,8 @@ void Morfeusz::doProcessOneWord( | @@ -140,8 +147,8 @@ void Morfeusz::doProcessOneWord( | ||
140 | 147 | ||
141 | while (!isEndOfWord(codepoint)) { | 148 | while (!isEndOfWord(codepoint)) { |
142 | uint32_t normalizedCodepoint = env.getProcessorType() == ANALYZER | 149 | uint32_t normalizedCodepoint = env.getProcessorType() == ANALYZER |
143 | - ? env.getCaseConverter().toLower(codepoint) | ||
144 | - : codepoint; | 150 | + ? env.getCaseConverter().toLower(codepoint) |
151 | + : codepoint; | ||
145 | originalCodepoints.push_back(codepoint); | 152 | originalCodepoints.push_back(codepoint); |
146 | normalizedCodepoints.push_back(normalizedCodepoint); | 153 | normalizedCodepoints.push_back(normalizedCodepoint); |
147 | feedState(state, normalizedCodepoint, UTF8CharsetConverter()); | 154 | feedState(state, normalizedCodepoint, UTF8CharsetConverter()); |
@@ -152,7 +159,7 @@ void Morfeusz::doProcessOneWord( | @@ -152,7 +159,7 @@ void Morfeusz::doProcessOneWord( | ||
152 | throw MorfeuszException("Lemma of length > 1 cannot start with a colon"); | 159 | throw MorfeuszException("Lemma of length > 1 cannot start with a colon"); |
153 | } | 160 | } |
154 | homonymId = string(currInput + 1, inputEnd); | 161 | homonymId = string(currInput + 1, inputEnd); |
155 | -// cerr << "homonym " << homonymId << endl; | 162 | + // cerr << "homonym " << homonymId << endl; |
156 | currInput = inputEnd; | 163 | currInput = inputEnd; |
157 | codepoint = 0x00; | 164 | codepoint = 0x00; |
158 | } | 165 | } |
@@ -163,13 +170,13 @@ void Morfeusz::doProcessOneWord( | @@ -163,13 +170,13 @@ void Morfeusz::doProcessOneWord( | ||
163 | if (this->options.debug) { | 170 | if (this->options.debug) { |
164 | cerr << "recognized: " << debugInterpsGroup(ig.type, inputStart, currInput) << " at: '" << inputStart << "'" << endl; | 171 | cerr << "recognized: " << debugInterpsGroup(ig.type, inputStart, currInput) << " at: '" << inputStart << "'" << endl; |
165 | } | 172 | } |
166 | -// cerr << "accept at '" << currInput << "' type=" << (int) ig.type << endl; | 173 | + // cerr << "accept at '" << currInput << "' type=" << (int) ig.type << endl; |
167 | set<SegrulesState> newSegrulesStates; | 174 | set<SegrulesState> newSegrulesStates; |
168 | env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, newSegrulesStates); | 175 | env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, newSegrulesStates); |
169 | if (this->options.debug && newSegrulesStates.empty()) { | 176 | if (this->options.debug && newSegrulesStates.empty()) { |
170 | cerr << "NOT ACCEPTING " << debugAccum(accum) << debugInterpsGroup(ig.type, inputStart, currInput) << endl; | 177 | cerr << "NOT ACCEPTING " << debugAccum(accum) << debugInterpsGroup(ig.type, inputStart, currInput) << endl; |
171 | } | 178 | } |
172 | -// cerr << "newSegrulesStates.size() " << newSegrulesStates.size() << endl; | 179 | + // cerr << "newSegrulesStates.size() " << newSegrulesStates.size() << endl; |
173 | for ( | 180 | for ( |
174 | set<SegrulesState>::iterator it = newSegrulesStates.begin(); | 181 | set<SegrulesState>::iterator it = newSegrulesStates.begin(); |
175 | it != newSegrulesStates.end(); | 182 | it != newSegrulesStates.end(); |
@@ -190,7 +197,7 @@ void Morfeusz::doProcessOneWord( | @@ -190,7 +197,7 @@ void Morfeusz::doProcessOneWord( | ||
190 | doShiftOrth(accum.back(), ic); | 197 | doShiftOrth(accum.back(), ic); |
191 | } | 198 | } |
192 | accum.push_back(ic); | 199 | accum.push_back(ic); |
193 | - if (isEndOfWord(codepoint) | 200 | + if (isEndOfWord(codepoint) |
194 | && newSegrulesState.accepting) { | 201 | && newSegrulesState.accepting) { |
195 | if (this->options.debug) { | 202 | if (this->options.debug) { |
196 | cerr << "ACCEPTING " << debugAccum(accum) << endl; | 203 | cerr << "ACCEPTING " << debugAccum(accum) << endl; |
@@ -198,7 +205,7 @@ void Morfeusz::doProcessOneWord( | @@ -198,7 +205,7 @@ void Morfeusz::doProcessOneWord( | ||
198 | graph.addPath(accum, newSegrulesState.weak); | 205 | graph.addPath(accum, newSegrulesState.weak); |
199 | } | 206 | } |
200 | else if (!isEndOfWord(codepoint)) { | 207 | else if (!isEndOfWord(codepoint)) { |
201 | -// cerr << "will process " << currInput << endl; | 208 | + // cerr << "will process " << currInput << endl; |
202 | const char* newCurrInput = currInput; | 209 | const char* newCurrInput = currInput; |
203 | doProcessOneWord(env, newCurrInput, inputEnd, newSegrulesState, accum, graph); | 210 | doProcessOneWord(env, newCurrInput, inputEnd, newSegrulesState, accum, graph); |
204 | } | 211 | } |
@@ -211,6 +218,58 @@ void Morfeusz::doProcessOneWord( | @@ -211,6 +218,58 @@ void Morfeusz::doProcessOneWord( | ||
211 | inputData = currInput; | 218 | inputData = currInput; |
212 | } | 219 | } |
213 | 220 | ||
221 | +static inline bool isSeparator(uint32_t codepoint) { | ||
222 | + return codepoint == 44; | ||
223 | +} | ||
224 | + | ||
225 | +void Morfeusz::handleIgnChunk( | ||
226 | + const Environment& env, | ||
227 | + const char* inputStart, | ||
228 | + const char* inputEnd, | ||
229 | + int startNodeNum, | ||
230 | + std::vector<MorphInterpretation>& results) const { | ||
231 | + const char* currInput = inputStart; | ||
232 | + const char* prevInput; | ||
233 | + uint32_t codepoint; | ||
234 | + bool separatorFound = false; | ||
235 | + while (currInput != inputEnd) { | ||
236 | + prevInput = currInput; | ||
237 | + const char* nonSeparatorInputEnd = prevInput; | ||
238 | + do { | ||
239 | + codepoint = env.getCharsetConverter().next(currInput, inputEnd); | ||
240 | + if (!isSeparator(codepoint)) { | ||
241 | + nonSeparatorInputEnd = currInput; | ||
242 | + } | ||
243 | + } | ||
244 | + while (currInput != inputEnd && !isSeparator(codepoint)); | ||
245 | + | ||
246 | + if (isSeparator(codepoint)) { | ||
247 | + separatorFound = true; | ||
248 | + if (nonSeparatorInputEnd != prevInput) { | ||
249 | + int startNode = results.empty() ? startNodeNum : results.back().getEndNode(); | ||
250 | + this->processOneWord(env, prevInput, nonSeparatorInputEnd, startNode, results, true); | ||
251 | + startNode = results.empty() ? startNodeNum : results.back().getEndNode(); | ||
252 | + this->processOneWord(env, nonSeparatorInputEnd, currInput, startNode, results, true); | ||
253 | + } | ||
254 | + else { | ||
255 | + int startNode = results.empty() ? startNodeNum : results.back().getEndNode(); | ||
256 | + this->processOneWord(env, prevInput, currInput, startNode, results, true); | ||
257 | + } | ||
258 | + } | ||
259 | + } | ||
260 | + | ||
261 | + // currInput == inputEnd | ||
262 | + if (!isSeparator(codepoint)) { | ||
263 | + if (separatorFound) { | ||
264 | + int startNode = results.empty() ? startNodeNum : results.back().getEndNode(); | ||
265 | + this->processOneWord(env, prevInput, inputEnd, startNode, results, true); | ||
266 | + } | ||
267 | + else { | ||
268 | + this->appendIgnotiumToResults(env, string(inputStart, inputEnd), startNodeNum, results); | ||
269 | + } | ||
270 | + } | ||
271 | +} | ||
272 | + | ||
214 | void Morfeusz::appendIgnotiumToResults( | 273 | void Morfeusz::appendIgnotiumToResults( |
215 | const Environment& env, | 274 | const Environment& env, |
216 | const string& word, | 275 | const string& word, |
@@ -260,6 +319,7 @@ void Morfeusz::generate(const string& text, vector<MorphInterpretation>& results | @@ -260,6 +319,7 @@ void Morfeusz::generate(const string& text, vector<MorphInterpretation>& results | ||
260 | } | 319 | } |
261 | 320 | ||
262 | // XXX - someday it should be improved | 321 | // XXX - someday it should be improved |
322 | + | ||
263 | void Morfeusz::generate(const std::string& lemma, int tagnum, vector<MorphInterpretation>& result) const { | 323 | void Morfeusz::generate(const std::string& lemma, int tagnum, vector<MorphInterpretation>& result) const { |
264 | vector<MorphInterpretation> partRes; | 324 | vector<MorphInterpretation> partRes; |
265 | this->generate(lemma, partRes); | 325 | this->generate(lemma, partRes); |
morfeusz/Morfeusz.hpp
@@ -157,7 +157,8 @@ private: | @@ -157,7 +157,8 @@ private: | ||
157 | const char*& inputData, | 157 | const char*& inputData, |
158 | const char* inputEnd, | 158 | const char* inputEnd, |
159 | int startNodeNum, | 159 | int startNodeNum, |
160 | - std::vector<MorphInterpretation>& result) const; | 160 | + std::vector<MorphInterpretation>& result, |
161 | + bool insideIgnHandler=false) const; | ||
161 | 162 | ||
162 | void doProcessOneWord( | 163 | void doProcessOneWord( |
163 | const Environment& env, | 164 | const Environment& env, |
@@ -166,6 +167,13 @@ private: | @@ -166,6 +167,13 @@ private: | ||
166 | SegrulesState segrulesState, | 167 | SegrulesState segrulesState, |
167 | std::vector<InterpretedChunk>& accum, | 168 | std::vector<InterpretedChunk>& accum, |
168 | InflexionGraph& graph) const; | 169 | InflexionGraph& graph) const; |
170 | + | ||
171 | + void handleIgnChunk( | ||
172 | + const Environment& env, | ||
173 | + const char* inputStart, | ||
174 | + const char* inputEnd, | ||
175 | + int startNodeNum, | ||
176 | + std::vector<MorphInterpretation>& results) const; | ||
169 | 177 | ||
170 | void appendIgnotiumToResults( | 178 | void appendIgnotiumToResults( |
171 | const Environment& env, | 179 | const Environment& env, |
morfeusz/fsa/const.cpp
@@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
2 | #include "const.hpp" | 2 | #include "const.hpp" |
3 | 3 | ||
4 | extern const uint32_t MAGIC_NUMBER = 0x8fc2bc1b; | 4 | extern const uint32_t MAGIC_NUMBER = 0x8fc2bc1b; |
5 | -extern const uint8_t VERSION_NUM = 12; | 5 | +extern const uint8_t VERSION_NUM = 13; |
6 | 6 | ||
7 | extern const unsigned int VERSION_NUM_OFFSET = 4; | 7 | extern const unsigned int VERSION_NUM_OFFSET = 4; |
8 | extern const unsigned int IMPLEMENTATION_NUM_OFFSET = 5; | 8 | extern const unsigned int IMPLEMENTATION_NUM_OFFSET = 5; |
morfeusz/segrules/segrules.cpp
@@ -18,11 +18,19 @@ static inline string deserializeString(const unsigned char*& ptr) { | @@ -18,11 +18,19 @@ static inline string deserializeString(const unsigned char*& ptr) { | ||
18 | return res; | 18 | return res; |
19 | } | 19 | } |
20 | 20 | ||
21 | +static inline void ignoreSeparatorsList(const unsigned char*& ptr) { | ||
22 | + uint16_t listSize = ntohs(*reinterpret_cast<const uint16_t*>(ptr)); | ||
23 | + ptr += 2; | ||
24 | + ptr += 4 * listSize; | ||
25 | +} | ||
26 | + | ||
21 | static inline const unsigned char* getFSAsMapPtr(const unsigned char* ptr) { | 27 | static inline const unsigned char* getFSAsMapPtr(const unsigned char* ptr) { |
22 | const unsigned char* additionalDataPtr = ptr | 28 | const unsigned char* additionalDataPtr = ptr |
23 | + FSA_DATA_OFFSET | 29 | + FSA_DATA_OFFSET |
24 | + ntohl(*reinterpret_cast<const uint32_t*>(ptr + FSA_DATA_SIZE_OFFSET)); | 30 | + ntohl(*reinterpret_cast<const uint32_t*>(ptr + FSA_DATA_SIZE_OFFSET)); |
25 | - return additionalDataPtr + deserializeUint32(additionalDataPtr) + 4; | 31 | + const unsigned char* res = additionalDataPtr + deserializeUint32(additionalDataPtr) + 4; |
32 | + ignoreSeparatorsList(res); | ||
33 | + return res; | ||
26 | } | 34 | } |
27 | 35 | ||
28 | static inline SegrulesOptions deserializeOptions(const unsigned char*& ptr) { | 36 | static inline SegrulesOptions deserializeOptions(const unsigned char*& ptr) { |