segrules.cpp
3.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#include "segrules.hpp"
#include "../fsa/fsa.hpp"
#include "../fsa/const.hpp"
#include "../deserializationUtils.hpp"
using namespace std;
/**
 * Moves the cursor past a serialized separators list.
 *
 * The list starts with a 16-bit element count, which is fetched with
 * readInt16; the cursor is then advanced by four bytes per element.
 * NOTE(review): presumably readInt16 also moves the cursor past the count
 * field itself (nothing else here skips those two bytes) - confirm against
 * deserializationUtils.hpp.
 *
 * @param ptr cursor positioned on the list's count field; updated in place.
 */
static inline void skipSeparatorsList(const unsigned char*& ptr) {
    const uint16_t separatorsCount = readInt16(ptr);
    ptr += 4 * separatorsCount;
}
/**
 * Locates the separators list inside a serialized analyzer blob.
 *
 * Layout as read by this function:
 *  - a 32-bit size in network byte order is stored at FSA_DATA_SIZE_OFFSET;
 *  - the "additional data" block begins at FSA_DATA_OFFSET plus that size;
 *  - the additional data block opens with a 32-bit length field, and the
 *    separators list begins right after the length field and the number of
 *    bytes it announces.
 *
 * @param ptr start of the serialized data.
 * @return pointer to the first byte of the separators list.
 */
static inline const unsigned char* getSeparatorsListPtr(const unsigned char* ptr) {
    const uint32_t fsaDataSize =
            ntohl(*reinterpret_cast<const uint32_t*>(ptr + FSA_DATA_SIZE_OFFSET));
    const unsigned char* const additionalDataPtr = ptr + FSA_DATA_OFFSET + fsaDataSize;

    // Read the length through a scratch cursor. readInt32 is handed the
    // pointer by name and may advance it; using the same variable both as an
    // operand and as the readInt32 argument in one expression would make the
    // result depend on the (unspecified) operand evaluation order.
    const unsigned char* scratch = additionalDataPtr;
    const uint32_t additionalDataSize = readInt32(scratch);

    // Skip the 4-byte length field plus the announced payload.
    return additionalDataPtr + additionalDataSize + 4;
}
/**
 * Finds the position directly behind the separators list, which is where
 * the callers in this file start reading the options/FSA entries.
 *
 * @param ptr start of the serialized data.
 * @return pointer to the first byte after the separators list.
 */
static inline const unsigned char* getFSAsMapPtr(const unsigned char* ptr) {
    const unsigned char* cursor = getSeparatorsListPtr(ptr);
    // The cursor is passed by reference and comes back moved past the list.
    skipSeparatorsList(cursor);
    return cursor;
}
/**
 * Reads one options set from the serialized data.
 *
 * The encoding consumed here is a single byte holding the number of
 * entries, followed by that many (key, value) pairs, each element fetched
 * with readString. A key that occurs more than once keeps the value read
 * last.
 *
 * @param ptr cursor positioned on the entry-count byte; the count byte is
 *            skipped here and the cursor is handed to readString for every
 *            key and value.
 * @return the options collected from the stream.
 */
static inline SegrulesOptions deserializeOptions(const unsigned char*& ptr) {
    SegrulesOptions options;
    // Fetch the entry count and step over it in one go.
    for (unsigned int remaining = *ptr++; remaining > 0; --remaining) {
        // The key must be read in its own statement so that it is always
        // consumed from the stream before its value.
        const string optionName = readString(ptr);
        options[optionName] = readString(ptr);
    }
    return options;
}
/**
 * Creates a SegrulesFSA from the serialized data and moves the cursor
 * behind it.
 *
 * A 32-bit size is read first with readInt32; the SegrulesFSA is then
 * constructed from the current cursor position, and finally the cursor is
 * advanced by the size that was read.
 *
 * @param ptr cursor positioned on the size field; updated in place.
 * @return a SegrulesFSA allocated with new. Nothing in this function
 *         releases it, so the caller is responsible for it.
 */
static inline SegrulesFSA* deserializeFSA(const unsigned char*& ptr) {
    const uint32_t serializedSize = readInt32(ptr);
    SegrulesFSA* const fsa = new SegrulesFSA(ptr);
    ptr += serializedSize;
    return fsa;
}
/**
 * Builds the options -> FSA lookup table from a serialized analyzer.
 *
 * The table section starts with one byte giving the number of entries;
 * every entry is an options set followed by a serialized FSA.
 *
 * @param analyzerPtr start of the serialized analyzer data.
 * @return map from each options set to its SegrulesFSA. The pointers come
 *         from deserializeFSA (allocated with new) and are not released
 *         here.
 */
map<SegrulesOptions, SegrulesFSA*> createSegrulesFSAsMap(const unsigned char* analyzerPtr) {
    map<SegrulesOptions, SegrulesFSA*> fsasByOptions;
    const unsigned char* cursor = getFSAsMapPtr(analyzerPtr);
    // First byte: number of (options, FSA) entries that follow.
    for (unsigned int remaining = *cursor++; remaining > 0; --remaining) {
        const SegrulesOptions entryOptions = deserializeOptions(cursor);
        SegrulesFSA* const entryFSA = deserializeFSA(cursor);
        fsasByOptions[entryOptions] = entryFSA;
    }
    return fsasByOptions;
}
/**
 * Reads the default options set, which is stored directly after the last
 * (options, FSA) entry of the serialized FSAs table.
 *
 * All table entries are walked over only to reach that position; their
 * contents are discarded.
 *
 * @param ptr start of the serialized analyzer data.
 * @return the options set found behind the FSAs table.
 */
SegrulesOptions getDefaultSegrulesOptions(const unsigned char* ptr) {
    const unsigned char* currPtr = getFSAsMapPtr(ptr);
    const unsigned char fsasNum = *currPtr;
    currPtr++;
    for (unsigned char i = 0; i < fsasNum; i++) {
        // Consume the entry's options; only the cursor movement matters.
        deserializeOptions(currPtr);
        // Step over the serialized FSA without constructing it. This mirrors
        // the pointer arithmetic of deserializeFSA (read the 32-bit size,
        // then advance by it) but avoids allocating a SegrulesFSA that would
        // never be freed.
        // NOTE(review): assumes the SegrulesFSA constructor does not move
        // the cursor it is given - confirm against its declaration.
        const uint32_t fsaSize = readInt32(currPtr);
        currPtr += fsaSize;
    }
    return deserializeOptions(currPtr);
}
/**
 * Looks up the FSA registered for the default options of a serialized
 * analyzer.
 *
 * @param map table of FSAs keyed by their options.
 * @param ptr start of the serialized analyzer data from which the default
 *            options are read.
 * @return the FSA stored under the default options, or NULL when the table
 *         has no entry for them.
 */
SegrulesFSA* getDefaultSegrulesFSA(
        const map<SegrulesOptions, SegrulesFSA*>& map,
        const unsigned char* ptr) {
    const SegrulesOptions opts = getDefaultSegrulesOptions(ptr);
    // The parameter is named like the container template, so the iterator
    // type has to be spelled with an explicit std:: qualification.
    const std::map<SegrulesOptions, SegrulesFSA*>::const_iterator found = map.find(opts);
    if (found == map.end()) {
        // Dereferencing end() would be undefined behaviour.
        return NULL;
    }
    return found->second;
}
/**
 * Extracts the separators stored in a serialized analyzer.
 *
 * The list consists of a 16-bit element count followed by that many 32-bit
 * values; both are converted from network byte order.
 *
 * @param ptr start of the serialized analyzer data.
 * @return the separator values in the order they are stored.
 */
vector<uint32_t> getSeparatorsList(const unsigned char* ptr) {
    ptr = getSeparatorsListPtr(ptr);
    const uint16_t listSize = ntohs(*reinterpret_cast<const uint16_t*>(ptr));
    ptr += 2;  // step over the count field

    vector<uint32_t> res;
    // The final size is known up front, so allocate once.
    res.reserve(listSize);
    for (unsigned int i = 0; i < listSize; ++i) {
        res.push_back(ntohl(*reinterpret_cast<const uint32_t*>(ptr)));
        ptr += 4;
    }
    return res;
}
void debugMap(const map<SegrulesOptions, SegrulesFSA*>& res) {
map<SegrulesOptions, SegrulesFSA*>::const_iterator it = res.begin();
while (it != res.end()) {
SegrulesOptions::const_iterator it1 = it->first.begin();
while (it1 != it->first.end()) {
cerr << it1->first << " --> " << it1->second << endl;
it1++;
}
cerr << it->second << endl;
it++;
}
}