SegrulesFSA.cpp
2.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#include <vector>
#include "SegrulesFSA.hpp"
using namespace std;
namespace morfeusz {
/**
 * Appends to res the states reachable from `state` via segment type `segnum`.
 *
 * A state whose offset equals 0 is handled as the initial state (looked up in
 * the initialTransitions table); every other state is resolved by scanning
 * its transition records.
 *
 * @param segnum       segment type number labelling the transition
 * @param state        state the automaton is currently in
 * @param atEndOfWord  selects the filter applied to target states: when true
 *                     only accepting states are kept, otherwise only
 *                     non-sink states are kept
 * @param res          output vector; matching states are appended to it
 */
void SegrulesFSA::proceedToNext(
        const unsigned char segnum,
        const SegrulesState& state,
        bool atEndOfWord,
        vector<SegrulesState>& res) const {
    if (state.offset != 0) {
        doProceedFromNonInitialState(segnum, state, atEndOfWord, res);
        return;
    }
    doProceedFromInitialState(segnum, atEndOfWord, res);
}
void SegrulesFSA::doProceedFromInitialState(
const unsigned char segnum,
bool atEndOfWord,
vector<SegrulesState>& res) const {
const vector<SegrulesState>& newStates = initialTransitions[segnum];
vector<SegrulesState>::const_iterator it = newStates.begin();
while (it != newStates.end()) {
const SegrulesState& newState = *it++;
if ((atEndOfWord && newState.accepting)
|| (!atEndOfWord && !newState.sink)) {
res.push_back(newState);
}
}
}
void SegrulesFSA::doProceedFromNonInitialState(
const unsigned char segnum,
const SegrulesState& state,
bool atEndOfWord,
std::vector<SegrulesState>& res) const {
const unsigned char* currPtr = ptr + state.offset + 1;
const unsigned char transitionsNum = *currPtr++;
for (int i = 0; i < transitionsNum; i++) {
if (*currPtr == segnum) {
SegrulesState newState = this->transition2State(currPtr);
if ((atEndOfWord && newState.accepting)
|| (!atEndOfWord && !newState.sink)) {
res.push_back(newState);
}
}
currPtr += 4;
}
}
/**
 * Decodes the transition record at transitionPtr into its target state.
 *
 * Record bytes as read here: byte 0 is skipped (callers compare it against
 * the segment type number), byte 1 is stored as shiftOrthFromPrevious, and
 * the target state's offset is read by readInt16 starting at byte 2.
 *
 * The target state's own data at ptr + offset supplies the remaining fields:
 * bit 0 (value 1) of its first byte is the accepting flag, bit 1 (value 2) is
 * the weak flag, and the state is a sink when its second byte is 0.
 *
 * @param transitionPtr  pointer to the first byte of a transition record
 * @return the decoded target state
 */
SegrulesState SegrulesFSA::transition2State(const unsigned char* transitionPtr) const {
    const unsigned char ACCEPTING_FLAG = 1;
    const unsigned char WEAK_FLAG = 2;

    // Start past the leading byte of the record.
    const unsigned char* cursor = transitionPtr + 1;

    SegrulesState res;
    res.shiftOrthFromPrevious = *cursor;
    ++cursor;
    res.offset = readInt16(cursor);

    const unsigned char* const targetData = ptr + res.offset;
    const unsigned char targetFlags = targetData[0];
    res.accepting = targetFlags & ACCEPTING_FLAG;
    res.weak = targetFlags & WEAK_FLAG;
    res.sink = targetData[1] == 0;
    return res;
}
/**
 * Builds a lookup table of the initial state's transitions.
 *
 * The table has 256 slots, one per possible segment type number. Each
 * transition record of initialState is decoded with transition2State and the
 * resulting target state is appended to the slot selected by the record's
 * first byte. Records are 4 bytes apart and are preceded by a one-byte count
 * located at ptr + initialState.offset + 1.
 *
 * @return the table; slots with no matching transition stay empty
 */
vector< vector<SegrulesState> > SegrulesFSA::createInitialTransitionsVector() {
    const int TRANSITION_SIZE = 4;  // bytes per transition record

    vector< vector<SegrulesState> > bySegnum(256);

    const unsigned char* cursor = ptr + initialState.offset + 1;
    int remaining = *cursor;
    ++cursor;

    while (remaining > 0) {
        const unsigned char segnum = cursor[0];
        bySegnum[segnum].push_back(this->transition2State(cursor));
        cursor += TRANSITION_SIZE;
        --remaining;
    }
    return bySegnum;
}
}