SegrulesFSA.cpp
2.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#include <vector>
#include <cassert>
#include "SegrulesFSA.hpp"
using namespace std;
namespace morfeusz {
// Sentinel state: per the field labels below, only the sink and failed flags
// are set and everything else is zero/false. Within this file it is used as
// the default entry of the table built by createInitialTransitionsVector().
// NOTE(review): this is a positional aggregate initializer, so the values must
// stay in the same order as the fields declared in SegrulesFSA.hpp.
SegrulesState SegrulesState::FAILED_STATE = {
0, // offset
false, // accepting
false, // weak
false, // shift orth
true, // sink
true, // failed
};
/**
 * Tries to advance the automaton from `state` over the segment number `segnum`.
 *
 * A state whose offset is 0 is handled as the initial state; any other offset
 * is handled by walking that state's transition records.
 *
 * @param segnum      segment number labelling the transition to follow
 * @param state       current state; must not be a failed state (asserted)
 * @param atEndOfWord selects the acceptance criterion applied to the target
 *                    state (accepting at the end of a word, non-sink otherwise)
 * @param resState    receives the target state when a suitable transition
 *                    exists; it is left unmodified otherwise
 */
void SegrulesFSA::proceedToNext(
        const unsigned char segnum,
        const SegrulesState& state,
        bool atEndOfWord,
        SegrulesState& resState) const {
    assert(!state.failed);

    if (state.offset != 0) {
        doProceedFromNonInitialState(segnum, state, atEndOfWord, resState);
        return;
    }
    doProceedFromInitialState(segnum, atEndOfWord, resState);
}
void SegrulesFSA::doProceedFromInitialState(
const unsigned char segnum,
bool atEndOfWord,
SegrulesState& resState) const {
const SegrulesState& newState = initialTransitions[segnum];
if ((atEndOfWord && newState.accepting)
|| (!atEndOfWord && !newState.sink)) {
resState = newState;
}
}
void SegrulesFSA::doProceedFromNonInitialState(
const unsigned char segnum,
const SegrulesState& state,
bool atEndOfWord,
SegrulesState& resState) const {
const unsigned char* currPtr = ptr + state.offset + 1;
const unsigned char transitionsNum = *currPtr++;
for (int i = 0; i < transitionsNum; i++) {
if (*currPtr == segnum) {
SegrulesState newState = this->transition2State(currPtr);
if ((atEndOfWord && newState.accepting)
|| (!atEndOfWord && !newState.sink)) {
resState = newState;
}
}
currPtr += 4;
}
}
/**
 * Decodes one serialized transition record into the state it leads to.
 *
 * Record layout as read here: the first byte is skipped (callers compare it
 * against the segment number), the second byte is the shift-orth flag, and
 * the following bytes are consumed by readInt16() to obtain the offset of the
 * target state relative to `ptr`.
 *
 * The target state's first byte carries the flag bits (bit value 1 =
 * accepting, bit value 2 = weak); a zero in its second byte marks the state
 * as a sink. A state that is a sink and not accepting is flagged as failed.
 *
 * @param transitionPtr pointer to the first byte of the transition record
 * @return the fully populated target state
 */
SegrulesState SegrulesFSA::transition2State(const unsigned char* transitionPtr) const {
    const unsigned char ACCEPTING_FLAG = 1;
    const unsigned char WEAK_FLAG = 2;

    // Step over the leading byte of the record; it is not needed here.
    const unsigned char* cursor = transitionPtr + 1;

    SegrulesState res;
    res.shiftOrthFromPrevious = *cursor++;
    res.offset = readInt16(cursor);

    const unsigned char* const target = ptr + res.offset;
    const unsigned char flags = target[0];
    res.accepting = flags & ACCEPTING_FLAG;
    res.weak = flags & WEAK_FLAG;
    res.sink = (target[1] == 0);
    res.failed = res.sink && !res.accepting;
    return res;
}
/**
 * Builds a 256-entry lookup table of the transitions leaving the initial
 * state, indexed by segment number.
 *
 * Every entry starts out as SegrulesState::FAILED_STATE; entries for segment
 * numbers that have a transition record under `initialState` are then
 * replaced with the decoded target state. Records are 4 bytes each and are
 * preceded by a one-byte record count at `ptr + initialState.offset + 1`.
 *
 * @return the populated table
 */
vector< SegrulesState > SegrulesFSA::createInitialTransitionsVector() {
    const int TRANSITION_SIZE = 4;

    vector< SegrulesState > table(256, SegrulesState::FAILED_STATE);

    const unsigned char* record = ptr + initialState.offset + 1;
    for (int remaining = *record++; remaining > 0; --remaining) {
        const unsigned char label = *record;
        table[label] = this->transition2State(record);
        record += TRANSITION_SIZE;
    }
    return table;
}
}