#include <vector>
#include <cassert>
#include "SegrulesFSA.hpp"
using namespace std;
namespace morfeusz {
void SegrulesFSA::proceedToNext(
|
|
12
13
|
const unsigned char segnum,
const SegrulesState& state,
|
|
14
15
16
|
bool atEndOfWord,
SegrulesState& resState) const {
assert(!state.failed);
|
|
17
|
if (state.offset == 0) {
|
|
18
|
doProceedFromInitialState(segnum, atEndOfWord, resState);
|
|
19
|
} else {
|
|
20
|
doProceedFromNonInitialState(segnum, state, atEndOfWord, resState);
|
|
21
|
}
|
|
22
23
|
}
void SegrulesFSA::doProceedFromInitialState(
|
|
25
|
const unsigned char segnum,
|
|
26
27
|
bool atEndOfWord,
SegrulesState& resState) const {
|
|
28
|
const SegrulesState& newState = initialTransitions[segnum];
|
|
29
30
|
if ((atEndOfWord && newState.accepting)
|| (!atEndOfWord && !newState.sink)) {
|
|
31
|
resState = newState;
|
|
32
33
34
|
}
}
void SegrulesFSA::doProceedFromNonInitialState(
|
|
36
37
|
const unsigned char segnum,
const SegrulesState& state,
|
|
38
39
|
bool atEndOfWord,
SegrulesState& resState) const {
|
|
40
41
42
43
44
45
46
|
const unsigned char* currPtr = ptr + state.offset + 1;
const unsigned char transitionsNum = *currPtr++;
for (int i = 0; i < transitionsNum; i++) {
if (*currPtr == segnum) {
SegrulesState newState = this->transition2State(currPtr);
if ((atEndOfWord && newState.accepting)
|| (!atEndOfWord && !newState.sink)) {
|
|
47
|
resState = newState;
|
|
48
|
}
|
|
49
|
}
|
|
50
|
currPtr += 4;
|
|
51
52
53
|
}
}
/**
 * Decodes one transition record into the SegrulesState it leads to.
 *
 * The record is read as: one leading byte that is skipped here, one byte
 * stored as `shiftOrthFromPrevious`, then a 16-bit value (via readInt16)
 * stored as the target state's offset. The target state's own bytes at
 * `ptr + offset` supply the remaining fields: bit 0 of the first byte is
 * the accepting flag, bit 1 is the weak flag, and a second byte equal to 0
 * marks the state as a sink.
 *
 * @param transitionPtr  pointer to the first byte of the transition record
 * @return the decoded state; `failed` is set when the state is a sink and
 *         not accepting
 */
SegrulesState SegrulesFSA::transition2State(const unsigned char* transitionPtr) const {
    const unsigned char ACCEPTING_FLAG = 1;
    const unsigned char WEAK_FLAG = 2;

    SegrulesState res;

    ++transitionPtr;  // skip the leading byte of the record
    res.shiftOrthFromPrevious = *transitionPtr;
    ++transitionPtr;
    res.offset = readInt16(transitionPtr);

    // From here on everything is read from the target state itself.
    const unsigned char* targetState = ptr + res.offset;
    const unsigned char flags = targetState[0];
    res.accepting = flags & ACCEPTING_FLAG;
    res.weak = flags & WEAK_FLAG;
    res.sink = (targetState[1] == 0);
    res.failed = !res.accepting && res.sink;

    return res;
}
/**
 * Builds a 256-entry lookup table, indexed by segment number, holding the
 * target state of every transition that leaves `initialState`.
 *
 * Entries for segment numbers without a transition keep the
 * default-constructed SegrulesState. The transitions are read from
 * `ptr + initialState.offset + 1`: one count byte followed by that many
 * 4-byte records whose first byte is the segment number.
 *
 * @return the populated table
 */
vector< SegrulesState > SegrulesFSA::createInitialTransitionsVector() {
    vector< SegrulesState > table(256, SegrulesState());

    const unsigned char* cursor = ptr + initialState.offset + 1;
    const unsigned char transitionsNum = *cursor;
    ++cursor;

    for (int remaining = transitionsNum; remaining > 0; --remaining) {
        const unsigned char segnum = *cursor;
        table[segnum] = this->transition2State(cursor);
        cursor += 4;  // advance to the next 4-byte record
    }
    return table;
}
}