Blame view

morfeusz/segrules/SegrulesFSA.cpp 2.64 KB
Michał Lenart authored
1
2
3


#include <vector>
Michał Lenart authored
4
#include <cassert>
Michał Lenart authored
5
6
7
8
#include "SegrulesFSA.hpp"

using namespace std;
Michał Lenart authored
9
10
namespace morfeusz {
Michał Lenart authored
11
    void SegrulesFSA::proceedToNext(
Michał Lenart authored
12
13
            const unsigned char segnum,
            const SegrulesState& state,
Michał Lenart authored
14
15
16
            bool atEndOfWord,
            SegrulesState& resState) const {
        assert(!state.failed);
Michał Lenart authored
17
        if (state.offset == 0) {
Michał Lenart authored
18
            doProceedFromInitialState(segnum, atEndOfWord, resState);
Michał Lenart authored
19
        } else {
Michał Lenart authored
20
            doProceedFromNonInitialState(segnum, state, atEndOfWord, resState);
Michał Lenart authored
21
        }
Michał Lenart authored
22
23
    }
Michał Lenart authored
24
    void SegrulesFSA::doProceedFromInitialState(
Michał Lenart authored
25
            const unsigned char segnum,
Michał Lenart authored
26
27
            bool atEndOfWord,
            SegrulesState& resState) const {
Michał Lenart authored
28
        const SegrulesState& newState = initialTransitions[segnum];
Michał Lenart authored
29
30
        if ((atEndOfWord && newState.accepting)
                || (!atEndOfWord && !newState.sink)) {
Michał Lenart authored
31
            resState = newState;
Michał Lenart authored
32
33
34
        }
    }
Michał Lenart authored
35
    void SegrulesFSA::doProceedFromNonInitialState(
Michał Lenart authored
36
37
            const unsigned char segnum,
            const SegrulesState& state,
Michał Lenart authored
38
39
            bool atEndOfWord,
            SegrulesState& resState) const {
Michał Lenart authored
40
41
42
43
44
45
46
        const unsigned char* currPtr = ptr + state.offset + 1;
        const unsigned char transitionsNum = *currPtr++;
        for (int i = 0; i < transitionsNum; i++) {
            if (*currPtr == segnum) {
                SegrulesState newState = this->transition2State(currPtr);
                if ((atEndOfWord && newState.accepting)
                        || (!atEndOfWord && !newState.sink)) {
Michał Lenart authored
47
                    resState = newState;
Michał Lenart authored
48
                }
Michał Lenart authored
49
            }
Michał Lenart authored
50
            currPtr += 4;
Michał Lenart authored
51
52
53
        }
    }
Michał Lenart authored
54
55
56
57
58
59
60
61
62
    /**
     * Decodes one serialized transition record into the SegrulesState it
     * leads to.
     *
     * Record layout as consumed here: the first byte (the label) is
     * skipped, the second byte becomes shiftOrthFromPrevious, and the
     * following bytes are decoded by readInt16() into the target state's
     * offset from ptr. (readInt16 is declared elsewhere; presumably it
     * decodes a 2-byte integer - confirm against its definition.)
     *
     * The remaining fields are derived from the target state's own data:
     * bit 0 of its first byte -> accepting, bit 1 -> weak, and a second
     * byte equal to zero -> sink. A state is marked failed when it is a
     * sink that is not accepting.
     *
     * @param transitionPtr pointer to the first byte of a transition record
     * @return the fully populated target state
     */
    SegrulesState SegrulesFSA::transition2State(const unsigned char* transitionPtr) const {
        const unsigned char ACCEPTING_FLAG = 1;
        const unsigned char WEAK_FLAG = 2;

        SegrulesState res;

        // Step over the label byte, then consume the record field by field.
        const unsigned char* cursor = transitionPtr + 1;
        res.shiftOrthFromPrevious = *cursor;
        ++cursor;
        res.offset = readInt16(cursor);

        const unsigned char* const target = ptr + res.offset;
        res.accepting = *target & ACCEPTING_FLAG;
        res.weak = *target & WEAK_FLAG;
        res.sink = *(target + 1) == 0;
        res.failed = !res.accepting && res.sink;
        return res;
    }
Michał Lenart authored
67
Michał Lenart authored
68
    vector< SegrulesState > SegrulesFSA::createInitialTransitionsVector() {
Michał Lenart authored
69
        vector< SegrulesState > res(256, SegrulesState());
Michał Lenart authored
70
71
72
73
74
75
76
77
        const unsigned char* currPtr = ptr + initialState.offset + 1;
        const unsigned char transitionsNum = *currPtr++;
        for (int i = 0; i < transitionsNum; i++) {
            unsigned char segnum = *currPtr;
            res[segnum] = this->transition2State(currPtr);
            currPtr += 4;
        }
        return res;
Michał Lenart authored
78
    }
Michał Lenart authored
79
80

}