Blame view

morfeusz/fsa/cfsa1_impl.hpp 4.76 KB
Michał Lenart authored
1
/* 
Michał Lenart authored
2
3
 * File:   cfsa1_impl.hpp
 * Author: mlenart
Michał Lenart authored
4
 *
Michał Lenart authored
5
 * Created on November 4, 2013, 1:08 PM
Michał Lenart authored
6
7
 */
Michał Lenart authored
8
9
10
11
#ifndef CFSA1_IMPL_HPP
#define	CFSA1_IMPL_HPP

#include <vector>
Michał Lenart authored
12
13
14
15
16

#include "fsa.hpp"

using namespace std;
Michał Lenart authored
17
#pragma pack(push, 1)  /* push current alignment to stack */
Michał Lenart authored
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32

struct StateData2 {
    unsigned transitionsNum: 6;
    unsigned array : 1;
    unsigned accepting : 1;
};

struct TransitionData2 {
    unsigned offsetSize : 2;
    unsigned shortLabel : 6;
};


#pragma pack(pop)   /* restore original alignment from stack */
Michał Lenart authored
33
static const unsigned int INITIAL_STATE_OFFSET = 257;
Michał Lenart authored
34
35

template <class T>
Michał Lenart authored
36
37
vector<unsigned char> CompressedFSA1<T>::initializeChar2PopularCharIdx(const unsigned char* ptr) {
    return vector<unsigned char>(ptr, ptr + INITIAL_STATE_OFFSET);
Michał Lenart authored
38
39
40
}

template <class T>
Michał Lenart authored
41
42
CompressedFSA1<T>::CompressedFSA1(const unsigned char* ptr, const Deserializer<T>& deserializer)
: FSA<T>(ptr + INITIAL_STATE_OFFSET, deserializer),
Michał Lenart authored
43
44
45
46
label2ShortLabel(initializeChar2PopularCharIdx(ptr)) {
}

template <class T>
Michał Lenart authored
47
CompressedFSA1<T>::~CompressedFSA1() {
Michał Lenart authored
48
49
50
51

}

template <class T>
Michał Lenart authored
52
void CompressedFSA1<T>::reallyDoProceed(
Michał Lenart authored
53
54
        const unsigned char* statePtr,
        State<T>& state) const {
Michał Lenart authored
55
    const StateData2* sd = reinterpret_cast<const StateData2*>(statePtr);
Michał Lenart authored
56
57
    if (sd->accepting) {
        T object;
Michał Lenart authored
58
        long size = this->deserializer.deserialize(statePtr + 1, object);
Michał Lenart authored
59
        state.setNext(statePtr - this->initialStatePtr, object, size);
Michał Lenart authored
60
61
    }
    else {
Michał Lenart authored
62
        state.setNext(statePtr - this->initialStatePtr);
Michał Lenart authored
63
64
65
66
    }
}

template <class T>
Michał Lenart authored
67
void CompressedFSA1<T>::doProceedToNextByList(
Michał Lenart authored
68
69
70
71
72
73
74
75
76
77
78
        const char c,
        const unsigned char shortLabel,
        const unsigned char* ptr,
        const unsigned int transitionsNum,
        State<T>& state) const {
    register const unsigned char* currPtr = ptr;
    //    TransitionData* foundTransition = (TransitionData*) (fromPointer + transitionsTableOffset);
    bool found = false;
    TransitionData2 td;
    for (unsigned int i = 0; i < transitionsNum; i++) {
        //        const_cast<Counter*>(&counter)->increment(1);
Michał Lenart authored
79
        td = *(reinterpret_cast<const TransitionData2*>(currPtr));
Michał Lenart authored
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
        if (td.shortLabel == shortLabel) {
            if (shortLabel == 0) {
                currPtr++;
                char label = (char) *currPtr;
                if (label == c) {
                    found = true;
                    break;
                }
                else {
                    currPtr += td.offsetSize + 1;
                }
            } else {
                found = true;
                break;
            }
        } 
        else {
            if (td.shortLabel == 0) {
                currPtr++;
            }
            currPtr += td.offsetSize + 1;
        }
    }
    if (!found) {
        state.setNextAsSink();
Michał Lenart authored
105
106
    } 
    else {
Michał Lenart authored
107
108
109
110
111
112
113
114
        currPtr++;
        switch (td.offsetSize) {
            case 0:
                break;
            case 1:
                currPtr += *currPtr + 1;
                break;
            case 2:
Michał Lenart authored
115
                currPtr += ntohs(*((const uint16_t*) currPtr)) + 2;
Michał Lenart authored
116
117
                break;
            case 3:
Michał Lenart authored
118
                currPtr += (((const unsigned int) ntohs(*((const uint16_t*) currPtr))) << 8) + currPtr[2] + 3;
Michał Lenart authored
119
120
121
122
123
124
125
                break;
        }
        reallyDoProceed(currPtr, state);
    }
}

template <class T>
Michał Lenart authored
126
void CompressedFSA1<T>::doProceedToNextByArray(
Michał Lenart authored
127
128
129
130
131
        const unsigned char shortLabel,
        const uint32_t* ptr,
        State<T>& state) const {
    uint32_t offset = ntohl(ptr[shortLabel]);
    if (offset != 0) {
Michał Lenart authored
132
        const unsigned char* currPtr = this->initialStatePtr + offset;
Michał Lenart authored
133
134
135
136
137
138
139
140
        reallyDoProceed(currPtr, state);
    }
    else {
        state.setNextAsSink();
    }
}

template <class T>
Michał Lenart authored
141
142
void CompressedFSA1<T>::proceedToNext(const char c, State<T>& state) const {
    const unsigned char* fromPointer = this->initialStatePtr + state.getOffset();
Michał Lenart authored
143
    unsigned char shortLabel = this->label2ShortLabel[(const unsigned char) c];
Michał Lenart authored
144
    unsigned long transitionsTableOffset = 1;
Michał Lenart authored
145
146
147
    if (state.isAccepting()) {
        transitionsTableOffset += state.getValueSize();
    }
Michał Lenart authored
148
    const StateData2* sd = reinterpret_cast<const StateData2*>(fromPointer);
Michał Lenart authored
149
150
151
152
    if (sd->array) {
        if (shortLabel > 0) {
            this->doProceedToNextByArray(
                    shortLabel,
Michał Lenart authored
153
                    reinterpret_cast<const uint32_t*>(fromPointer + transitionsTableOffset),
Michał Lenart authored
154
155
156
                    state);
        }
        else {
Michał Lenart authored
157
            reallyDoProceed(fromPointer + transitionsTableOffset + 256, state);
Michał Lenart authored
158
159
160
161
162
163
164
            proceedToNext(c, state);
        }
    } 
    else {
        this->doProceedToNextByList(
                c,
                shortLabel,
Michał Lenart authored
165
                fromPointer + transitionsTableOffset,
Michał Lenart authored
166
167
168
169
170
                sd->transitionsNum,
                state);
    }
}
Michał Lenart authored
171
#endif	/* CFSA1_IMPL_HPP */
Michał Lenart authored
172