cfsa2_impl.hpp
3.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/*
* File: _vfsa_impl.hpp
* Author: lennyn
*
* Created on October 29, 2013, 9:57 PM
*/
#ifndef _VFSA_IMPL_HPP
#define _VFSA_IMPL_HPP
#include <algorithm>
#include <utility>
#include <iostream>
#include "fsa.hpp"
#include "../utils.hpp"
#include "../deserialization/endianness.hpp"
static const unsigned char HAS_REMAINING_FLAG = 128;
static const unsigned char ACCEPTING_FLAG = 64;
static const unsigned char LAST_FLAG = 32;
static const unsigned char OFFSET_MASK = 0x7F; // 0b01111111;
static const unsigned char FIRST_BYTE_OFFSET_MASK = 0x1F; // 0b00011111;
using namespace std;
template <class T>
vector<unsigned char> CompressedFSA2<T>::initializeChar2PopularCharIdx(const unsigned char* ptr) {
return vector<unsigned char>();
// return vector<unsigned char>(ptr + getPopularCharsOffset(), ptr + getPopularCharsOffset() + 256);
}
template <class T>
CompressedFSA2<T>::CompressedFSA2(const unsigned char* ptr, const Deserializer<T>& deserializer)
: FSA<T>(ptr, deserializer) {
}
template <class T>
CompressedFSA2<T>::~CompressedFSA2() {
}
template <class T>
void CompressedFSA2<T>::reallyDoProceed(
const unsigned char* statePtr,
const bool accepting,
State<T>& state) const {
if (accepting) {
T object;
long size = this->deserializer.deserialize(statePtr, object);
state.setNext(statePtr - this->initialStatePtr, object, size);
} else {
state.setNext(statePtr - this->initialStatePtr);
}
}
static inline void passThroughOffsetBytes(unsigned char*& ptr) {
while (*ptr & HAS_REMAINING_FLAG) {
ptr++;
}
ptr++;
}
static inline unsigned int readOffsetBytes(unsigned char*& ptr) {
unsigned int res = *ptr & FIRST_BYTE_OFFSET_MASK;
if (*ptr & HAS_REMAINING_FLAG) {
ptr++;
while (*ptr & HAS_REMAINING_FLAG) {
res <<= 7;
res += *ptr & OFFSET_MASK;
ptr++;
}
res <<= 7;
res += *ptr & OFFSET_MASK;
}
ptr++;
return res;
}
template <class T>
void CompressedFSA2<T>::doProceedToNextByList(
const char c,
const unsigned char* ptr,
State<T>& state) const {
unsigned char* currPtr = const_cast<unsigned char*> (ptr);
while (true) {
// const_cast<Counter*>(&counter)->increment(1);
if ((char) *currPtr == c) {
currPtr++;
bool accepting = *currPtr & ACCEPTING_FLAG;
unsigned int offset = readOffsetBytes(currPtr);
currPtr += offset;
reallyDoProceed(currPtr, accepting, state);
return;
}
else {
currPtr++;
if (*currPtr & LAST_FLAG) {
state.setNextAsSink();
return;
}
else {
passThroughOffsetBytes(currPtr);
}
}
}
}
template <class T>
void CompressedFSA2<T>::proceedToNext(const char c, State<T>& state) const {
#ifdef DEBUG_BUILD
if (c <= 'z' && 'a' <= c)
cerr << "NEXT " << c << " from " << state.getOffset() << endl;
else
cerr << "NEXT " << (short) c << " from " << state.getOffset() << endl;
#endif
const unsigned char* fromPointer = this->initialStatePtr + state.getOffset();
unsigned long transitionsTableOffset = 0;
if (state.isAccepting()) {
transitionsTableOffset += state.getValueSize();
}
this->doProceedToNextByList(
c,
fromPointer + transitionsTableOffset,
state);
}
#endif /* _VFSA_IMPL_HPP */