cfsa2_impl.hpp
4.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
/*
* File: _vfsa_impl.hpp
* Author: lennyn
*
* Created on October 29, 2013, 9:57 PM
*/
#ifndef _VFSA_IMPL_HPP
#define _VFSA_IMPL_HPP
#include <algorithm>
#include <utility>
#include <iostream>
#include "fsa.hpp"
#include "../utils.hpp"
#include "../endianness.hpp"
static const unsigned char HAS_REMAINING_FLAG = 128;
static const unsigned char ACCEPTING_FLAG = 64;
static const unsigned char LAST_FLAG = 32;
static const unsigned char OFFSET_MASK = 0b01111111;
static const unsigned char FIRST_BYTE_OFFSET_MASK = 0b00011111;
using namespace std;
template <class T>
vector<unsigned char> CompressedFSA2<T>::initializeChar2PopularCharIdx(const unsigned char* ptr) {
return vector<unsigned char>();
// return vector<unsigned char>(ptr + getPopularCharsOffset(), ptr + getPopularCharsOffset() + 256);
}
template <class T>
CompressedFSA2<T>::CompressedFSA2(const unsigned char* ptr, const Deserializer<T>& deserializer)
: FSA<T>(ptr, deserializer) {
}
template <class T>
CompressedFSA2<T>::~CompressedFSA2() {
}
template <class T>
void CompressedFSA2<T>::reallyDoProceed(
const unsigned char* statePtr,
const bool accepting,
State<T>& state) const {
if (accepting) {
T object;
long size = this->deserializer.deserialize(statePtr, object);
state.setNext(statePtr - this->initialStatePtr, object, size);
} else {
state.setNext(statePtr - this->initialStatePtr);
}
}
static inline void passThroughOffsetBytes(unsigned char*& ptr) {
while (*ptr & HAS_REMAINING_FLAG) {
ptr++;
}
ptr++;
}
static inline unsigned int readOffsetBytes(unsigned char*& ptr) {
unsigned int res = *ptr & FIRST_BYTE_OFFSET_MASK;
if (*ptr & HAS_REMAINING_FLAG) {
ptr++;
while (*ptr & HAS_REMAINING_FLAG) {
res <<= 7;
res += *ptr & OFFSET_MASK;
ptr++;
}
res <<= 7;
res += *ptr & OFFSET_MASK;
}
ptr++;
return res;
}
template <class T>
void CompressedFSA2<T>::doProceedToNextByList(
const char c,
const unsigned char* ptr,
State<T>& state) const {
unsigned char* currPtr = const_cast<unsigned char*> (ptr);
while (true) {
// const_cast<Counter*>(&counter)->increment(1);
if ((char) *currPtr == c) {
currPtr++;
bool accepting = *currPtr & ACCEPTING_FLAG;
unsigned int offset = readOffsetBytes(currPtr);
currPtr += offset;
reallyDoProceed(currPtr, accepting, state);
return;
}
else {
currPtr++;
if (*currPtr & LAST_FLAG) {
state.setNextAsSink();
return;
}
else {
passThroughOffsetBytes(currPtr);
}
}
}
}
//template <class T>
//void FSAImpl<T>::doProceedToNextByArray(
// const unsigned char shortLabel,
// const uint32_t* ptr,
// State<T>& state) const {
//#ifdef DEBUG_BUILD
// cerr << "next by array " << (unsigned int) shortLabel << endl;
//#endif
// uint32_t offset = ntohl(ptr[shortLabel]);
//#ifdef DEBUG_BUILD
// cerr << "still alive " << endl;
//#endif
// if (offset != 0) {
// const unsigned char* currPtr = this->startPtr + offset;
// reallyDoProceed(currPtr, state);
// } else {
// state.setNextAsSink();
// }
//}
template <class T>
void CompressedFSA2<T>::proceedToNext(const char c, State<T>& state) const {
#ifdef DEBUG_BUILD
if (c <= 'z' && 'a' <= c)
cerr << "NEXT " << c << " from " << state.getOffset() << endl;
else
cerr << "NEXT " << (short) c << " from " << state.getOffset() << endl;
#endif
const unsigned char* fromPointer = this->initialStatePtr + state.getOffset();
unsigned long transitionsTableOffset = 0;
if (state.isAccepting()) {
transitionsTableOffset += state.getValueSize();
}
this->doProceedToNextByList(
c,
fromPointer + transitionsTableOffset,
state);
}
#endif /* _VFSA_IMPL_HPP */