cfsa2_impl.hpp
3.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/*
* File: _vfsa_impl.hpp
* Author: lennyn
*
* Created on October 29, 2013, 9:57 PM
*/
#ifndef _VFSA_IMPL_HPP
#define _VFSA_IMPL_HPP
#include <algorithm>
#include <utility>
#include <iostream>
#include "fsa.hpp"
#include "../utils.hpp"
#include "../deserialization/endianness.hpp"
namespace morfeusz {
static const unsigned char HAS_REMAINING_FLAG = 128;
static const unsigned char ACCEPTING_FLAG = 64;
static const unsigned char LAST_FLAG = 32;
static const unsigned char OFFSET_MASK = 0x7F; // 0b01111111;
static const unsigned char FIRST_BYTE_OFFSET_MASK = 0x1F; // 0b00011111;
template <class T>
std::vector<unsigned char> CompressedFSA2<T>::initializeChar2PopularCharIdx(const unsigned char* ptr) {
return std::vector<unsigned char>();
// return std::vector<unsigned char>(ptr + getPopularCharsOffset(), ptr + getPopularCharsOffset() + 256);
}
template <class T>
CompressedFSA2<T>::CompressedFSA2(const unsigned char* ptr, const Deserializer<T>& deserializer)
: FSA<T>(ptr, deserializer) {
}
template <class T>
CompressedFSA2<T>::~CompressedFSA2() {
}
template <class T>
void CompressedFSA2<T>::reallyDoProceed(
const unsigned char* statePtr,
const bool accepting,
State<T>& state) const {
if (accepting) {
T object;
long size = this->deserializer.deserialize(statePtr, object);
state.setNext(statePtr - this->initialStatePtr, object, size);
} else {
state.setNext(statePtr - this->initialStatePtr);
}
}
static inline void passThroughOffsetBytes(unsigned char*& ptr) {
while (*ptr & HAS_REMAINING_FLAG) {
ptr++;
}
ptr++;
}
static inline unsigned int readOffsetBytes(unsigned char*& ptr) {
unsigned int res = *ptr & FIRST_BYTE_OFFSET_MASK;
if (*ptr & HAS_REMAINING_FLAG) {
ptr++;
while (*ptr & HAS_REMAINING_FLAG) {
res <<= 7;
res += *ptr & OFFSET_MASK;
ptr++;
}
res <<= 7;
res += *ptr & OFFSET_MASK;
}
ptr++;
return res;
}
template <class T>
void CompressedFSA2<T>::doProceedToNextByList(
const char c,
const unsigned char* ptr,
State<T>& state) const {
unsigned char* currPtr = const_cast<unsigned char*> (ptr);
while (true) {
// const_cast<Counter*>(&counter)->increment(1);
if ((char) *currPtr == c) {
currPtr++;
bool accepting = *currPtr & ACCEPTING_FLAG;
unsigned int offset = readOffsetBytes(currPtr);
currPtr += offset;
reallyDoProceed(currPtr, accepting, state);
return;
}
else {
currPtr++;
if (*currPtr & LAST_FLAG) {
state.setNextAsSink();
return;
}
else {
passThroughOffsetBytes(currPtr);
}
}
}
}
template <class T>
void CompressedFSA2<T>::proceedToNext(const char c, State<T>& state) const {
#ifdef DEBUG_BUILD
if (c <= 'z' && 'a' <= c)
cerr << "NEXT " << c << " from " << state.getOffset() << endl;
else
cerr << "NEXT " << (short) c << " from " << state.getOffset() << endl;
#endif
const unsigned char* fromPointer = this->initialStatePtr + state.getOffset();
unsigned long transitionsTableOffset = 0;
if (state.isAccepting()) {
transitionsTableOffset += state.getValueSize();
}
this->doProceedToNextByList(
c,
fromPointer + transitionsTableOffset,
state);
}
}
#endif /* _VFSA_IMPL_HPP */