|
1
2
3
4
5
6
7
8
9
10
11
|
/*
* File: fsa.hh
* Author: mlenart
*
* Created on October 17, 2013, 2:00 PM
*/
#ifndef FSA_HPP
#define FSA_HPP
//#include <iostream>
|
|
12
|
#include <cstring>
|
|
13
|
#include <cassert>
|
|
14
|
#include <typeinfo>
|
|
15
16
17
|
#include <exception>
#include <string>
#include <vector>
|
|
18
|
#include <inttypes.h>
|
|
19
20
21
22
|
template <class T> class State;
template <class T> class FSA;
template <class T> class Deserializer;
|
|
23
|
class FSAException;
|
|
24
25
26
27
28
29
30
31
32
|
template <class T>
class Deserializer {
public:
/**
* Deserialize object from ptr.
* Returns number of bytes read or -1 on error.
*/
|
|
33
|
virtual long deserialize(const unsigned char* ptr, T& object) const = 0;
|
|
34
|
virtual ~Deserializer() {}
|
|
35
36
37
38
39
40
41
42
43
44
45
|
};
/**
* Finite state automaton.
*/
template <class T>
class FSA {
public:
/**
* Get this automaton's initial state.
*/
|
|
46
|
State<T> getInitialState() const;
|
|
47
|
|
|
48
|
bool tryToRecognize(const char* input, T& value) const;
|
|
49
50
51
|
virtual ~FSA() {
}
|
|
52
53
54
55
56
57
|
/**
* Create an FSA object from pointer
*/
static FSA<T>* getFSA(const unsigned char* ptr, const Deserializer<T>& deserializer);
|
|
58
59
60
61
62
|
/**
* Create an FSA object from file
*/
static FSA<T>* getFSA(const std::string& filename, const Deserializer<T>& deserializer);
|
|
63
64
|
protected:
|
|
65
66
67
68
69
|
/**
* Create FSA object for givent initial state pointer and deserializer
*/
FSA(const unsigned char* initialStatePtr, const Deserializer<T>& deserializer);
|
|
70
71
72
73
|
/**
* Proceed to next state
*/
virtual void proceedToNext(const char c, State<T>& state) const = 0;
|
|
74
|
const unsigned char* initialStatePtr;
|
|
75
76
77
78
79
80
81
82
83
|
const Deserializer<T>& deserializer;
friend class State<T>;
private:
// FSA();
};
template <class T>
class SimpleFSA : public FSA<T> {
public:
|
|
84
|
SimpleFSA(const unsigned char* ptr, const Deserializer<T>& deserializer, bool isTransducer=false);
|
|
85
86
87
88
|
virtual ~SimpleFSA();
protected:
void proceedToNext(const char c, State<T>& state) const;
private:
|
|
89
|
bool isTransducer;
|
|
90
91
92
|
};
template <class T>
|
|
93
|
class CompressedFSA1 : public FSA<T> {
|
|
94
|
public:
|
|
95
96
|
CompressedFSA1(const unsigned char* ptr, const Deserializer<T>& deserializer);
virtual ~CompressedFSA1();
|
|
97
98
99
|
protected:
void proceedToNext(const char c, State<T>& state) const;
private:
|
|
100
|
const std::vector<unsigned char> label2ShortLabel;
|
|
101
|
|
|
102
|
static std::vector<unsigned char> initializeChar2PopularCharIdx(const unsigned char* ptr);
|
|
103
|
|
|
104
105
106
|
void doProceedToNextByList(
const char c,
const unsigned char shortLabel,
|
|
107
108
109
110
111
|
const unsigned char* ptr,
const unsigned int transitionsNum,
State<T>& state) const;
void reallyDoProceed(
const unsigned char* statePtr,
|
|
112
113
|
State<T>& state) const;
void doProceedToNextByArray(
|
|
114
115
116
117
118
119
120
121
122
123
124
|
const unsigned char shortLabel,
const uint32_t* ptr,
State<T>& state) const;
};
template <class T>
class CompressedFSA2 : public FSA<T> {
public:
CompressedFSA2(const unsigned char* ptr, const Deserializer<T>& deserializer);
virtual ~CompressedFSA2();
|
|
125
|
|
|
126
127
|
protected:
void proceedToNext(const char c, State<T>& state) const;
|
|
128
|
|
|
129
130
131
132
133
134
135
|
private:
static std::vector<unsigned char> initializeChar2PopularCharIdx(const unsigned char* ptr);
void doProceedToNextByList(
const char c,
const unsigned char* ptr,
|
|
136
137
138
|
State<T>& state) const;
void reallyDoProceed(
const unsigned char* statePtr,
|
|
139
|
const bool accepting,
|
|
140
|
State<T>& state) const;
|
|
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
|
};
/**
* A state in an FSA.
*/
template <class T>
class State {
public:
/**
* Is this a "sink" state - non-accepting state without outgoing transitions
*/
bool isSink() const;
/**
* Is this an accepting state
*/
bool isAccepting() const;
/**
* Get next state proceeding a transition for given character.
*/
void proceedToNext(const char c);
/**
* Get value of this state.
* Makes sense only for accepting states.
* For non-accepting states is throws an exception.
*/
T getValue() const;
|
|
171
172
173
174
|
unsigned char getLastTransitionValue() const;
void setLastTransitionValue(unsigned char val);
|
|
175
176
177
178
179
180
|
/**
* Get the size (in bytes) of this state's value.
* Makes sense only for accepting states.
* For non-accepting states is throws an exception.
*/
|
|
181
|
unsigned long getValueSize() const;
|
|
182
|
|
|
183
|
unsigned long getOffset() const;
|
|
184
|
|
|
185
|
void setNext(const unsigned long offset);
|
|
186
|
void setNext(const unsigned long offset, const T& value, const unsigned long valueSize);
|
|
187
|
void setNextAsSink();
|
|
188
|
|
|
189
|
explicit State(const FSA<T>& fsa);
|
|
190
|
|
|
191
192
193
|
virtual ~State();
private:
const FSA<T>& fsa;
|
|
194
|
unsigned long offset;
|
|
195
196
197
|
bool accepting;
bool sink;
T value;
|
|
198
|
long valueSize;
|
|
199
|
unsigned char lastTransitionValue;
|
|
200
201
|
};
|
|
202
203
204
|
class FSAException : public std::exception {
public:
FSAException(const char* what): msg(what) {}
|
|
205
|
FSAException(const std::string& what): msg(what.c_str()) {}
|
|
206
207
208
209
210
211
212
213
|
virtual ~FSAException() throw() {}
virtual const char* what() const throw () {
return this->msg.c_str();
}
private:
const std::string msg;
};
|
|
214
215
216
217
218
|
#include "state_impl.hpp"
#include "fsa_impl.hpp"
#include "simplefsa_impl.hpp"
#include "cfsa1_impl.hpp"
#include "cfsa2_impl.hpp"
|
|
219
220
221
222
|
#endif /* FSA_HPP */
|