|
1
2
3
4
5
6
7
8
9
10
11
|
/*
* File: fsa.hh
* Author: mlenart
*
* Created on October 17, 2013, 2:00 PM
*/
#ifndef FSA_HPP
#define FSA_HPP
//#include <iostream>
|
|
12
|
#include <cstring>
|
|
13
|
#include <cassert>
|
|
14
|
#include <typeinfo>
|
|
15
16
17
|
#include <exception>
#include <string>
#include <vector>
|
|
18
|
#include <inttypes.h>
|
|
19
|
|
|
20
21
|
#include "morfeusz2.h"
|
|
22
23
|
namespace morfeusz {
|
|
24
25
26
27
28
29
30
31
32
33
34
35
|
template <class T> class State;
template <class T> class FSA;
template <class T> class Deserializer;
template <class T>
class Deserializer {
public:
/**
* Deserialize object from ptr.
* Returns number of bytes read or -1 on error.
*/
|
|
36
|
virtual long deserialize(const unsigned char* ptr, T& object) const = 0;
|
|
37
|
virtual ~Deserializer() {}
|
|
38
39
40
41
42
43
44
45
46
47
48
|
};
/**
* Finite state automaton.
*/
template <class T>
class FSA {
public:
/**
* Get this automaton's initial state.
*/
|
|
49
|
State<T> getInitialState() const;
|
|
50
|
|
|
51
|
bool tryToRecognize(const char* input, T& value) const;
|
|
52
53
54
|
virtual ~FSA() {
}
|
|
55
56
57
58
59
60
|
/**
* Create an FSA object from pointer
*/
static FSA<T>* getFSA(const unsigned char* ptr, const Deserializer<T>& deserializer);
|
|
61
62
63
64
65
|
/**
* Create an FSA object from file
*/
static FSA<T>* getFSA(const std::string& filename, const Deserializer<T>& deserializer);
|
|
66
67
|
protected:
|
|
68
69
70
71
72
|
/**
* Create FSA object for givent initial state pointer and deserializer
*/
FSA(const unsigned char* initialStatePtr, const Deserializer<T>& deserializer);
|
|
73
74
75
76
|
/**
* Proceed to next state
*/
virtual void proceedToNext(const char c, State<T>& state) const = 0;
|
|
77
|
const unsigned char* initialStatePtr;
|
|
78
79
80
81
82
83
84
85
86
|
const Deserializer<T>& deserializer;
friend class State<T>;
private:
// FSA();
};
template <class T>
class SimpleFSA : public FSA<T> {
public:
|
|
87
|
SimpleFSA(const unsigned char* ptr, const Deserializer<T>& deserializer, bool isTransducer=false);
|
|
88
89
90
91
|
virtual ~SimpleFSA();
protected:
void proceedToNext(const char c, State<T>& state) const;
private:
|
|
92
|
bool isTransducer;
|
|
93
94
95
|
};
template <class T>
|
|
96
|
class CompressedFSA1 : public FSA<T> {
|
|
97
|
public:
|
|
98
99
|
CompressedFSA1(const unsigned char* ptr, const Deserializer<T>& deserializer);
virtual ~CompressedFSA1();
|
|
100
101
102
|
protected:
void proceedToNext(const char c, State<T>& state) const;
private:
|
|
103
|
const std::vector<unsigned char> label2ShortLabel;
|
|
104
|
|
|
105
106
107
|
std::vector< char > hasTransition;
std::vector< State<T> > initialTransitions;
|
|
108
|
static std::vector<unsigned char> initializeChar2PopularCharIdx(const unsigned char* ptr);
|
|
109
|
|
|
110
111
112
113
|
void initializeInitialTransitions();
void doProceedToNext(const char c, State<T>& state, bool initial) const;
|
|
114
115
116
|
void doProceedToNextByList(
const char c,
const unsigned char shortLabel,
|
|
117
118
119
120
121
|
const unsigned char* ptr,
const unsigned int transitionsNum,
State<T>& state) const;
void reallyDoProceed(
const unsigned char* statePtr,
|
|
122
123
|
State<T>& state) const;
void doProceedToNextByArray(
|
|
124
125
126
127
128
129
130
131
132
133
134
|
const unsigned char shortLabel,
const uint32_t* ptr,
State<T>& state) const;
};
template <class T>
class CompressedFSA2 : public FSA<T> {
public:
CompressedFSA2(const unsigned char* ptr, const Deserializer<T>& deserializer);
virtual ~CompressedFSA2();
|
|
135
|
|
|
136
137
|
protected:
void proceedToNext(const char c, State<T>& state) const;
|
|
138
|
|
|
139
140
141
142
143
144
145
|
private:
static std::vector<unsigned char> initializeChar2PopularCharIdx(const unsigned char* ptr);
void doProceedToNextByList(
const char c,
const unsigned char* ptr,
|
|
146
147
148
|
State<T>& state) const;
void reallyDoProceed(
const unsigned char* statePtr,
|
|
149
|
const bool accepting,
|
|
150
|
State<T>& state) const;
|
|
151
152
153
154
155
156
157
158
159
160
161
162
|
};
/**
* A state in an FSA.
*/
template <class T>
class State {
public:
/**
* Is this a "sink" state - non-accepting state without outgoing transitions
*/
|
|
163
|
inline bool isSink() const;
|
|
164
165
166
167
|
/**
* Is this an accepting state
*/
|
|
168
|
inline bool isAccepting() const;
|
|
169
170
171
172
|
/**
* Get next state proceeding a transition for given character.
*/
|
|
173
|
inline void proceedToNext(const FSA<T>& fsa, const char c);
|
|
174
175
176
177
178
179
|
/**
* Get value of this state.
* Makes sense only for accepting states.
* For non-accepting states is throws an exception.
*/
|
|
180
|
inline const T& getValue() const;
|
|
181
182
183
184
185
186
|
/**
* Get the size (in bytes) of this state's value.
* Makes sense only for accepting states.
* For non-accepting states is throws an exception.
*/
|
|
187
|
inline unsigned long getValueSize() const;
|
|
188
|
|
|
189
|
inline unsigned long getOffset() const;
|
|
190
|
|
|
191
|
void setNext(const unsigned long offset);
|
|
192
|
void setNext(const unsigned long offset, const T& value, const unsigned long valueSize);
|
|
193
|
void setNextAsSink();
|
|
194
|
|
|
195
196
197
|
State();
static State<T> getSink();
|
|
198
|
|
|
199
|
virtual ~State();
|
|
200
201
|
friend class CompressedFSA1<T>;
|
|
202
|
private:
|
|
203
|
|
|
204
|
// const FSA<T>& fsa;
|
|
205
|
unsigned long offset;
|
|
206
207
208
|
bool accepting;
bool sink;
T value;
|
|
209
|
long valueSize;
|
|
210
211
|
};
|
|
212
213
|
}
|
|
214
215
216
217
218
|
#include "state_impl.hpp"
#include "fsa_impl.hpp"
#include "simplefsa_impl.hpp"
#include "cfsa1_impl.hpp"
#include "cfsa2_impl.hpp"
|
|
219
220
221
222
|
#endif /* FSA_HPP */
|