Blame view

morfeusz/fsa/fsa.hpp 5.17 KB
Michał Lenart authored
1
2
3
4
5
6
7
8
9
10
11
/* 
 * File:   fsa.hh
 * Author: mlenart
 *
 * Created on October 17, 2013, 2:00 PM
 */

#ifndef FSA_HPP
#define FSA_HPP

//#include <iostream>
Michał Lenart authored
12
#include <cstring>
Michał Lenart authored
13
#include <cassert>
Michał Lenart authored
14
#include <typeinfo>
Michał Lenart authored
15
16
17
#include <exception>
#include <string>
#include <vector>
Michał Lenart authored
18
#include <inttypes.h>
Michał Lenart authored
19
20
21
22

template <class T> class State;
template <class T> class FSA;
template <class T> class Deserializer;
Michał Lenart authored
23
class FSAException;
Michał Lenart authored
24
25
26
27
28
29
30
31
32

template <class T>
class Deserializer {
public:

    /**
     * Deserialize object from ptr.
     * Returns number of bytes read or -1 on error.
     */
Michał Lenart authored
33
    virtual long deserialize(const unsigned char* ptr, T& object) const = 0;
Michał Lenart authored
34
    virtual ~Deserializer() {}
Michał Lenart authored
35
36
37
38
39
40
41
42
43
44
45
};

/**
 * Finite state automaton.
 */
template <class T>
class FSA {
public:
    /**
     * Get this automaton's initial state.
     */
Michał Lenart authored
46
    State<T> getInitialState() const;
Michał Lenart authored
47
Michał Lenart authored
48
    bool tryToRecognize(const char* input, T& value) const;
Michał Lenart authored
49
50
51

    virtual ~FSA() {
    }
Michał Lenart authored
52
53
54
55
56
57

    /**
     * Create an FSA object from pointer
     */
    static FSA<T>* getFSA(const unsigned char* ptr, const Deserializer<T>& deserializer);
Michał Lenart authored
58
59
60
61
62
    /**
     * Create an FSA object from file
     */
    static FSA<T>* getFSA(const std::string& filename, const Deserializer<T>& deserializer);
Michał Lenart authored
63
64
protected:
Michał Lenart authored
65
66
67
68
69
    /**
     * Create FSA object for givent initial state pointer and deserializer
     */
    FSA(const unsigned char* initialStatePtr, const Deserializer<T>& deserializer);
Michał Lenart authored
70
71
72
73
    /**
     * Proceed to next state
     */
    virtual void proceedToNext(const char c, State<T>& state) const = 0;
Michał Lenart authored
74
    const unsigned char* initialStatePtr;
Michał Lenart authored
75
76
77
78
79
80
81
82
83
    const Deserializer<T>& deserializer;
    friend class State<T>;
private:
    //    FSA();
};

template <class T>
class SimpleFSA : public FSA<T> {
public:
Michał Lenart authored
84
    SimpleFSA(const unsigned char* ptr, const Deserializer<T>& deserializer, bool isTransducer=false);
Michał Lenart authored
85
86
87
88
    virtual ~SimpleFSA();
protected:
    void proceedToNext(const char c, State<T>& state) const;
private:
Michał Lenart authored
89
    bool isTransducer;
Michał Lenart authored
90
91
92
};

template <class T>
Michał Lenart authored
93
class CompressedFSA1 : public FSA<T> {
Michał Lenart authored
94
public:
Michał Lenart authored
95
96
    CompressedFSA1(const unsigned char* ptr, const Deserializer<T>& deserializer);
    virtual ~CompressedFSA1();
Michał Lenart authored
97
98
99
protected:
    void proceedToNext(const char c, State<T>& state) const;
private:
Michał Lenart authored
100
    const std::vector<unsigned char> label2ShortLabel;
Michał Lenart authored
101
Michał Lenart authored
102
    static std::vector<unsigned char> initializeChar2PopularCharIdx(const unsigned char* ptr);
Michał Lenart authored
103
Michał Lenart authored
104
105
106
    void doProceedToNextByList(
        const char c,
        const unsigned char shortLabel,
Michał Lenart authored
107
108
109
110
111
        const unsigned char* ptr,
        const unsigned int transitionsNum,
        State<T>& state) const;
    void reallyDoProceed(
        const unsigned char* statePtr,
Michał Lenart authored
112
113
        State<T>& state) const;
    void doProceedToNextByArray(
Michał Lenart authored
114
115
116
117
118
119
120
121
122
123
124
        const unsigned char shortLabel,
        const uint32_t* ptr,
        State<T>& state) const;
};


template <class T>
class CompressedFSA2 : public FSA<T> {
public:
    CompressedFSA2(const unsigned char* ptr, const Deserializer<T>& deserializer);
    virtual ~CompressedFSA2();
Michał Lenart authored
125
Michał Lenart authored
126
127
protected:
    void proceedToNext(const char c, State<T>& state) const;
Michał Lenart authored
128
Michał Lenart authored
129
130
131
132
133
134
135
private:

    static std::vector<unsigned char> initializeChar2PopularCharIdx(const unsigned char* ptr);

    void doProceedToNextByList(
        const char c,
        const unsigned char* ptr, 
Michał Lenart authored
136
137
138
        State<T>& state) const;
    void reallyDoProceed(
        const unsigned char* statePtr,
Michał Lenart authored
139
        const bool accepting,
Michał Lenart authored
140
        State<T>& state) const;
Michał Lenart authored
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
};

/**
 * A state in an FSA.
 */
template <class T>
class State {
public:

    /**
     * Is this a "sink" state - non-accepting state without outgoing transitions
     */
    bool isSink() const;

    /**
     * Is this an accepting state
     */
    bool isAccepting() const;

    /**
     * Get next state proceeding a transition for given character.
     */
    void proceedToNext(const char c);

    /**
     * Get value of this state.
     * Makes sense only for accepting states.
     * For non-accepting states is throws an exception.
     */
    T getValue() const;
Michał Lenart authored
171
172
173
174

    unsigned char getLastTransitionValue() const;

    void setLastTransitionValue(unsigned char val);
Michał Lenart authored
175
176
177
178
179
180

    /**
     * Get the size (in bytes) of this state's value.
     * Makes sense only for accepting states.
     * For non-accepting states is throws an exception.
     */
Michał Lenart authored
181
    unsigned long getValueSize() const;
Michał Lenart authored
182
Michał Lenart authored
183
    unsigned long getOffset() const;
Michał Lenart authored
184
Michał Lenart authored
185
    void setNext(const unsigned long offset);
Michał Lenart authored
186
    void setNext(const unsigned long offset, const T& value, const unsigned long valueSize);
Michał Lenart authored
187
    void setNextAsSink();
Michał Lenart authored
188
Michał Lenart authored
189
    explicit State(const FSA<T>& fsa);
Michał Lenart authored
190
Michał Lenart authored
191
192
193
    virtual ~State();
private:
    const FSA<T>& fsa;
Michał Lenart authored
194
    unsigned long offset;
Michał Lenart authored
195
196
197
    bool accepting;
    bool sink;
    T value;
Michał Lenart authored
198
    long valueSize;
Michał Lenart authored
199
    unsigned char lastTransitionValue;
Michał Lenart authored
200
201
};
Michał Lenart authored
202
203
204
class FSAException : public std::exception {
public:
    FSAException(const char* what): msg(what) {}
Michał Lenart authored
205
    FSAException(const std::string& what): msg(what.c_str()) {}
Michał Lenart authored
206
207
208
209
210
211
212
213
    virtual ~FSAException() throw() {}
    virtual const char* what() const throw () {
        return this->msg.c_str();
    }
private:
    const std::string msg;
};
Michał Lenart authored
214
215
216
217
218
#include "state_impl.hpp"
#include "fsa_impl.hpp"
#include "simplefsa_impl.hpp"
#include "cfsa1_impl.hpp"
#include "cfsa2_impl.hpp"
Michał Lenart authored
219
220
221
222

#endif	/* FSA_HPP */