Blame view

morfeusz/fsa/fsa.hpp 5.07 KB
Michał Lenart authored
1
2
3
4
5
6
7
8
9
10
11
/* 
 * File:   fsa.hh
 * Author: mlenart
 *
 * Created on October 17, 2013, 2:00 PM
 */

#ifndef FSA_HPP
#define FSA_HPP

//#include <iostream>
Michał Lenart authored
12
#include <cstring>
Michał Lenart authored
13
#include <cassert>
Michał Lenart authored
14
#include <typeinfo>
Michał Lenart authored
15
16
17
#include <exception>
#include <string>
#include <vector>
Michał Lenart authored
18
#include <inttypes.h>
Michał Lenart authored
19
Michał Lenart authored
20
21
#include "morfeusz2.h"
Michał Lenart authored
22
23
namespace morfeusz {
Michał Lenart authored
24
25
26
27
28
29
30
31
32
33
34
35
template <class T> class State;
template <class T> class FSA;
template <class T> class Deserializer;

template <class T>
class Deserializer {
public:

    /**
     * Deserialize object from ptr.
     * Returns number of bytes read or -1 on error.
     */
Michał Lenart authored
36
    virtual long deserialize(const unsigned char* ptr, T& object) const = 0;
Michał Lenart authored
37
    virtual ~Deserializer() {}
Michał Lenart authored
38
39
40
41
42
43
44
45
46
47
48
};

/**
 * Finite state automaton.
 */
template <class T>
class FSA {
public:
    /**
     * Get this automaton's initial state.
     */
Michał Lenart authored
49
    State<T> getInitialState() const;
Michał Lenart authored
50
Michał Lenart authored
51
    bool tryToRecognize(const char* input, T& value) const;
Michał Lenart authored
52
53
54

    virtual ~FSA() {
    }
Michał Lenart authored
55
56
57
58
59
60

    /**
     * Create an FSA object from pointer
     */
    static FSA<T>* getFSA(const unsigned char* ptr, const Deserializer<T>& deserializer);
Michał Lenart authored
61
62
63
64
65
    /**
     * Create an FSA object from file
     */
    static FSA<T>* getFSA(const std::string& filename, const Deserializer<T>& deserializer);
Michał Lenart authored
66
67
protected:
Michał Lenart authored
68
69
70
71
72
    /**
     * Create FSA object for givent initial state pointer and deserializer
     */
    FSA(const unsigned char* initialStatePtr, const Deserializer<T>& deserializer);
Michał Lenart authored
73
74
75
76
    /**
     * Proceed to next state
     */
    virtual void proceedToNext(const char c, State<T>& state) const = 0;
Michał Lenart authored
77
    const unsigned char* initialStatePtr;
Michał Lenart authored
78
79
80
81
82
83
84
85
86
    const Deserializer<T>& deserializer;
    friend class State<T>;
private:
    //    FSA();
};

template <class T>
class SimpleFSA : public FSA<T> {
public:
Michał Lenart authored
87
    SimpleFSA(const unsigned char* ptr, const Deserializer<T>& deserializer, bool isTransducer=false);
Michał Lenart authored
88
89
90
91
    virtual ~SimpleFSA();
protected:
    void proceedToNext(const char c, State<T>& state) const;
private:
Michał Lenart authored
92
    bool isTransducer;
Michał Lenart authored
93
94
95
};

template <class T>
Michał Lenart authored
96
class CompressedFSA1 : public FSA<T> {
Michał Lenart authored
97
public:
Michał Lenart authored
98
99
    CompressedFSA1(const unsigned char* ptr, const Deserializer<T>& deserializer);
    virtual ~CompressedFSA1();
Michał Lenart authored
100
101
102
protected:
    void proceedToNext(const char c, State<T>& state) const;
private:
Michał Lenart authored
103
    const std::vector<unsigned char> label2ShortLabel;
Michał Lenart authored
104
Michał Lenart authored
105
106
107
    std::vector< char > hasTransition;
    std::vector< State<T> > initialTransitions;
Michał Lenart authored
108
    static std::vector<unsigned char> initializeChar2PopularCharIdx(const unsigned char* ptr);
Michał Lenart authored
109
Michał Lenart authored
110
111
112
113
    void initializeInitialTransitions();

    void doProceedToNext(const char c, State<T>& state, bool initial) const;
Michał Lenart authored
114
115
116
    void doProceedToNextByList(
        const char c,
        const unsigned char shortLabel,
Michał Lenart authored
117
118
119
120
121
        const unsigned char* ptr,
        const unsigned int transitionsNum,
        State<T>& state) const;
    void reallyDoProceed(
        const unsigned char* statePtr,
Michał Lenart authored
122
123
        State<T>& state) const;
    void doProceedToNextByArray(
Michał Lenart authored
124
125
126
127
128
129
130
131
132
133
134
        const unsigned char shortLabel,
        const uint32_t* ptr,
        State<T>& state) const;
};


template <class T>
class CompressedFSA2 : public FSA<T> {
public:
    CompressedFSA2(const unsigned char* ptr, const Deserializer<T>& deserializer);
    virtual ~CompressedFSA2();
Michał Lenart authored
135
Michał Lenart authored
136
137
protected:
    void proceedToNext(const char c, State<T>& state) const;
Michał Lenart authored
138
Michał Lenart authored
139
140
141
142
143
144
145
private:

    static std::vector<unsigned char> initializeChar2PopularCharIdx(const unsigned char* ptr);

    void doProceedToNextByList(
        const char c,
        const unsigned char* ptr, 
Michał Lenart authored
146
147
148
        State<T>& state) const;
    void reallyDoProceed(
        const unsigned char* statePtr,
Michał Lenart authored
149
        const bool accepting,
Michał Lenart authored
150
        State<T>& state) const;
Michał Lenart authored
151
152
153
154
155
156
157
158
159
160
161
162
};

/**
 * A state in an FSA.
 */
template <class T>
class State {
public:

    /**
     * Is this a "sink" state - non-accepting state without outgoing transitions
     */
Michał Lenart authored
163
    inline bool isSink() const;
Michał Lenart authored
164
165
166
167

    /**
     * Is this an accepting state
     */
Michał Lenart authored
168
    inline bool isAccepting() const;
Michał Lenart authored
169
170
171
172

    /**
     * Get next state proceeding a transition for given character.
     */
Michał Lenart authored
173
    inline void proceedToNext(const FSA<T>& fsa, const char c);
Michał Lenart authored
174
175
176
177
178
179

    /**
     * Get value of this state.
     * Makes sense only for accepting states.
     * For non-accepting states is throws an exception.
     */
Michał Lenart authored
180
    inline const T& getValue() const;
Michał Lenart authored
181
182
183
184
185
186

    /**
     * Get the size (in bytes) of this state's value.
     * Makes sense only for accepting states.
     * For non-accepting states is throws an exception.
     */
Michał Lenart authored
187
    inline unsigned long getValueSize() const;
Michał Lenart authored
188
Michał Lenart authored
189
    inline unsigned long getOffset() const;
Michał Lenart authored
190
Michał Lenart authored
191
    void setNext(const unsigned long offset);
Michał Lenart authored
192
    void setNext(const unsigned long offset, const T& value, const unsigned long valueSize);
Michał Lenart authored
193
    void setNextAsSink();
Michał Lenart authored
194
Michał Lenart authored
195
196
197
    State();

    static State<T> getSink();
Michał Lenart authored
198
Michał Lenart authored
199
    virtual ~State();
Michał Lenart authored
200
201

    friend class CompressedFSA1<T>;
Michał Lenart authored
202
private:
Michał Lenart authored
203
Michał Lenart authored
204
//    const FSA<T>& fsa;
Michał Lenart authored
205
    unsigned long offset;
Michał Lenart authored
206
207
208
    bool accepting;
    bool sink;
    T value;
Michał Lenart authored
209
    long valueSize;
Michał Lenart authored
210
211
};
Michał Lenart authored
212
213
}
Michał Lenart authored
214
215
216
217
218
#include "state_impl.hpp"
#include "fsa_impl.hpp"
#include "simplefsa_impl.hpp"
#include "cfsa1_impl.hpp"
#include "cfsa2_impl.hpp"
Michał Lenart authored
219
220
221
222

#endif	/* FSA_HPP */