/*
 * File: _simple_fsa_impl.hpp
 * Author: mlenart
 *
 * Created on October 20, 2013, 12:25 PM
 */
#ifndef _SIMPLE_FSA_IMPL_HPP
#define _SIMPLE_FSA_IMPL_HPP
|
|
11
|
#include <cstring>
|
|
12
13
14
|
#include <algorithm>
#include <utility>
#include <iostream>
|
|
15
|
#include <vector>
|
|
16
|
#include <string>
|
|
17
|
#include <sstream>
|
|
18
|
#include "const.hpp"
|
|
19
20
|
#include "utils.hpp"
#include "deserialization/endianness.hpp"
|
|
21
|
|
|
22
23
|
namespace morfeusz {
|
|
24
25
26
27
28
29
30
31
|
/**
 * Runs the automaton over a NUL-terminated input and reports whether it is accepted.
 *
 * Starting from getInitialState(), one transition is taken per input byte until
 * either the terminating '\0' is reached or the current state reports isSink().
 *
 * @param input NUL-terminated byte string to recognize; it is dereferenced
 *              unconditionally, so it must not be null.
 * @param value receives the value of the final state when that state is
 *              accepting; it is left untouched otherwise.
 * @return true if the state reached when the walk stops is accepting,
 *         false otherwise.
 */
template <class T>
bool FSA<T>::tryToRecognize(const char* input, T& value) const {
    State<T> currState = this->getInitialState();

    // The cursor is a size_t so that arbitrarily long inputs cannot overflow it.
    for (std::size_t i = 0; !currState.isSink() && input[i] != '\0'; ++i) {
#ifdef DEBUG_BUILD
        // Fully qualified: a header must not rely on the includer's using-directives.
        std::cerr << "proceed to next " << input[i] << std::endl;
#endif
        currState.proceedToNext(this, input[i]);
    }

    // The walk stopped either at the end of the input or in a sink state.
    if (!currState.isAccepting()) {
        return false;
    }
    value = currState.getValue();
    return true;
}
/**
 * Creates an automaton over already-loaded serialized data.
 *
 * Both arguments are only stored in the members of the same name; no data is
 * read or validated here.
 *
 * @param initialStatePtr pointer stored as the automaton's initial-state pointer.
 * @param deserializer    deserializer stored for later use by the automaton.
 */
template <class T>
FSA<T>::FSA(const unsigned char* initialStatePtr, const Deserializer<T>& deserializer)
        : initialStatePtr(initialStatePtr),
          deserializer(deserializer) {
    // All members are set up in the initializer list above.
}
/**
 * Returns the state from which recognition starts.
 *
 * This implementation simply hands back a value-initialized State<T>.
 * NOTE(review): concrete automata presumably provide their own version that
 * points at real data - confirm against the class declaration.
 *
 * @return a value-initialized State<T>.
 */
template <class T>
State<T> FSA<T>::getInitialState() const {
    return State<T>();
}
template <class T>
|
|
59
|
FSA<T>* FSA<T>::getFSA(const std::string& filename, const Deserializer<T>& deserializer) {
|
|
60
|
return getFSA(readFile<unsigned char>(filename.c_str()), deserializer);
|
|
61
62
63
|
}
template <class T>
|
|
64
65
|
FSA<T>* FSA<T>::getFSA(const unsigned char* ptr, const Deserializer<T>& deserializer) {
|
|
66
|
uint32_t magicNumber = ntohl(*((const uint32_t*) ptr));
|
|
67
|
if (magicNumber != MAGIC_NUMBER) {
|
|
68
|
throw FileFormatException("Invalid file format");
|
|
69
70
71
72
|
}
uint8_t versionNum = *(ptr + VERSION_NUM_OFFSET);
if (versionNum != VERSION_NUM) {
|
|
73
|
std::ostringstream oss;
|
|
74
|
oss << "Invalid file format version number: " << (int) versionNum << ", should be: " << (int) VERSION_NUM;
|
|
75
|
throw FileFormatException(oss.str());
|
|
76
77
78
|
}
uint8_t implementationNum = *(ptr + IMPLEMENTATION_NUM_OFFSET);
|
|
79
|
|
|
80
|
const unsigned char* startPtr = ptr + FSA_DATA_OFFSET;
|
|
81
82
83
84
85
86
87
88
|
switch (implementationNum) {
case 0:
return new SimpleFSA<T>(startPtr, deserializer);
case 1:
return new CompressedFSA1<T>(startPtr, deserializer);
case 2:
return new CompressedFSA2<T>(startPtr, deserializer);
default:
|
|
89
90
|
std::ostringstream oss;
oss << "Invalid implementation number: " << versionNum << ", should be: " << VERSION_NUM;
|
|
91
|
throw FileFormatException(oss.str());
|
|
92
93
94
|
}
}
|
|
95
96
|
}
|
|
97
|
#endif /* _SIMPLE_FSA_IMPL_HPP */
|