Morfeusz.hpp
2.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/*
* File: Morfeusz.hpp
* Author: mlenart
*
* Created on November 13, 2013, 5:21 PM
*/
#ifndef MORFEUSZ_HPP
#define MORFEUSZ_HPP
#include <exception>
#include <list>
#include <string>
#include <vector>
#include "EncodedInterpretation.hpp"
#include "fsa/fsa.hpp"
#include "MorphInterpretation.hpp"
#include "InterpsGroup.hpp"
#include "charset/CharsetConverter.hpp"
#include "charset/CaseConverter.hpp"
#include "InterpretedChunk.hpp"
#include "FlexionGraph.hpp"
#include "MorfeuszOptions.hpp"
#include "const.hpp"
// Forward declarations: Morfeusz and ResultsIterator declare each other as
// friends, and Morfeusz::analyze returns a ResultsIterator, so both names
// have to be visible before either class body.
class Morfeusz;
class ResultsIterator;
// FSA instantiated with std::vector<InterpsGroup> as its template argument;
// Morfeusz keeps a pointer to one of these (member `fsa`).
typedef FSA<std::vector<InterpsGroup > > FSAType;
// State instantiated with the same template argument as FSAType; this is the
// state type taken by Morfeusz::feedState.
typedef State<std::vector<InterpsGroup > > StateType;
/**
 * Exception that carries a text message.
 *
 * Derives from std::exception, so it can be caught through the standard base
 * class and its message read with what().
 */
class MorfeuszException : public std::exception {
public:
    /**
     * @param what message to store; it is copied into the exception.
     *
     * The string is copied directly rather than through c_str(), which
     * avoids rescanning it for its length and keeps the complete contents
     * even when the message contains an embedded NUL character.
     */
    MorfeuszException(const std::string& what) : msg(what) {
    }

    virtual ~MorfeuszException() throw () {
    }

    /**
     * @return pointer to the stored message as a NUL-terminated string.
     *         The pointer refers to storage owned by this object, so it is
     *         only valid while the exception object is alive.
     */
    virtual const char* what() const throw () {
        return this->msg.c_str();
    }

private:
    // Declared const: the message is set once in the constructor and never
    // modified afterwards.
    const std::string msg;
};
/**
 * Entry point of the analyzer: takes text and produces MorphInterpretation
 * results, either through a ResultsIterator or into a caller-supplied vector.
 *
 * Only declarations are visible in this header; the behavior notes below that
 * go beyond the signatures are marked as assumptions to verify against the
 * implementation file.
 *
 * NOTE(review): the class stores four raw pointers (fsa, charsetConverter,
 * tagset, caseConverter) and declares a destructor, but the copy constructor
 * is commented out, so the compiler-generated copy operations are in effect.
 * If the destructor releases any of these pointers, copying a Morfeusz would
 * lead to a double release -- confirm ownership in the implementation.
 */
class Morfeusz {
public:
/** Default constructor. NOTE(review): which data it loads is not visible here. */
Morfeusz();
/**
 * Constructs an instance from a file name.
 * `explicit` prevents implicit conversion from std::string to Morfeusz.
 * @param filename presumably the path of the dictionary/automaton file -- confirm.
 */
explicit Morfeusz(const std::string& filename);
/** Virtual destructor, so the class can be destroyed through a base pointer. */
virtual ~Morfeusz();
// Copy constructor intentionally left commented out (see the review note in
// the class comment about the implicitly generated copy operations).
// Morfeusz(const Morfeusz& orig);
/**
 * Analyzes `text` and returns an iterator over the results.
 * @param text input text to analyze.
 * @return a ResultsIterator bound to this Morfeusz instance.
 * NOTE(review): ResultsIterator keeps a reference to this object and a raw
 * pointer into the input; check lifetime requirements in the implementation.
 */
ResultsIterator analyze(const std::string& text) const;
/**
 * Analyzes `text` and delivers the results through `result`.
 * @param text   input text to analyze.
 * @param result output vector; presumably interpretations are appended to it
 *               -- confirm whether existing content is kept.
 */
void analyze(const std::string& text, std::vector<MorphInterpretation>& result) const;
/**
 * Sets the character encoding.
 * @param encoding charset selector; presumably switches charsetConverter -- confirm.
 */
void setEncoding(MorfeuszCharset encoding);
// Morfeusz();
// ResultsIterator needs access to the private members declared below.
friend class ResultsIterator;
private:
/**
 * Processes a single word of the input.
 * @param inputData    pointer into the input, passed by reference so the
 *                     callee can move the caller's position (presumably past
 *                     the consumed word -- confirm).
 * @param inputEnd     end of the input buffer.
 * @param startNodeNum node number at which this word starts.
 * @param result       vector receiving the interpretations.
 */
void processOneWord(
const char*& inputData,
const char* inputEnd,
int startNodeNum,
std::vector<MorphInterpretation>& result) const;
/**
 * Worker behind processOneWord.
 * @param inputData pointer into the input, passed by reference (see above).
 * @param inputEnd  end of the input buffer.
 * @param accum     chunks accumulated so far; presumably the current partial
 *                  path through the word -- confirm.
 * @param graph     FlexionGraph that is filled in by this call.
 */
void doProcessOneWord(
const char*& inputData,
const char* inputEnd,
std::vector<InterpretedChunk>& accum,
FlexionGraph& graph) const;
/**
 * Feeds one code point into an automaton state.
 * @param state     state to update (modified in place via the reference).
 * @param codepoint code point to consume; presumably a Unicode code point -- confirm.
 */
void feedState(
StateType& state,
int codepoint) const;
/**
 * Adds an "ignotium" entry for `word` to `results`.
 * NOTE(review): presumably this is the fallback for words with no known
 * interpretation -- confirm.
 * @param word         the word form.
 * @param startNodeNum node number at which the word starts.
 * @param results      vector receiving the entry.
 */
void appendIgnotiumToResults(
const std::string& word,
int startNodeNum,
std::vector<MorphInterpretation>& results) const;
// Raw pointers; ownership is not visible from this header (see class note).
FSAType* fsa;
CharsetConverter* charsetConverter;
Tagset* tagset;
CaseConverter* caseConverter;
// Held by value, unlike charsetConverter above.
UTF8CharsetConverter utf8CharsetConverter;
// Options object held by value.
MorfeuszOptions options;
};
/**
 * Iterator over MorphInterpretation results, obtained from Morfeusz::analyze.
 *
 * The constructor is private and Morfeusz is a friend, so instances are
 * created by Morfeusz rather than directly by client code.
 *
 * NOTE(review): the object stores a reference to the Morfeusz that created it
 * and a raw pointer to the input, so it presumably must not outlive either of
 * them -- confirm against the implementation. The reference member also makes
 * the class non-assignable.
 */
class ResultsIterator {
public:
/**
 * @return the next interpretation.
 * NOTE(review): behavior when hasNext() is false is not visible here.
 */
MorphInterpretation getNext();
/**
 * @return true when another interpretation is available.
 * Not declared const; presumably it may refill resultsBuffer -- confirm.
 */
bool hasNext();
// Lets Morfeusz call the private constructor below.
friend class Morfeusz;
private:
/**
 * @param text     input text to iterate over.
 * @param morfeusz analyzer instance this iterator is bound to.
 */
ResultsIterator(const std::string& text, const Morfeusz& morfeusz);
// NOTE(review): if this points into the buffer of the `text` constructor
// argument, that string has to stay alive while the iterator is used -- confirm.
const char* rawInput;
// Analyzer this iterator is bound to (non-owning reference).
const Morfeusz& morfeusz;
// Interpretations waiting to be handed out; presumably consumed by getNext() -- confirm.
std::list<MorphInterpretation> resultsBuffer;
// Node number; presumably passed on as startNodeNum to Morfeusz::processOneWord -- confirm.
int startNode;
};
#endif /* MORFEUSZ_HPP */