/*
 * File:   Morfeusz.hpp
 * Author: mlenart
 *
 * Created on November 13, 2013, 5:21 PM
 */
#ifndef MORFEUSZ_HPP
#define MORFEUSZ_HPP
#include <string>
#include <list>
#include <vector>
#include "EncodedInterpretation.hpp"
#include "fsa.hpp"
#include "MorphInterpretation.hpp"
#include "InterpsGroup.hpp"
#include "charset/CharsetConverter.hpp"
#include "charset/CaseConverter.hpp"
#include "InterpretedChunk.hpp"
#include "FlexionGraph.hpp"
// Forward declarations of the two classes defined further down in this header.
class Morfeusz;
// class AnalyzeResult;  -- disabled; its draft survives only as commented-out
// code at the end of this file.
class ResultsIterator;
// Shorthand for the automaton type and its state type, both instantiated with
// std::vector<InterpsGroup>. FSA and State are not declared in this file;
// presumably they come from "fsa.hpp" -- confirm.
typedef FSA<std::vector<InterpsGroup >> FSAType;
typedef State<std::vector<InterpsGroup >> StateType;
/**
 * Analyzer facade declared by this header.
 *
 * Only declarations are visible here; anything said below about runtime
 * behaviour that depends on the implementation file is marked as an
 * assumption.
 *
 * NOTE(review): the class stores four raw pointers and declares its own
 * destructor, yet no copy constructor or copy assignment is declared, so the
 * compiler-generated ones are used. If the destructor releases those
 * pointers, copying an instance would free them twice -- confirm against the
 * implementation file.
 */
class Morfeusz {
    // ResultsIterator is granted access to the private members below.
    friend class ResultsIterator;

public:
    /**
     * Builds an analyzer from the given file name.
     * `explicit` prevents implicit conversion from std::string.
     *
     * @param filename  presumably the path of the data file backing `fsa`
     *                  -- TODO confirm in the implementation.
     */
    explicit Morfeusz(const std::string& filename);

    virtual ~Morfeusz();

    /**
     * Starts an analysis of `text` and hands back an iterator over the
     * interpretations.
     *
     * @param text  input to analyze.
     * @return      a ResultsIterator bound to this analyzer.
     */
    ResultsIterator analyze(const std::string& text) const;

    /**
     * Overload that reports interpretations through an output vector instead
     * of an iterator.
     *
     * @param text    input to analyze.
     * @param result  vector receiving the interpretations; whether it is
     *                cleared first is not visible here.
     */
    void analyze(const std::string& text,
                 std::vector<MorphInterpretation>& result) const;

    /**
     * Processes a single word of the input.
     *
     * @param inputData     pointer into the input, taken by reference so the
     *                      callee is able to move it; presumably left just
     *                      past the consumed word -- TODO confirm.
     * @param inputEnd      end of the input buffer.
     * @param startNodeNum  node number associated with the start of the word.
     * @param result        vector receiving the interpretations.
     */
    void processOneWord(const char*& inputData,
                        const char* inputEnd,
                        int startNodeNum,
                        std::vector<MorphInterpretation>& result) const;

private:
    /**
     * Internal worker behind processOneWord(); takes an accumulator of
     * interpreted chunks and a FlexionGraph by non-const reference, so both
     * may be modified.
     */
    void doProcessOneWord(const char*& inputData,
                          const char* inputEnd,
                          std::vector<InterpretedChunk>& accum,
                          FlexionGraph& graph) const;

    /** Feeds one code point into the given automaton state. */
    void feedState(StateType& state, int codepoint) const;

    /**
     * Adds an entry for `word` to `results`; judging by the name this is the
     * "unknown word" (ignotium) case -- confirm in the implementation.
     */
    void appendIgnotiumToResults(const std::string& word,
                                 int startNodeNum,
                                 std::vector<MorphInterpretation>& results) const;

    // Collaborators held by raw pointer. Ownership is not expressed by the
    // types; see the review note on the class.
    FSAType* fsa;
    CharsetConverter* charsetConverter;
    Tagset* tagset;
    CaseConverter* caseConverter;

    // Case-sensitivity flag; where it is set and read is not visible here.
    bool caseSensitive;
};
/**
 * Java-style (hasNext / getNext) iterator over the interpretations of a text.
 *
 * The iterator keeps a reference to the Morfeusz instance it was created
 * with, so that instance must stay alive for as long as the iterator is used.
 *
 * NOTE(review): `rawInput` is a raw pointer while the constructor receives
 * the text as a const std::string&. If the pointer refers to that string's
 * buffer, the string must outlive the iterator as well -- confirm in the
 * implementation file.
 */
class ResultsIterator {
public:
    /**
     * @param text      input to iterate over.
     * @param morfeusz  analyzer used to produce the interpretations; stored
     *                  by reference.
     */
    ResultsIterator(const std::string& text, const Morfeusz& morfeusz);

    /**
     * Returns the next interpretation by value. Behaviour when nothing is
     * left is not visible here; call hasNext() first.
     */
    MorphInterpretation getNext();

    /**
     * Tells whether another interpretation is available. Declared non-const,
     * so it may modify the iterator's state.
     */
    bool hasNext();

private:
    const char* rawInput;                          // position in the input text
    const Morfeusz& morfeusz;                      // analyzer, not owned
    std::list<MorphInterpretation> resultsBuffer;  // interpretations held by the iterator
    int startNode;                                 // node number; presumably passed on as startNodeNum -- confirm
};
//class ResultsIterator {
//public:
// ResultsIterator(
// const char* startOfInput,
// const char* endOfInput,
// const Morfeusz& morfeusz);
// virtual ~ResultsIterator();
// ResultsIterator(const ResultsIterator& mit);
// ResultsIterator& operator++();
// ResultsIterator operator++(int);
// bool operator==(const ResultsIterator& rhs);
// bool operator!=(const ResultsIterator& rhs);
// MorphInterpretation& operator*();
//private:
// const char* rawInput;
// const char* endOfInput;
// const Morfeusz& morfeusz;
// vector<MorphInterpretation> resultsBuffer;
//};
//struct AnalyzeResult {
// ResultsIterator iterator;
// const ResultsIterator end;
//};
#endif /* MORFEUSZ_HPP */