Morfeusz.hpp
2.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/*
* File: Morfeusz.hpp
* Author: mlenart
*
* Created on November 13, 2013, 5:21 PM
*/
#ifndef MORFEUSZ_HPP
#define MORFEUSZ_HPP
#include <string>
#include <list>
#include <vector>
#include <map>
#include "EncodedInterpretation.hpp"
#include "fsa/fsa.hpp"
#include "MorphInterpretation.hpp"
#include "InterpsGroup.hpp"
#include "charset/CharsetConverter.hpp"
#include "charset/CaseConverter.hpp"
#include "InterpretedChunk.hpp"
#include "FlexionGraph.hpp"
#include "MorfeuszOptions.hpp"
#include "const.hpp"
#include "exceptions.hpp"
#include "Generator.hpp"
#include "Environment.hpp"
#include "segrules/segrules.hpp"
// Forward declarations: the two classes defined in this header name each
// other as friends, so both names must be known up front.
class Morfeusz;
class ResultsIterator;
// Shorthand for the automaton (and its state) instantiated with
// std::vector<InterpsGroup> as the template argument.
// NOTE(review): FSA and State are presumably declared in fsa/fsa.hpp — confirm.
typedef FSA< std::vector<InterpsGroup > > FSAType;
typedef State< std::vector<InterpsGroup > > StateType;
/**
 * Public entry point of the library: exposes text analysis and form
 * generation, each available either as an iterator-returning call or as a
 * call that fills a caller-supplied vector.
 *
 * NOTE(review): the class holds raw pointers (analyzerPtr, analyzerFSA and the
 * values of segrulesFSAsMap) and its copy constructor is commented out, so the
 * compiler-generated copy operations are in effect. If the destructor releases
 * any of these pointers, copying an instance would be unsafe — confirm against
 * the implementation file.
 */
class Morfeusz {
public:
// Default constructor; takes no configuration arguments.
Morfeusz();
// explicit Morfeusz(const std::string& filename);
// Selects the analyzer data file by name.
// NOTE(review): presumably (re)loads analyzerFSA from that file — confirm.
void setAnalyzerFile(const std::string& filename);
// Selects the synthesizer (generator) data file by name.
void setSynthesizerFile(const std::string& filename);
// Virtual, so the class can be destroyed through a base-type pointer/reference.
virtual ~Morfeusz();
// Morfeusz(const Morfeusz& orig);
// Analyzes `text` and returns an iterator over the resulting interpretations.
ResultsIterator analyze(const std::string& text) const;
// Analyzes `text`, delivering the interpretations through `result`.
// NOTE(review): whether `result` is cleared first or appended to is not
// visible from this header — confirm.
void analyze(const std::string& text, std::vector<MorphInterpretation>& result) const;
// Generates interpretations for `lemma`, delivering them through `result`.
void generate(const std::string& lemma, std::vector<MorphInterpretation>& result) const;
// Generates interpretations for `lemma` and returns an iterator over them.
ResultsIterator generate(const std::string& lemma) const;
// Sets the character encoding option.
void setCharset(MorfeuszCharset encoding);
// Morfeusz();
// ResultsIterator is granted access to the private members below.
friend class ResultsIterator;
private:
// Analyzes a single word from the input range [inputData, inputEnd).
// `inputData` is a reference to the cursor pointer, so the callee can move it.
// NOTE(review): presumably advanced past the consumed word — confirm.
// `startNodeNum` is the node number used for the produced interpretations
// (assumption based on the name — confirm).
void analyzeOneWord(
const char*& inputData,
const char* inputEnd,
int startNodeNum,
std::vector<MorphInterpretation>& result) const;
// Worker for analyzeOneWord: takes the chunks accumulated so far (`accum`),
// the graph being built (`graph`) and the current segmentation-rules state.
// NOTE(review): looks recursive given the accumulator and by-value state —
// confirm in the implementation.
void doAnalyzeOneWord(
const char*& inputData,
const char* inputEnd,
std::vector<InterpretedChunk>& accum,
FlexionGraph& graph,
SegrulesStateType segrulesState) const;
// Adds an "ignotium" entry for `word` to `results`.
// NOTE(review): presumably the fallback for words with no known
// interpretation — confirm.
void appendIgnotiumToResults(
const std::string& word,
int startNodeNum,
std::vector<MorphInterpretation>& results) const;
Environment env;
// Raw analyzer data and the automaton built over it.
// NOTE(review): ownership of both pointers is not visible here — confirm.
const unsigned char* analyzerPtr;
FSAType* analyzerFSA;
// Segmentation-rules automata keyed by their options.
std::map<SegrulesOptions, SegrulesFSAType*> segrulesFSAsMap;
// NOTE(review): presumably true when analyzerFSA was loaded via
// setAnalyzerFile rather than from built-in data — confirm.
bool isAnalyzerFSAFromFile;
Generator generator;
// const CharsetConverter* charsetConverter;
// const Tagset* tagset;
// const CaseConverter* caseConverter;
//
// UTF8CharsetConverter utf8CharsetConverter;
MorfeuszOptions options;
};
/**
 * Iterator over MorphInterpretation results, exposing a hasNext()/getNext()
 * pair.
 *
 * The constructor is private and Morfeusz is a friend, so instances are
 * created by Morfeusz only.
 */
class ResultsIterator {
public:
    // Returns the next interpretation by value.
    // NOTE(review): behaviour when hasNext() is false is not visible from this
    // header — confirm before calling unguarded.
    MorphInterpretation getNext();

    // Tells whether another interpretation is available.
    bool hasNext();

    friend class Morfeusz;

private:
    // Builds the iterator from a vector of results.
    // The parameter type is spelled std::vector (it was an unqualified
    // `vector`), so this header no longer depends on some included header
    // leaking `using namespace std;`. The type itself is unchanged.
    ResultsIterator(std::vector<MorphInterpretation>& res);

    // NOTE(review): presumably points at input text not yet analyzed — confirm.
    const char* rawInput;
    // Interpretations held by the iterator.
    std::list<MorphInterpretation> resultsBuffer;
    // NOTE(review): presumably the graph node number at which the next
    // analyzed word starts — confirm.
    int startNode;
};
#endif /* MORFEUSZ_HPP */