Morfeusz.hpp
3.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
/*
* File: Morfeusz.hpp
* Author: mlenart
*
* Created on November 13, 2013, 5:21 PM
*/
#ifndef MORFEUSZ_HPP
#define MORFEUSZ_HPP
#include <string>
#include <list>
#include <vector>
#include <map>
#include "EncodedInterpretation.hpp"
#include "fsa/fsa.hpp"
#include "MorphInterpretation.hpp"
#include "InterpsGroup.hpp"
#include "charset/CharsetConverter.hpp"
#include "charset/CaseConverter.hpp"
#include "InterpretedChunk.hpp"
#include "FlexionGraph.hpp"
#include "MorfeuszOptions.hpp"
#include "const.hpp"
#include "exceptions.hpp"
#include "Generator.hpp"
#include "Environment.hpp"
#include "segrules/segrules.hpp"
// Forward declarations. ResultsIterator is defined after Morfeusz in this
// header, yet Morfeusz declares methods returning it and names it as a friend.
class Morfeusz;
class ResultsIterator;
// Shorthand for the FSA / State templates instantiated with a vector of
// InterpsGroup as their value type. FSAType is the type of the analyzer
// automaton held by Morfeusz.
typedef FSA< std::vector<InterpsGroup > > FSAType;
typedef State< std::vector<InterpsGroup > > StateType;
/**
 * Public entry point of the library: declares morphological analysis
 * (text -> interpretations) and morphological synthesis
 * (lemma -> interpretations), each available in an iterator-returning and a
 * vector-filling variant.
 *
 * NOTE(review): the class holds raw pointers (analyzerPtr, analyzerFSA,
 * generatorPtr, values of segrulesFSAsMap) and declares a destructor, but no
 * copy constructor / assignment operator is declared here. Whether copying an
 * instance is safe depends on the implementation file - confirm before copying.
 */
class Morfeusz {
public:
/**
 * Create new instance of Morfeusz class.
 */
Morfeusz();
/**
 * Set a file used for morphological analysis.
 *
 * @param filename - path of the analyzer file
 */
void setAnalyzerFile(const std::string& filename);
/**
 * Set a file used for morphological synthesis.
 *
 * @param filename - path of the generator file
 */
void setGeneratorFile(const std::string& filename);
/**
 * Destructor; declared virtual, so the class may be deleted through a
 * pointer to Morfeusz even when subclassed.
 */
virtual ~Morfeusz();
/**
 * Analyze given text and return the results as iterator.
 *
 * @param text - text for morphological analysis
 * @return - iterator over morphological analysis results
 */
ResultsIterator analyze(const std::string& text) const;
/**
 * Perform morphological analysis on a given text and put results in a vector.
 *
 * @param text - text to be analyzed
 * @param result - results vector
 */
void analyze(const std::string& text, std::vector<MorphInterpretation>& result) const;
/**
 * Perform morphological synthesis on a given lemma and return the results as iterator.
 *
 * @param lemma - lemma for morphological synthesis
 * @return - iterator over morphological synthesis results
 */
ResultsIterator generate(const std::string& lemma) const;
/**
 * Perform morphological synthesis on a given lemma and put results in a vector.
 *
 * @param lemma - lemma used as input for synthesis
 * @param result - results vector
 */
void generate(const std::string& lemma, std::vector<MorphInterpretation>& result) const;
/**
 * Set encoding for input and output string objects.
 *
 * @param encoding - one of the MorfeuszCharset values
 */
void setCharset(MorfeuszCharset encoding);
// ResultsIterator is granted access to the private members below.
friend class ResultsIterator;
private:
/**
 * Internal helper for analysing a single word.
 *
 * @param inputData - pointer into the input buffer, taken by reference
 *        (presumably advanced past the consumed word - TODO confirm in the .cpp)
 * @param inputEnd - end of the input buffer
 * @param startNodeNum - node number associated with the word
 *        (presumably the start node of the emitted interpretations - verify)
 * @param result - vector receiving the interpretations
 */
void analyzeOneWord(
const char*& inputData,
const char* inputEnd,
int startNodeNum,
std::vector<MorphInterpretation>& result) const;
/**
 * Internal worker used during single-word analysis.
 *
 * @param inputData - pointer into the input buffer, taken by reference
 * @param inputEnd - end of the input buffer
 * @param accum - chunks accumulated so far
 * @param graph - flexion graph being filled
 * @param segrulesState - current state of the segmentation-rules automaton;
 *        passed by value
 *
 * NOTE(review): looks like a recursive/backtracking step given the
 * accumulator and by-value state - confirm against the implementation.
 */
void doAnalyzeOneWord(
const char*& inputData,
const char* inputEnd,
std::vector<InterpretedChunk>& accum,
FlexionGraph& graph,
SegrulesStateType segrulesState) const;
/**
 * Append an "ignotium" entry for the given word to the results
 * (presumably the marker for an unrecognised word - TODO confirm).
 *
 * @param word - the word to record
 * @param startNodeNum - node number associated with the word
 * @param results - vector receiving the entry
 */
void appendIgnotiumToResults(
const std::string& word,
int startNodeNum,
std::vector<MorphInterpretation>& results) const;
// Shared environment object (see Environment.hpp).
Environment env;
// Raw analyzer data (presumably the bytes the analyzer FSA is built from - verify).
const unsigned char* analyzerPtr;
// Analyzer automaton; raw pointer, ownership not expressed in the type.
FSAType* analyzerFSA;
// Segmentation-rules automata keyed by segmentation options; values are raw pointers.
std::map<SegrulesOptions, SegrulesFSAType*> segrulesFSAsMap;
// Presumably true when the analyzer data was loaded via setAnalyzerFile - TODO confirm.
bool isAnalyzerFSAFromFile;
// Raw generator data; counterpart of analyzerPtr.
const unsigned char* generatorPtr;
// Presumably true when the generator data was loaded via setGeneratorFile - TODO confirm.
bool isGeneratorFSAFromFile;
// Synthesis engine (see Generator.hpp).
Generator generator;
// Disabled members, kept commented out as in the original source
// (presumably superseded by Environment - TODO confirm):
// const CharsetConverter* charsetConverter;
// const Tagset* tagset;
// const CaseConverter* caseConverter;
//
// UTF8CharsetConverter utf8CharsetConverter;
// Options object (see MorfeuszOptions.hpp).
MorfeuszOptions options;
};
/**
 * Iterator-style accessor over MorphInterpretation results.
 *
 * Instances are returned by Morfeusz::analyze(text) and
 * Morfeusz::generate(lemma). The constructor is private and Morfeusz is a
 * friend, so client code cannot construct one directly.
 *
 * Intended usage (inferred from the method names - confirm in the .cpp):
 * call getNext() while hasNext() returns true.
 */
class ResultsIterator {
public:
    /**
     * @return the next interpretation. Behaviour when no results remain is
     *         not visible from this header - check hasNext() first.
     */
    MorphInterpretation getNext();

    /**
     * @return true when another interpretation can be fetched with getNext().
     */
    bool hasNext();

    // Morfeusz needs access to the private constructor.
    friend class Morfeusz;

private:
    /**
     * Build an iterator from a vector of interpretations.
     *
     * The parameter type is spelled std::vector explicitly: this header has
     * no using-directive, so an unqualified `vector` would compile only if
     * some included header leaked `using namespace std`.
     *
     * @param res - interpretations to iterate over
     */
    ResultsIterator(std::vector<MorphInterpretation>& res);

    // Presumably the raw text still to be processed - TODO confirm in the .cpp.
    const char* rawInput;
    // Interpretations buffered for getNext().
    std::list<MorphInterpretation> resultsBuffer;
    // Presumably the graph node number at which the next results start - verify.
    int startNode;
};
#endif /* MORFEUSZ_HPP */