|
1
2
3
4
5
6
7
8
9
10
|
/*
* File: Environment.hpp
* Author: mlenart
*
* Created on 22 January 2014, 12:08
*/
#ifndef ENVIRONMENT_HPP
#define ENVIRONMENT_HPP
|
|
11
|
#include <vector>
|
|
12
|
#include <set>
|
|
13
|
|
|
14
|
#include "case/CaseConverter.hpp"
|
|
15
|
#include "charset/CharsetConverter.hpp"
|
|
16
17
|
#include "fsa/fsa.hpp"
#include "segrules/segrules.hpp"
|
|
18
|
#include "const.hpp"
|
|
19
|
#include "IdResolverImpl.hpp"
|
|
20
|
#include "InterpsGroup.hpp"
|
|
21
22
|
#include "case/CasePatternHelper.hpp"
#include "deserialization/InterpsGroupsReader.hpp"
|
|
23
|
#include "Dictionary.hpp"
|
|
24
|
|
|
25
26
27
28
|
namespace morfeusz {
class InterpretedChunksDecoder;
class CasePatternHelper;
|
|
29
|
struct InterpsGroup;
|
|
30
|
typedef FSA<InterpsGroupsReader> FSAType;
|
|
31
|
|
|
32
33
34
35
36
|
/**
* This class contains data required for morphological analysis/synthesis.
* It contains references to dictionary automaton, charset converter, tagset data etc.
* All of these can be changed by setters, changing Morfeusz behavior (different dictionary, charset, and other options).
*/
|
|
37
38
|
class Environment {
public:
|
|
39
40
41
42
43
44
45
|
/**
* Creates default environment with given initial charset, processor type (analyzer/generator) and default dictionary data ptr.
*
* @param charset
* @param morfeuszProcessor
* @param fileStartPtr
*/
|
|
46
|
Environment(const std::string& dictName, MorfeuszProcessorType morfeuszProcessor, bool usable);
|
|
47
|
|
|
48
49
50
51
52
|
/**
* Sets charset for this environment.
*
* @param charset
*/
|
|
53
|
void setCharset(Charset charset);
|
|
54
|
|
|
55
56
57
58
59
|
/**
* Sets case sensitivity options.
*
* @param caseSensitive - if true, interpretations not matching case will be discarded.
*/
|
|
60
61
|
void setCaseSensitive(bool caseSensitive);
|
|
62
63
64
65
66
67
|
/**
* Gets charset converter that is currently used by this environment.
* Changed by setting charset.
*
* @return - reference to charset converter.
*/
|
|
68
69
|
const CharsetConverter& getCharsetConverter() const;
|
|
70
71
72
73
74
75
|
/**
* Returns case converter that is currently used by this environment.
* Changed by setting case sensitivity option.
*
* @return - reference to case converter.
*/
|
|
76
|
const CaseConverter& getCaseConverter() const;
|
|
77
|
|
|
78
79
80
81
82
83
|
// /**
// * Sets new tagset for this environment.
// *
// * @param tagset
// */
// void setTagset(IdResolverImpl& tagset);
|
|
84
85
86
87
88
89
|
/**
* Gets currently used tagset.
*
* @return
*/
|
|
90
|
const IdResolverImpl& getIdResolver() const;
|
|
91
|
|
|
92
|
/**
|
|
93
|
* Sets dictionary by this environment.
|
|
94
|
*
|
|
95
|
* @param dict - pointer to the dictionary
|
|
96
|
*/
|
|
97
|
void setDictionary(const Dictionary* dict);
|
|
98
|
|
|
99
100
101
102
103
104
|
/**
* Sets segmentation rules option.
*
* @param option
* @param value
*/
|
|
105
106
|
void setSegrulesOption(const std::string& option, const std::string& value);
|
|
107
|
/**
|
|
108
109
110
111
112
113
114
115
|
* Gets current segmentation rules option.
*
* @param option
* @param value
*/
std::string getSegrulesOption(const std::string& option) const;
/**
|
|
116
117
118
119
|
* Gets segmentation rules automaton.
*
* @return
*/
|
|
120
121
|
const SegrulesFSA& getCurrentSegrulesFSA() const;
|
|
122
123
124
125
126
|
/**
* Gets dictionary automaton.
*
* @return
*/
|
|
127
128
|
const FSAType& getFSA() const;
|
|
129
130
131
132
|
/**
* Returns decoder that converts interpretations to external format.
* @return
*/
|
|
133
|
const InterpretedChunksDecoder& getInterpretedChunksDecoder() const;
|
|
134
|
|
|
135
136
137
138
|
/**
* Gets processor type (info if this is analyzer or generator environment)
* @return
*/
|
|
139
140
|
MorfeuszProcessorType getProcessorType() const;
|
|
141
142
143
144
145
|
/**
* Return current case pattern helper
*
* @return
*/
|
|
146
147
|
const CasePatternHelper& getCasePatternHelper() const;
|
|
148
149
150
151
152
|
/**
* Returns true iff given codepoint denotes a separator char for ign handling.
* @param codepoint
* @return
*/
|
|
153
154
|
bool isSeparator(uint32_t codepoint) const;
|
|
155
156
157
158
159
160
161
162
163
164
|
const std::set<std::string>& getAvailableAgglOptions() const;
const std::set<std::string>& getAvailablePraetOptions() const;
/**
* Returns true iff this Environment has dictionary attached to it
* @return
*/
bool isUsable() const;
|
|
165
166
|
virtual ~Environment();
private:
|
|
167
|
bool usable;
|
|
168
169
|
const CharsetConverter* currentCharsetConverter;
const CaseConverter caseConverter;
|
|
170
|
|
|
171
172
|
const Dictionary* dictionary;
IdResolverImpl idResolver;
|
|
173
|
|
|
174
|
SegrulesOptions currSegrulesOptions;
|
|
175
176
177
|
const SegrulesFSA* currSegrulesFSA;
const InterpretedChunksDecoder* chunksDecoder;
|
|
178
|
MorfeuszProcessorType processorType;
|
|
179
|
CasePatternHelper* casePatternHelper;
|
|
180
|
|
|
181
|
const CharsetConverter* getCharsetConverter(Charset charset) const;
|
|
182
183
|
std::string getAvailableOptionsAsString(const std::string& option) const;
|
|
184
185
|
};
|
|
186
187
|
}
|
|
188
189
|
#endif /* ENVIRONMENT_HPP */
|