/*
 * File: Environment.hpp
 * Author: mlenart
 *
 * Created on 22 January 2014, 12:08
 */
#ifndef ENVIRONMENT_HPP
#define ENVIRONMENT_HPP
|
|
11
|
#include <stdint.h>

#include <set>
#include <string>
#include <vector>

#include "case/CaseConverter.hpp"
#include "charset/CharsetConverter.hpp"
#include "fsa/fsa.hpp"
#include "segrules/segrules.hpp"
#include "const.hpp"
#include "IdResolverImpl.hpp"
#include "InterpsGroup.hpp"
#include "case/CasePatternHelper.hpp"
#include "deserialization/InterpsGroupsReader.hpp"
#include "Dictionary.hpp"
|
|
24
|
|
|
25
26
27
28
|
namespace morfeusz {
class InterpretedChunksDecoder;
class CasePatternHelper;
|
|
29
|
struct InterpsGroup;
|
|
30
|
typedef FSA<InterpsGroupsReader> FSAType;
|
|
31
|
|
|
32
33
34
35
36
|
/**
* This class contains data required for morphological analysis/synthesis.
* It contains references to dictionary automaton, charset converter, tagset data etc.
* All of these can be changed by setters, changing Morfeusz behavior (different dictionary, charset, and other options).
*/
|
|
37
38
|
class Environment {
public:
|
|
39
40
41
42
43
44
45
|
/**
* Creates default environment with given initial charset, processor type (analyzer/generator) and default dictionary data ptr.
*
* @param charset
* @param morfeuszProcessor
* @param fileStartPtr
*/
|
|
46
|
explicit Environment(MorfeuszProcessorType morfeuszProcessor, bool usable);
|
|
47
|
|
|
48
49
50
51
52
|
/**
* Sets charset for this environment.
*
* @param charset
*/
|
|
53
|
void setCharset(Charset charset);
|
|
54
|
|
|
55
56
57
58
59
|
/**
* Sets case sensitivity options.
*
* @param caseSensitive - if true, interpretations not matching case will be discarded.
*/
|
|
60
61
|
void setCaseSensitive(bool caseSensitive);
|
|
62
63
64
65
66
67
|
/**
* Gets charset converter that is currently used by this environment.
* Changed by setting charset.
*
* @return - reference to charset converter.
*/
|
|
68
69
|
const CharsetConverter& getCharsetConverter() const;
|
|
70
71
72
73
74
75
|
/**
* Returns case converter that is currently used by this environment.
* Changed by setting case sensitivity option.
*
* @return - reference to case converter.
*/
|
|
76
|
const CaseConverter& getCaseConverter() const;
|
|
77
|
|
|
78
79
80
81
82
83
|
// /**
// * Sets new tagset for this environment.
// *
// * @param tagset
// */
// void setTagset(IdResolverImpl& tagset);
|
|
84
85
86
87
88
89
|
/**
* Gets currently used tagset.
*
* @return
*/
|
|
90
|
const IdResolverImpl& getIdResolver() const;
|
|
91
|
|
|
92
|
/**
|
|
93
|
* Sets dictionary by this environment.
|
|
94
|
*
|
|
95
|
* @param dict - pointer to the dictionary
|
|
96
|
*/
|
|
97
|
void setDictionary(const Dictionary* dict);
|
|
98
|
|
|
99
100
101
102
103
104
|
/**
* Sets segmentation rules option.
*
* @param option
* @param value
*/
|
|
105
106
|
void setSegrulesOption(const std::string& option, const std::string& value);
|
|
107
108
109
110
111
|
/**
* Gets segmentation rules automaton.
*
* @return
*/
|
|
112
113
|
const SegrulesFSA& getCurrentSegrulesFSA() const;
|
|
114
115
116
117
118
|
/**
* Gets dictionary automaton.
*
* @return
*/
|
|
119
120
|
const FSAType& getFSA() const;
|
|
121
122
123
124
|
/**
* Returns decoder that converts interpretations to external format.
* @return
*/
|
|
125
|
const InterpretedChunksDecoder& getInterpretedChunksDecoder() const;
|
|
126
|
|
|
127
128
129
130
|
/**
* Gets processor type (info if this is analyzer or generator environment)
* @return
*/
|
|
131
132
|
MorfeuszProcessorType getProcessorType() const;
|
|
133
134
135
136
137
|
/**
* Return current case pattern helper
*
* @return
*/
|
|
138
139
|
const CasePatternHelper& getCasePatternHelper() const;
|
|
140
141
142
143
144
|
/**
* Returns true iff given codepoint denotes a separator char for ign handling.
* @param codepoint
* @return
*/
|
|
145
146
|
bool isSeparator(uint32_t codepoint) const;
|
|
147
148
149
150
151
152
153
154
155
156
|
const std::set<std::string>& getAvailableAgglOptions() const;
const std::set<std::string>& getAvailablePraetOptions() const;
/**
* Returns true iff this Environment has dictionary attached to it
* @return
*/
bool isUsable() const;
|
|
157
158
|
virtual ~Environment();
private:
|
|
159
|
bool usable;
|
|
160
161
|
const CharsetConverter* currentCharsetConverter;
const CaseConverter caseConverter;
|
|
162
|
|
|
163
164
165
166
167
168
169
170
|
const Dictionary* dictionary;
IdResolverImpl idResolver;
// IdResolverImpl tagset;
//
// const unsigned char* fsaFileStartPtr;
// const FSAType* fsa;
// std::vector<uint32_t> separatorsList;
// std::map<SegrulesOptions, SegrulesFSA*> segrulesFSAsMap;
|
|
171
|
SegrulesOptions currSegrulesOptions;
|
|
172
|
const SegrulesFSA* currSegrulesFSA;
|
|
173
|
// bool isFromFile;
|
|
174
175
|
const InterpretedChunksDecoder* chunksDecoder;
|
|
176
|
MorfeuszProcessorType processorType;
|
|
177
|
CasePatternHelper* casePatternHelper;
|
|
178
|
|
|
179
|
const CharsetConverter* getCharsetConverter(Charset charset) const;
|
|
180
181
|
std::string getAvailableOptionsAsString(const std::string& option) const;
|
|
182
183
|
};
|
|
184
185
|
}
|
|
186
187
|
#endif /* ENVIRONMENT_HPP */
|