|
1
2
3
4
5
6
7
|
/*
* File: Environment.cpp
* Author: mlenart
*
* Created on 22 styczeń 2014, 12:08
*/
|
|
8
9
|
#include <vector>
#include <algorithm>
|
|
10
11
|
#include <string>
#include "DictionariesRepository.hpp"
|
|
12
|
#include "Environment.hpp"
|
|
13
14
15
16
|
#include "deserialization/MorphDeserializer.hpp"
#include "deserialization/morphInterps/InterpretedChunksDecoder.hpp"
#include "deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.hpp"
#include "deserialization/morphInterps/InterpretedChunksDecoder4Generator.hpp"
|
|
17
|
#include "DictionariesRepository.hpp"
|
|
18
|
|
|
19
|
namespace morfeusz {
|
|
20
|
|
|
21
22
|
using namespace std;
|
|
23
|
/**
 * Creates an environment for the given processor type.
 *
 * @param dictName       name passed to DictionariesRepository to look up the dictionary
 * @param processorType  selects which chunks decoder is created (analyzer vs. generator)
 * @param usable         when false, no dictionary lookup is done and
 *                       Dictionary::getEmpty() is used instead
 *
 * The charset converter starts as the one for DEFAULT_MORFEUSZ_CHARSET.
 * The id resolver, segmentation options and segmentation FSA start from the
 * dictionary's own values. The chunks decoder and the case pattern helper are
 * heap-allocated here and released in the destructor.
 */
Environment::Environment(const string& dictName, MorfeuszProcessorType processorType, bool usable)
: usable(usable),
currentCharsetConverter(getCharsetConverter(DEFAULT_MORFEUSZ_CHARSET)),
caseConverter(),
dictionary(usable
        ? DictionariesRepository::getInstance().getDictionary(dictName, processorType)
        : Dictionary::getEmpty()),
idResolver(dictionary->idResolver),
currSegrulesOptions(dictionary->defaultSegrulesOptions),
currSegrulesFSA(dictionary->defaultSegrulesFSA),
// static_cast (instead of a C-style cast) makes the compiler verify that both
// decoder classes really convert to InterpretedChunksDecoder*; it also gives
// both branches of the conditional the same pointer type.
chunksDecoder(
        processorType == ANALYZER
        ? static_cast<InterpretedChunksDecoder*>(new InterpretedChunksDecoder4Analyzer(*this))
        : static_cast<InterpretedChunksDecoder*>(new InterpretedChunksDecoder4Generator(*this))),
processorType(processorType),
casePatternHelper(new CasePatternHelper()) {
}
|
|
41
42
43
44
45
46
47
48
49
50
51
52
53
54
|
/**
 * Maps a charset identifier to the corresponding converter instance.
 *
 * @param charset  one of UTF8, ISO8859_2, CP1250 or CP852
 * @return address of the object returned by the matching converter's getInstance()
 * @throws MorfeuszException ("invalid charset") for any other value
 */
const CharsetConverter* Environment::getCharsetConverter(Charset charset) const {
    if (charset == UTF8) {
        return &UTF8CharsetConverter::getInstance();
    }
    if (charset == ISO8859_2) {
        return &ISO8859_2_CharsetConverter::getInstance();
    }
    if (charset == CP1250) {
        return &Windows_1250_CharsetConverter::getInstance();
    }
    if (charset == CP852) {
        return &CP852_CharsetConverter::getInstance();
    }
    // None of the supported charsets matched.
    throw MorfeuszException("invalid charset");
}
|
|
55
|
|
|
56
57
58
|
/**
 * Releases the chunks decoder and the case pattern helper held by this object.
 */
Environment::~Environment() {
    delete chunksDecoder;
    delete casePatternHelper;
}
|
|
61
62
63
|
/**
 * Switches the current charset converter and passes the same converter to the
 * id resolver.
 *
 * @param charset  charset to switch to
 * @throws MorfeuszException if getCharsetConverter(charset) rejects the value;
 *         in that case nothing is modified
 */
void Environment::setCharset(Charset charset) {
    // Resolve first: if this throws, no member has been touched yet.
    const CharsetConverter* converter = getCharsetConverter(charset);
    currentCharsetConverter = converter;
    idResolver.setCharsetConverter(converter);
}
|
|
65
|
|
|
66
67
68
|
/**
 * @return the charset converter currently in use
 */
const CharsetConverter& Environment::getCharsetConverter() const {
    return *currentCharsetConverter;
}
|
|
69
|
|
|
70
71
72
|
/**
 * @return the case converter owned by this environment
 */
const CaseConverter& Environment::getCaseConverter() const {
    return caseConverter;
}
|
|
73
|
|
|
74
75
76
|
/**
 * @return this environment's id resolver
 */
const IdResolverImpl& Environment::getIdResolver() const {
    return idResolver;
}
|
|
77
|
|
|
78
79
80
|
/**
 * @return the segmentation rules FSA currently selected
 */
const SegrulesFSA& Environment::getCurrentSegrulesFSA() const {
    return *currSegrulesFSA;
}
|
|
81
|
|
|
82
83
84
|
/**
 * @return the FSA of the dictionary currently attached to this environment
 */
const FSAType& Environment::getFSA() const {
    return *(dictionary->fsa);
}
|
|
85
|
|
|
86
87
88
|
/**
 * @return the chunks decoder created in the constructor for this processor type
 */
const InterpretedChunksDecoder& Environment::getInterpretedChunksDecoder() const {
    return *chunksDecoder;
}
|
|
89
|
|
|
90
|
void Environment::setSegrulesOption(const std::string& option, const std::string& value) {
|
|
91
92
93
94
95
96
97
98
99
100
101
102
|
if (this->isUsable()) {
if (this->currSegrulesOptions.find(option) == this->currSegrulesOptions.end()) {
throw MorfeuszException("Invalid segmentation option '" + option + "'");
}
SegrulesOptions prevOptions = this->currSegrulesOptions;
this->currSegrulesOptions[option] = value;
if (this->dictionary->segrulesFSAsMap.find(this->currSegrulesOptions) == this->dictionary->segrulesFSAsMap.end()) {
this->currSegrulesOptions = prevOptions;
throw MorfeuszException("Invalid \"" + option + "\" option: \"" + value + "\". Possible values: " + getAvailableOptionsAsString(option));
}
this->currSegrulesFSA = this->dictionary->segrulesFSAsMap.find(this->currSegrulesOptions)->second;
|
|
103
104
|
}
}
|
|
105
106
107
108
109
110
111
|
/**
 * Looks up the current value of a segmentation option.
 *
 * @param option  option name
 * @return the value currently stored for that option
 * @throws MorfeuszException if the option is not among the current options
 */
string Environment::getSegrulesOption(const std::string& option) const {
    if (currSegrulesOptions.find(option) != currSegrulesOptions.end()) {
        return currSegrulesOptions.find(option)->second;
    }
    throw MorfeuszException("Invalid segmentation option '" + option + "'");
}
|
|
112
|
|
|
113
114
115
|
/**
 * @return the processor type this environment was constructed with
 */
MorfeuszProcessorType Environment::getProcessorType() const {
    return processorType;
}
|
|
116
|
|
|
117
118
|
void Environment::setCaseSensitive(bool caseSensitive) {
this->casePatternHelper->setCaseSensitive(caseSensitive);
|
|
119
|
}
|
|
120
121
122
|
/**
 * @return the case pattern helper owned by this environment
 */
const CasePatternHelper& Environment::getCasePatternHelper() const {
    return *casePatternHelper;
}
|
|
124
|
|
|
125
126
127
128
129
130
|
bool Environment::isSeparator(uint32_t codepoint) const {
return binary_search(
this->dictionary->separatorsList.begin(),
this->dictionary->separatorsList.end(),
codepoint);
}
|
|
131
|
|
|
132
|
bool Environment::isUsable() const {
|
|
133
|
return usable;
|
|
134
|
}
|
|
135
|
|
|
136
137
|
void Environment::setDictionary(const Dictionary* dict) {
this->dictionary = dict;
|
|
138
139
140
141
142
|
idResolver = dictionary->idResolver;
this->idResolver.setCharsetConverter(currentCharsetConverter);
currSegrulesOptions = dictionary->defaultSegrulesOptions;
currSegrulesFSA = dictionary->defaultSegrulesFSA;
}
|
|
143
|
|
|
144
145
146
147
|
/**
 * Renders the values available for an option as a list of double-quoted
 * items separated by ", " (for example: "a", "b").
 *
 * @param option  "aggl" selects the dictionary's aggl options; any other
 *                name selects the praet options
 * @return the formatted list; empty when the selected set is empty
 */
string Environment::getAvailableOptionsAsString(const string& option) const {
    const set<string>& choices = (option == "aggl")
            ? dictionary->availableAgglOptions
            : dictionary->availablePraetOptions;

    string joined;
    for (set<string>::const_iterator it = choices.begin(); it != choices.end(); ++it) {
        // A separator goes before every item except the first.
        if (it != choices.begin()) {
            joined += ", ";
        }
        joined += '"';
        joined += *it;
        joined += '"';
    }
    return joined;
}
|
|
164
|
|
|
165
166
167
168
169
170
171
|
/**
 * @return the aggl option values offered by the current dictionary
 */
const set<string>& Environment::getAvailableAgglOptions() const {
    return dictionary->availableAgglOptions;
}
/**
 * @return the praet option values offered by the current dictionary
 */
const set<string>& Environment::getAvailablePraetOptions() const {
    return dictionary->availablePraetOptions;
}
|
|
172
|
}
|