Environment.cpp
3.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/*
* File: Environment.cpp
* Author: mlenart
*
* Created on 22 January 2014, 12:08
*/
#include "Environment.hpp"
#include "InterpretedChunksDecoder.hpp"
#include "MorphDeserializer.hpp"
#include "exceptions.hpp"
//class InterpretedChunksDecoder4Analyzer;
//class InterpretedChunksDecoder4Generator;
/**
 * Returns the deserializer that is passed to getFSA when an FSA is built.
 *
 * A MorphDeserializer is allocated on the first call and kept in a
 * function-local static, so every later call returns the same pointer.
 * Nothing in this file deletes that instance.
 */
static Deserializer<vector<InterpsGroup> >* initializeDeserializer() {
    static Deserializer<vector<InterpsGroup> >* const sharedDeserializer = new MorphDeserializer();
    return sharedDeserializer;
}
/**
 * Looks up the segrules FSA stored under the default options
 * ("aggl" = "isolated", "praet" = "split").
 *
 * @param map  segrules FSAs keyed by their options
 * @return the FSA registered for the default options; the map keeps ownership
 * @throws MorfeuszException if the map has no entry for the default options
 *         (the end iterator used to be dereferenced in that case, which is
 *         undefined behaviour)
 */
static SegrulesFSA* getDefaultSegrulesFSA(const map<SegrulesOptions, SegrulesFSA*>& map) {
    SegrulesOptions opts;
    opts["aggl"] = "isolated";
    opts["praet"] = "split";
    // The parameter is itself called `map`, so the container type has to be
    // spelled with an explicit std:: qualifier inside the body.
    const std::map<SegrulesOptions, SegrulesFSA*>::const_iterator found = map.find(opts);
    if (found == map.end()) {
        throw MorfeuszException("default segrules FSA not found");
    }
    return found->second;
}
/**
 * Deletes every SegrulesFSA referenced by the map and then empties the map.
 *
 * @param fsasMap  map whose mapped pointers are deleted; it is empty on return
 */
static void deleteSegrulesFSAs(std::map<SegrulesOptions, SegrulesFSA*>& fsasMap) {
    typedef std::map<SegrulesOptions, SegrulesFSA*>::iterator FsaEntryIterator;

    FsaEntryIterator entry = fsasMap.begin();
    while (entry != fsasMap.end()) {
        delete entry->second;
        ++entry;
    }
    // Drop the now-dangling pointers so they cannot be deleted a second time.
    fsasMap.clear();
}
/**
 * Builds an environment on top of FSA data that is already in memory.
 *
 * @param charset          charset whose converter becomes the current one
 * @param processorType    ANALYZER selects InterpretedChunksDecoder4Analyzer;
 *                         any other value selects InterpretedChunksDecoder4Generator
 * @param fsaFileStartPtr  start of the FSA data; the pointer is stored as-is and,
 *                         because isFromFile starts out false, the destructor
 *                         does not delete it
 * @throws MorfeuszException if charset is not one of the supported values
 */
Environment::Environment(
        MorfeuszCharset charset,
        MorfeuszProcessorType processorType,
        const unsigned char* fsaFileStartPtr)
    // getCharsetConverter only takes the address of a converter member here;
    // it does not use the converter object itself.
    : currentCharsetConverter(getCharsetConverter(charset)),
      utf8CharsetConverter(),
      isoCharsetConverter(),
      cp1250CharsetConverter(),
      cp852CharsetConverter(),
      caseConverter(),
      tagset(fsaFileStartPtr),
      fsaFileStartPtr(fsaFileStartPtr),
      fsa(FSAType::getFSA(fsaFileStartPtr, *initializeDeserializer())),
      segrulesFSAsMap(createSegrulesFSAsMap(fsaFileStartPtr)),
      currSegrulesFSA(getDefaultSegrulesFSA(segrulesFSAsMap)),
      isFromFile(false),
      // Both branches are converted to the common base pointer so that the
      // conditional expression has a single type.
      chunksDecoder(
          processorType == ANALYZER
              ? static_cast<InterpretedChunksDecoder*>(new InterpretedChunksDecoder4Analyzer(*this))
              : static_cast<InterpretedChunksDecoder*>(new InterpretedChunksDecoder4Generator(*this))) {
}
/**
 * Maps a charset identifier to the matching converter member.
 *
 * @param charset  one of UTF8, ISO8859_2, CP1250 or CP852
 * @return pointer to the converter member for that charset
 * @throws MorfeuszException for any other value
 */
const CharsetConverter* Environment::getCharsetConverter(MorfeuszCharset charset) const {
    if (charset == UTF8) {
        return &utf8CharsetConverter;
    }
    if (charset == ISO8859_2) {
        return &isoCharsetConverter;
    }
    if (charset == CP1250) {
        return &cp1250CharsetConverter;
    }
    if (charset == CP852) {
        return &cp852CharsetConverter;
    }
    throw MorfeuszException("invalid charset");
}
/**
 * Releases the FSA and the chunks decoder unconditionally; the segrules FSAs
 * and the FSA data buffer are released only when isFromFile is set, i.e. after
 * setFSAFile has loaded them.
 */
Environment::~Environment() {
    delete fsa;
    if (isFromFile) {
        // NOTE(review): segrules FSAs built by the constructor (isFromFile == false)
        // are not deleted here — confirm whether createSegrulesFSAsMap allocates them.
        deleteSegrulesFSAs(segrulesFSAsMap);
        // NOTE(review): if readFile allocates the buffer with new[], this
        // should be delete[] — confirm against readFile.
        delete fsaFileStartPtr;
    }
    delete chunksDecoder;
}
/**
 * Makes the converter for the given charset the current one.
 *
 * @param charset  charset to switch to
 * @throws MorfeuszException if the charset is not supported
 */
void Environment::setCharset(MorfeuszCharset charset) {
    currentCharsetConverter = getCharsetConverter(charset);
}
/**
 * @return the currently selected charset converter
 */
const CharsetConverter& Environment::getCharsetConverter() const {
    return *currentCharsetConverter;
}
/**
 * @return the case converter held by this environment
 */
const CaseConverter& Environment::getCaseConverter() const {
    return caseConverter;
}
/**
 * Replaces the stored tagset by assigning from the given one.
 *
 * @param tagset  tagset to copy into this environment
 */
void Environment::setTagset(const Tagset& tagset) {
    // The parameter shadows the member, hence the explicit this->.
    this->tagset = tagset;
}
/**
 * @return the tagset currently stored in this environment
 */
const Tagset& Environment::getTagset() const {
    return tagset;
}
void Environment::setFSAFile(const std::string& filename) {
if (this->isFromFile) {
delete this->fsa;
deleteSegrulesFSAs(this->segrulesFSAsMap);
delete this->fsaFileStartPtr;
}
this->fsaFileStartPtr = readFile<unsigned char>(filename.c_str());
this->fsa = FSA< vector<InterpsGroup> > ::getFSA(fsaFileStartPtr, *initializeDeserializer());
this->segrulesFSAsMap = createSegrulesFSAsMap(this->fsaFileStartPtr);
this->isFromFile = true;
}
/**
 * @return the segrules FSA that is currently selected
 */
const SegrulesFSA& Environment::getCurrentSegrulesFSA() const {
    return *currSegrulesFSA;
}
/**
 * @return the FSA currently held by this environment
 */
const FSAType& Environment::getFSA() const {
    return *fsa;
}
/**
 * @return the chunks decoder created in the constructor for the chosen
 *         processor type
 */
const InterpretedChunksDecoder& Environment::getInterpretedChunksDecoder() const {
    return *chunksDecoder;
}