Dictionary.cpp
2.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
/*
* File: Dictionary.cpp
* Author: lennyn
*
* Created on August 8, 2014, 3:15 PM
*/
#include "Dictionary.hpp"
#include "charset/CharsetConverter.hpp"
#include "deserialization/MorphDeserializer.hpp"
using namespace std;
namespace morfeusz {
/**
 * Picks the deserializer that matches the requested processor type.
 *
 * Two MorphDeserializer objects are heap-allocated the first time this
 * function runs and are kept for the rest of the program (nothing here ever
 * deletes them). ANALYZER maps to the first object; every other processor
 * type maps to the second.
 *
 * @param processorType kind of processor the deserializer is needed for
 * @return reference to the matching long-lived deserializer
 */
static Deserializer<InterpsGroupsReader>& initializeDeserializer(MorfeuszProcessorType processorType) {
    static Deserializer<InterpsGroupsReader>* forAnalyzer = new MorphDeserializer();
    static Deserializer<InterpsGroupsReader>* forGenerator = new MorphDeserializer();
    if (processorType == ANALYZER) {
        return *forAnalyzer;
    }
    return *forGenerator;
}
/**
 * Collects the distinct values that one option takes across all the
 * SegrulesOptions keys of the given map.
 *
 * The map is taken by const reference so that it is not copied on every
 * call. Keys that do not contain the requested option are skipped; the
 * result of find() is checked against end() before it is dereferenced.
 *
 * @param segrulesFSAsMap map whose keys are inspected; the mapped SegrulesFSA
 *                        pointers are not touched
 * @param option          name of the option to look up in every key
 *                        (the callers in this file pass "aggl" and "praet")
 * @return every value found for the option, without duplicates; empty when
 *         no key defines it
 */
static set<string> getAvailableOptions(const map<SegrulesOptions, SegrulesFSA*>& segrulesFSAsMap, const string& option) {
    typedef map<SegrulesOptions, SegrulesFSA*>::const_iterator EntryIterator;
    set<string> res;
    for (EntryIterator it = segrulesFSAsMap.begin(); it != segrulesFSAsMap.end(); ++it) {
        const SegrulesOptions& options = it->first;
        // Look the option up once and only use the hit if it really exists.
        SegrulesOptions::const_iterator found = options.find(option);
        if (found != options.end()) {
            res.insert(found->second);
        }
    }
    return res;
}
/**
 * Gives access to one shared Dictionary built with the default constructor.
 *
 * The instance is allocated on the first call; every later call returns the
 * same pointer. Nothing in this function ever deletes it.
 *
 * @return pointer to the shared default-constructed dictionary
 */
Dictionary* Dictionary::getEmpty() {
    static Dictionary* const emptyInstance = new Dictionary();
    return emptyInstance;
}
/**
 * Builds a dictionary with no data: fsa and defaultSegrulesFSA are NULL and
 * every other member is value-initialized.
 */
Dictionary::Dictionary()
    : fsa(NULL)
    , id()
    , copyright()
    , idResolver()
    , separatorsList()
    , segrulesFSAsMap()
    , defaultSegrulesOptions()
    , defaultSegrulesFSA(NULL)
    , availableAgglOptions()
    , availablePraetOptions()
{
}
/**
 * Builds a dictionary from the data that fsaFileStartPtr points to.
 *
 * Every member except id and copyright is produced by a helper that receives
 * fsaFileStartPtr; id and copyright start out empty and are filled in by the
 * constructor body.
 *
 * @param fsaFileStartPtr pointer passed to all the loading helpers; presumably
 *                        the start of the serialized dictionary data. How long
 *                        the buffer must stay alive is not visible here.
 * @param processorType   selects which deserializer the FSA is created with
 */
Dictionary::Dictionary(const unsigned char* fsaFileStartPtr, MorfeuszProcessorType processorType)
    : fsa(FSAType::getFSA(fsaFileStartPtr, initializeDeserializer(processorType)))
    , id()
    , copyright()
    , idResolver(fsaFileStartPtr, &UTF8CharsetConverter::getInstance())
    , separatorsList(getSeparatorsList(fsaFileStartPtr))
    , segrulesFSAsMap(createSegrulesFSAsMap(fsaFileStartPtr))
    , defaultSegrulesOptions(getDefaultSegrulesOptions(fsaFileStartPtr))
    // NOTE(review): the next three initializers read the segrulesFSAsMap
    // member, so they rely on it being declared before them in the class --
    // verify against the header.
    , defaultSegrulesFSA(getDefaultSegrulesFSA(segrulesFSAsMap, fsaFileStartPtr))
    , availableAgglOptions(getAvailableOptions(segrulesFSAsMap, "aggl"))
    , availablePraetOptions(getAvailableOptions(segrulesFSAsMap, "praet"))
{
    // The identifier and then the copyright text are read starting 4 bytes
    // past the epilogue pointer.
    // NOTE(review): what the skipped 4 bytes hold is not visible here, and
    // readString is presumably advancing the cursor between the two reads --
    // confirm both against the file format and readString's declaration.
    const unsigned char* epilogueCursor = getEpiloguePtr(fsaFileStartPtr) + 4;
    id = readString(epilogueCursor);
    copyright = readString(epilogueCursor);
}
bool Dictionary::isCompatibleWith(const Dictionary& other) const {
return this->id == other.id
&& this->idResolver.isCompatibleWith(other.idResolver)
&& this->availableAgglOptions == other.availableAgglOptions
&& this->availablePraetOptions == other.availablePraetOptions
&& this->defaultSegrulesOptions == other.defaultSegrulesOptions
&& this->separatorsList == other.separatorsList;
}
}