Blame view

morfeusz/Environment.cpp 6 KB
Michał Lenart authored
1
2
3
4
5
6
7
/* 
 * File:   Environment.cpp
 * Author: mlenart
 * 
 * Created on 22 styczeń 2014, 12:08
 */
Michał Lenart authored
8
9
#include <vector>
#include <algorithm>
Michał Lenart authored
10
11
#include <string>
#include "DictionariesRepository.hpp"
Michał Lenart authored
12
#include "Environment.hpp"
Michał Lenart authored
13
14
15
16
#include "deserialization/MorphDeserializer.hpp"
#include "deserialization/morphInterps/InterpretedChunksDecoder.hpp"
#include "deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.hpp"
#include "deserialization/morphInterps/InterpretedChunksDecoder4Generator.hpp"
Michał Lenart authored
17
#include "DictionariesRepository.hpp"
Michał Lenart authored
18
Michał Lenart authored
19
namespace morfeusz {
Michał Lenart authored
20
Michał Lenart authored
21
22
    using namespace std;
Michał Lenart authored
23
    /**
     * Builds an environment for the dictionary called dictName.
     *
     * When usable is false no dictionary is looked up and Dictionary::getEmpty()
     * is used instead. The id resolver, the segmentation options and the
     * segmentation-rules FSA all start out as copies of the dictionary's defaults,
     * and the charset converter is the one for DEFAULT_MORFEUSZ_CHARSET.
     *
     * @param dictName       name handed to DictionariesRepository::getDictionary()
     * @param processorType  ANALYZER selects InterpretedChunksDecoder4Analyzer,
     *                       any other value selects InterpretedChunksDecoder4Generator
     * @param usable         whether a real dictionary should be fetched
     */
    Environment::Environment(const string& dictName, MorfeuszProcessorType processorType, bool usable)
            : usable(usable),
              currentCharsetConverter(getCharsetConverter(DEFAULT_MORFEUSZ_CHARSET)),
              caseConverter(),
              dictionary(usable
                      ? DictionariesRepository::getInstance().getDictionary(dictName, processorType)
                      : Dictionary::getEmpty()),
              idResolver(dictionary->idResolver),
              currSegrulesOptions(dictionary->defaultSegrulesOptions),
              currSegrulesFSA(dictionary->defaultSegrulesFSA),
              // The decoder receives *this while the object is still being built.
              // NOTE(review): presumably it only stores the reference - confirm.
              chunksDecoder(
                      processorType == ANALYZER
                      ? static_cast<InterpretedChunksDecoder*>(new InterpretedChunksDecoder4Analyzer(*this))
                      : static_cast<InterpretedChunksDecoder*>(new InterpretedChunksDecoder4Generator(*this))),
              processorType(processorType),
              casePatternHelper(new CasePatternHelper()) {
    }
Michał Lenart authored
41
42
43
44
45
46
47
48
49
50
51
52
53
54
    /**
     * Maps a Charset value to the converter instance handling that encoding.
     *
     * @param charset  one of UTF8, ISO8859_2, CP1250 or CP852
     * @return pointer to the object returned by the matching converter class's
     *         getInstance()
     * @throws MorfeuszException for any other charset value
     */
    const CharsetConverter* Environment::getCharsetConverter(Charset charset) const {
        const CharsetConverter* converter = 0;
        switch (charset) {
            case UTF8:
                converter = &UTF8CharsetConverter::getInstance();
                break;
            case ISO8859_2:
                converter = &ISO8859_2_CharsetConverter::getInstance();
                break;
            case CP1250:
                converter = &Windows_1250_CharsetConverter::getInstance();
                break;
            case CP852:
                converter = &CP852_CharsetConverter::getInstance();
                break;
            default:
                throw MorfeuszException("invalid charset");
        }
        return converter;
    }
Michał Lenart authored
55
Michał Lenart authored
56
57
58
    /** Releases the chunks decoder and the case-pattern helper allocated by the constructor. */
    Environment::~Environment() {
        delete chunksDecoder;
        delete casePatternHelper;
    }
Michał Lenart authored
61
62
63
    /**
     * Switches the environment to another character encoding.
     *
     * The converter is resolved first, so an unsupported charset throws before
     * any state is modified; the same converter is then installed both here and
     * in the id resolver.
     *
     * @param charset  encoding to switch to
     * @throws MorfeuszException if charset is not supported
     */
    void Environment::setCharset(Charset charset) {
        const CharsetConverter* converter = getCharsetConverter(charset);
        currentCharsetConverter = converter;
        idResolver.setCharsetConverter(converter);
    }
Michał Lenart authored
65
Michał Lenart authored
66
67
68
    /** @return the charset converter currently in effect. */
    const CharsetConverter& Environment::getCharsetConverter() const {
        const CharsetConverter& active = *currentCharsetConverter;
        return active;
    }
Michał Lenart authored
69
Michał Lenart authored
70
71
72
    /** @return the case converter owned by this environment. */
    const CaseConverter& Environment::getCaseConverter() const {
        return caseConverter;
    }
Michał Lenart authored
73
Michał Lenart authored
74
75
76
    /** @return this environment's own id resolver (a copy of the dictionary's). */
    const IdResolverImpl& Environment::getIdResolver() const {
        return idResolver;
    }
Michał Lenart authored
77
Michał Lenart authored
78
79
80
    /** @return the segmentation-rules FSA selected by the current segmentation options. */
    const SegrulesFSA& Environment::getCurrentSegrulesFSA() const {
        const SegrulesFSA& selected = *currSegrulesFSA;
        return selected;
    }
Michał Lenart authored
81
Michał Lenart authored
82
83
84
    /** @return the FSA held by the current dictionary. */
    const FSAType& Environment::getFSA() const {
        const FSAType& automaton = *dictionary->fsa;
        return automaton;
    }
Michał Lenart authored
85
Michał Lenart authored
86
87
88
    /** @return the interpreted-chunks decoder created in the constructor for this processor type. */
    const InterpretedChunksDecoder& Environment::getInterpretedChunksDecoder() const {
        return *chunksDecoder;
    }
Michał Lenart authored
89
Michał Lenart authored
90
    void Environment::setSegrulesOption(const std::string& option, const std::string& value) {
Michał Lenart authored
91
92
93
94
95
96
97
98
99
100
101
102
        if (this->isUsable()) {
            if (this->currSegrulesOptions.find(option) == this->currSegrulesOptions.end()) {
                throw MorfeuszException("Invalid segmentation option '" + option + "'");
            }
            SegrulesOptions prevOptions = this->currSegrulesOptions;
            this->currSegrulesOptions[option] = value;
            if (this->dictionary->segrulesFSAsMap.find(this->currSegrulesOptions) == this->dictionary->segrulesFSAsMap.end()) {
                this->currSegrulesOptions = prevOptions;

                throw MorfeuszException("Invalid \"" + option + "\" option: \"" + value + "\". Possible values: " + getAvailableOptionsAsString(option));
            }
            this->currSegrulesFSA = this->dictionary->segrulesFSAsMap.find(this->currSegrulesOptions)->second;
Michał Lenart authored
103
104
        }
    }
Michał Lenart authored
105
106
107
108
109
110
111

    /**
     * Looks up the current value of a segmentation option.
     *
     * @param option  name of the option
     * @return the value currently stored for that option
     * @throws MorfeuszException if option is not a key of the current options
     */
    string Environment::getSegrulesOption(const std::string& option) const {
        // Single lookup: the iterator serves both the existence check and the result.
        const SegrulesOptions::const_iterator entry = this->currSegrulesOptions.find(option);
        if (entry == this->currSegrulesOptions.end()) {
            throw MorfeuszException("Invalid segmentation option '" + option + "'");
        }
        return entry->second;
    }
Michał Lenart authored
112
Michał Lenart authored
113
114
115
    /** @return the processor type this environment was constructed with. */
    MorfeuszProcessorType Environment::getProcessorType() const {
        return processorType;
    }
Michał Lenart authored
116
Michał Lenart authored
117
118
    void Environment::setCaseSensitive(bool caseSensitive) {
        this->casePatternHelper->setCaseSensitive(caseSensitive);
Michał Lenart authored
119
    }
Michał Lenart authored
120
121
122

    /** @return the case-pattern helper owned by this environment. */
    const CasePatternHelper& Environment::getCasePatternHelper() const {
        const CasePatternHelper& helper = *casePatternHelper;
        return helper;
    }
Michał Lenart authored
124
Michał Lenart authored
125
126
127
128
129
130
    bool Environment::isSeparator(uint32_t codepoint) const {
        return binary_search(
                this->dictionary->separatorsList.begin(),
                this->dictionary->separatorsList.end(),
                codepoint);
    }
Michał Lenart authored
131
Michał Lenart authored
132
    bool Environment::isUsable() const {
Michał Lenart authored
133
        return usable;
Michał Lenart authored
134
    }
Michał Lenart authored
135
Michał Lenart authored
136
137
    void Environment::setDictionary(const Dictionary* dict) {
        this->dictionary = dict;
Michał Lenart authored
138
139
140
141
142
        idResolver = dictionary->idResolver;
        this->idResolver.setCharsetConverter(currentCharsetConverter);
        currSegrulesOptions = dictionary->defaultSegrulesOptions;
        currSegrulesFSA = dictionary->defaultSegrulesFSA;
    }
Michał Lenart authored
143
Michał Lenart authored
144
145
146
147
    /**
     * Renders the values available for a segmentation option as a
     * comma-separated list of double-quoted strings, e.g. "a", "b".
     *
     * @param option  "aggl" selects the dictionary's availableAgglOptions;
     *                any other name selects availablePraetOptions
     * @return the joined list; empty when the selected set is empty
     */
    string Environment::getAvailableOptionsAsString(const string& option) const {
        const set<string>& choices = (option == "aggl")
                ? dictionary->availableAgglOptions
                : dictionary->availablePraetOptions;

        string joined;
        for (set<string>::const_iterator choice = choices.begin(); choice != choices.end(); ++choice) {
            // Every entry contributes at least its two quotes, so a non-empty
            // result means this is not the first entry.
            if (!joined.empty()) {
                joined += ", ";
            }
            joined += '"';
            joined += *choice;
            joined += '"';
        }
        return joined;
    }
Michał Lenart authored
164
Michał Lenart authored
165
166
167
168
169
170
171
    /** @return the current dictionary's availableAgglOptions set. */
    const set<string>& Environment::getAvailableAgglOptions() const {
        return dictionary->availableAgglOptions;
    }

    /** @return the current dictionary's availablePraetOptions set. */
    const set<string>& Environment::getAvailablePraetOptions() const {
        return dictionary->availablePraetOptions;
    }
Michał Lenart authored
172
}