/* * File: Environment.cpp * Author: mlenart * * Created on 22 styczeń 2014, 12:08 */ #include <vector> #include <algorithm> #include <string> #include "DictionariesRepository.hpp" #include "Environment.hpp" #include "deserialization/MorphDeserializer.hpp" #include "deserialization/morphInterps/InterpretedChunksDecoder.hpp" #include "deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.hpp" #include "deserialization/morphInterps/InterpretedChunksDecoder4Generator.hpp" #include "DictionariesRepository.hpp" namespace morfeusz { using namespace std; Environment::Environment(const string& dictName, MorfeuszProcessorType processorType, bool usable) : usable(usable), currentCharsetConverter(getCharsetConverter(DEFAULT_MORFEUSZ_CHARSET)), caseConverter(), dictionary(usable ? DictionariesRepository::getInstance().getDictionary(dictName, processorType) : Dictionary::getEmpty()), idResolver(dictionary->idResolver), currSegrulesOptions(dictionary->defaultSegrulesOptions), currSegrulesFSA(dictionary->defaultSegrulesFSA), chunksDecoder( processorType == ANALYZER ? (InterpretedChunksDecoder*) new InterpretedChunksDecoder4Analyzer(*this) : (InterpretedChunksDecoder*) new InterpretedChunksDecoder4Generator(*this)), processorType(processorType), casePatternHelper(new CasePatternHelper()) { } const CharsetConverter* Environment::getCharsetConverter(Charset charset) const { switch (charset) { case UTF8: return &UTF8CharsetConverter::getInstance(); case ISO8859_2: return &ISO8859_2_CharsetConverter::getInstance(); case CP1250: return &Windows_1250_CharsetConverter::getInstance(); case CP852: return &CP852_CharsetConverter::getInstance(); default: throw MorfeuszException("invalid charset"); } } Environment::~Environment() { delete this->chunksDecoder; delete this->casePatternHelper; } void Environment::setCharset(Charset charset) { this->currentCharsetConverter = this->getCharsetConverter(charset); this->idResolver.setCharsetConverter(currentCharsetConverter); } const CharsetConverter& Environment::getCharsetConverter() const { return *this->currentCharsetConverter; } const CaseConverter& Environment::getCaseConverter() const { return this->caseConverter; } const IdResolverImpl& Environment::getIdResolver() const { return this->idResolver; } const SegrulesFSA& Environment::getCurrentSegrulesFSA() const { return *(this->currSegrulesFSA); } const FSAType& Environment::getFSA() const { return *(this->dictionary->fsa); } const InterpretedChunksDecoder& Environment::getInterpretedChunksDecoder() const { return *(this->chunksDecoder); } void Environment::setSegrulesOption(const std::string& option, const std::string& value) { if (this->isUsable()) { if (this->currSegrulesOptions.find(option) == this->currSegrulesOptions.end()) { throw MorfeuszException("Invalid segmentation option '" + option + "'"); } SegrulesOptions prevOptions = this->currSegrulesOptions; this->currSegrulesOptions[option] = value; if (this->dictionary->segrulesFSAsMap.find(this->currSegrulesOptions) == this->dictionary->segrulesFSAsMap.end()) { this->currSegrulesOptions = prevOptions; throw MorfeuszException("Invalid \"" + option + "\" option: \"" + value + "\". Possible values: " + getAvailableOptionsAsString(option)); } this->currSegrulesFSA = this->dictionary->segrulesFSAsMap.find(this->currSegrulesOptions)->second; } } string Environment::getSegrulesOption(const std::string& option) const { if (this->currSegrulesOptions.find(option) == this->currSegrulesOptions.end()) { throw MorfeuszException("Invalid segmentation option '" + option + "'"); } return this->currSegrulesOptions.find(option)->second; } MorfeuszProcessorType Environment::getProcessorType() const { return this->processorType; } void Environment::setCaseSensitive(bool caseSensitive) { this->casePatternHelper->setCaseSensitive(caseSensitive); } const CasePatternHelper& Environment::getCasePatternHelper() const { return *this->casePatternHelper; } bool Environment::isSeparator(uint32_t codepoint) const { return binary_search( this->dictionary->separatorsList.begin(), this->dictionary->separatorsList.end(), codepoint); } bool Environment::isUsable() const { return usable; } void Environment::setDictionary(const Dictionary* dict) { this->dictionary = dict; idResolver = dictionary->idResolver; this->idResolver.setCharsetConverter(currentCharsetConverter); currSegrulesOptions = dictionary->defaultSegrulesOptions; currSegrulesFSA = dictionary->defaultSegrulesFSA; } string Environment::getAvailableOptionsAsString(const string& option) const { const set<string>* options; if (option == "aggl") { options = &dictionary->availableAgglOptions; } else { options = &dictionary->availablePraetOptions; } string res; set<string>::const_iterator it = options->begin(); while (it != options->end()) { if (!res.empty()) { res += ", "; } res += '"'; res += *it; res += '"'; ++it; } return res; } const set<string>& Environment::getAvailableAgglOptions() const { return this->dictionary->availableAgglOptions; } const set<string>& Environment::getAvailablePraetOptions() const { return this->dictionary->availablePraetOptions; } }