Commit 40c791419d285c2c3513e60324680fc945315efd

Authored by Michał Lenart
1 parent 0133e003

- generator w zasadzie już działa

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@82 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
CMakeLists.txt
... ... @@ -29,6 +29,7 @@ set (PROJECT_VERSION "${Morfeusz_VERSION_MAJOR}.${Morfeusz_VERSION_MINOR}.${Morf
29 29  
30 30 # INPUT_DICTIONARY_CPP
31 31 set (INPUT_DICTIONARY_CPP "${CMAKE_CURRENT_BINARY_DIR}/default_fsa.cpp")
  32 +set (INPUT_SYNTH_DICTIONARY_CPP "${CMAKE_CURRENT_BINARY_DIR}/default_synth_fsa.cpp")
32 33 if ("${INPUT_DICTIONARY}" STREQUAL "")
33 34 if ("${EMPTY_INPUT_DICTIONARY}" STREQUAL "TRUE")
34 35 set (INPUT_DICTIONARY ${PROJECT_SOURCE_DIR}/input/empty.txt)
... ... @@ -52,7 +53,10 @@ endif ()
52 53 ### Compilation and linking flags
53 54  
54 55 if (${CMAKE_SYSTEM_NAME} MATCHES "Linux")
55   - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++98 -Wall -pedantic -Wcast-align -Wextra -Wmissing-noreturn -Wconversion -Wcast-qual -Wcast-align -O2")
  56 + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++98 -Wall -pedantic -Wcast-align -Wextra -Wmissing-noreturn -Wconversion -Wcast-qual -Wcast-align")
  57 + if (${CMAKE_BUILD_TYPE} STREQUAL "Release")
  58 + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
  59 + endif ()
56 60 elseif (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
57 61 set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x -Wall -O2")
58 62 set (CMAKE_SHARED_LIBRARY_PREFIX "")
... ... @@ -107,7 +111,8 @@ add_subdirectory (fsabuilder)
107 111 ########## add tests ##########
108 112  
109 113 macro (test_build_and_recognize fname method)
110   - add_test (TestBuild-${method}-${fname} python fsabuilder/buildfsa.py --analyzer -i testfiles/${fname} -o /tmp/test-${method}-${fname}.fsa --tagset-file=testfiles/polimorf.tagset --output-format=BINARY --serialization-method=${method})
  114 + add_test (TestBuild-${method}-${fname} python fsabuilder/buildfsa.py --analyzer -i testfiles/${fname} -o /tmp/test-${method}-${fname}.fsa --tagset-file=testfiles/polimorf.tagset --serialization-method=${method})
  115 + add_test (TestBuild4Synth-${method}-${fname} python fsabuilder/buildfsa.py --generator -i testfiles/${fname} -o /tmp/test-synth-${method}-${fname}.fsa --tagset-file=testfiles/polimorf.tagset --serialization-method=${method})
111 116 add_test (TestRecognize-${method}-${fname} morfeusz/test_recognize_dict /tmp/test-${method}-${fname}.fsa testfiles/${fname})
112 117 # add_test (TestNOTRecognize-${method}-${fname} fsa/test_not_recognize /tmp/test-${method}-${fname}.fsa testfiles/out_of_dict)
113 118 # add_test (TestSpeed-${method}-${fname} fsa/test_speed /tmp/test-${method}-${fname}.fsa testfiles/speed_test_data)
... ...
fsabuilder/buildfsa.py
... ... @@ -162,7 +162,7 @@ def buildGeneratorFromPoliMorf(inputFile, tagsetFile):
162 162 encoder = encode.Encoder4Generator()
163 163 tagset = common.Tagset(tagsetFile)
164 164 fsa = FSA(encoder, tagset)
165   - inputData = _readPolimorfInput4Analyzer(inputFile, tagset, encoder)
  165 + inputData = _readPolimorfInput4Generator(inputFile, tagset, encoder)
166 166 for word, data in inputData:
167 167 fsa.addEntry(word, data)
168 168 fsa.close()
... ... @@ -192,7 +192,7 @@ def main(opts):
192 192 }[opts.serializationMethod](fsa)
193 193  
194 194 if opts.cpp:
195   - serializer.serialize2CppFile(opts.outputFile)
  195 + serializer.serialize2CppFile(opts.outputFile, generator=opts.generator)
196 196 else:
197 197 serializer.serialize2BinaryFile(opts.outputFile)
198 198 # {
... ...
fsabuilder/morfeuszbuilder/fsa/common.py
... ... @@ -54,7 +54,6 @@ class Interpretation4Generator(object):
54 54 self.orth = EncodedForm(base, orth)
55 55 self.tagnum = tagnum
56 56 self.namenum = namenum
57   - logging.warn(self)
58 57  
59 58 def getSortKey(self):
60 59 return (
... ... @@ -74,7 +73,10 @@ class Interpretation4Generator(object):
74 73 return hash(self.getSortKey())
75 74  
76 75 def __unicode__(self):
77   - return u'%s %d %s %d %d' % (self.lemma, self.orth.cutLength, self.orth.suffixToAdd, self.tagnum, self.namenum)
  76 + return u'%s,(%d %s),%d,%d' % (self.lemma, self.orth.cutLength, self.orth.suffixToAdd, self.tagnum, self.namenum)
  77 +
  78 + def __repr__(self):
  79 + return unicode(self)
78 80  
79 81 class Tagset(object):
80 82  
... ... @@ -86,8 +88,8 @@ class Tagset(object):
86 88 self.tag2tagnum = {}
87 89 self.name2namenum = {}
88 90 self._doInit(filename, encoding)
89   - print self.tag2tagnum
90   - print self.name2namenum
  91 +# print self.tag2tagnum
  92 +# print self.name2namenum
91 93  
92 94 def _doInit(self, filename, encoding):
93 95 addingTo = None
... ...
fsabuilder/morfeuszbuilder/fsa/convertinput.py
... ... @@ -480,10 +480,9 @@ class PolimorfConverter4Generator(object):
480 480 line = line.decode(self.inputEncoding).strip(u'\n')
481 481 if line:
482 482 # print line
483   - orth, base, tagnum, namenum, typenum = line.split(u' ')
  483 + orth, base, tagnum, namenum = line.split(u' ')
484 484 tagnum = int(tagnum)
485 485 namenum = int(namenum)
486   - typenum = int(typenum)
487 486 yield (base, Interpretation4Generator(orth, base, tagnum, namenum))
488 487  
489 488 def convert(self, inputLines):
... ...
fsabuilder/morfeuszbuilder/fsa/encode.py
... ... @@ -29,9 +29,6 @@ class Encoder(object):
29 29  
30 30 def decodeData(self, rawData):
31 31 return NotImplementedError()
32   -# print unicode(str(rawData), self.encoding)[:-1]
33   -# print unicode(str(rawData), self.encoding)[:-1].split(u'|')
34   -# return unicode(str(rawData), self.encoding)[:-1].split(u'|')
35 32  
36 33 def decodeWord(self, rawWord):
37 34 return unicode(str(rawWord).strip('\x00'), self.encoding)
... ... @@ -49,7 +46,8 @@ class Encoder(object):
49 46 res.append(form.cutLength)
50 47 res.extend(self.encodeWord(form.suffixToAdd, lowercase=False))
51 48 res.append(0)
52   - res.extend(self._encodeCasePattern(form.casePattern))
  49 + if withCasePattern:
  50 + res.extend(self._encodeCasePattern(form.casePattern))
53 51 return res
54 52  
55 53 def _encodeCasePattern(self, casePattern):
... ... @@ -96,17 +94,6 @@ class Encoder(object):
96 94 assert namenum < 256 and namenum >= 0
97 95 return bytearray([namenum])
98 96  
99   -# class SimpleEncoder(Encoder):
100   -#
101   -# def __init__(self, encoding='utf8'):
102   -# super(SimpleEncoder, self).__init__(encoding)
103   -#
104   -# def encodeData(self, data):
105   -# return bytearray(data, encoding=self.encoding) + bytearray([0])
106   -#
107   -# def decodeData(self, rawData):
108   -# return unicode(str(rawData)[:-1], self.encoding)
109   -
110 97 class MorphEncoder(Encoder):
111 98  
112 99 def __init__(self, encoding='utf8'):
... ... @@ -133,11 +120,10 @@ class MorphEncoder(Encoder):
133 120 class Encoder4Generator(Encoder):
134 121  
135 122 def __init__(self, encoding='utf8'):
136   - super(MorphEncoder, self).__init__(encoding)
  123 + super(Encoder4Generator, self).__init__(encoding)
137 124  
138 125 def encodeData(self, interpsList):
139 126 res = bytearray()
140   -# print interpsList
141 127 firstByte = len(interpsList)
142 128 assert firstByte < 256
143 129 assert firstByte > 0
... ... @@ -148,3 +134,6 @@ class Encoder4Generator(Encoder):
148 134 res.extend(self._encodeTagNum(interp.tagnum))
149 135 res.extend(self._encodeNameNum(interp.namenum))
150 136 return res
  137 +#
  138 +# def decodeData(self, data):
  139 +#
... ...
fsabuilder/morfeuszbuilder/fsa/fsa.py
... ... @@ -54,30 +54,6 @@ class FSA(object):
54 54 self.encodedPrevWord = None
55 55 self.closed = True
56 56  
57   -# def feed(self, input):
58   -#
59   -# # allWords = []
60   -# for n, (word, data) in enumerate(input, start=1):
61   -# assert data is not None
62   -# encodedWord = self.encodeWord(word)
63   -# assert encodedWord > self.encodedPrevWord
64   -# if encodedWord > self.encodedPrevWord:
65   -# self._addSorted(encodedWord, self.encodeData(data))
66   -# self.encodedPrevWord = encodedWord
67   -# # assert self.tryToRecognize(word) == data
68   -# if n % 10000 == 0:
69   -# logging.info(word)
70   -# logging.info(str(self.register.getStatesNum()))
71   -# # allWords.append(word)
72   -# for label in encodedWord:
73   -# self.label2Freq[label] = self.label2Freq.get(label, 0) + 1
74   -#
75   -# self.initialState = self._replaceOrRegister(self.initialState, self.encodeWord(word))
76   -# self.encodedPrevWord = None
77   -
78   -# for w in allWords:
79   -# self.tryToRecognize(w, True)
80   -
81 57 def train(self, trainData):
82 58 self.label2Freq = {}
83 59 for idx, word in enumerate(trainData):
... ...
fsabuilder/morfeuszbuilder/fsa/fsa.pyc
No preview for this file type
fsabuilder/morfeuszbuilder/fsa/serializer.py
... ... @@ -22,14 +22,17 @@ class Serializer(object):
22 22 def getVersion(self):
23 23 return 9
24 24  
25   - def serialize2CppFile(self, fname):
  25 + def serialize2CppFile(self, fname, generator):
26 26 res = []
27 27 # self.fsa.calculateOffsets(sizeCounter=lambda state: self.getStateSize(state))
28 28 res.append('\n')
29 29 res.append('#include "%s"' % self.headerFilename)
30 30 res.append('\n')
31 31 res.append('\n')
32   - res.append('extern const unsigned char DEFAULT_FSA[] = {')
  32 + if generator:
  33 + res.append('extern const unsigned char DEFAULT_SYNTH_FSA[] = {')
  34 + else:
  35 + res.append('extern const unsigned char DEFAULT_FSA[] = {')
33 36 res.append('\n')
34 37 for byte in self.fsa2bytearray():
35 38 res.append(hex(byte));
... ...
fsabuilder/morfeuszbuilder/fsa/serializer.pyc
No preview for this file type
morfeusz/CMakeLists.txt
... ... @@ -2,7 +2,13 @@
2 2 ########## generate default dictionary data #################
3 3 add_custom_command (
4 4 OUTPUT "${INPUT_DICTIONARY_CPP}"
5   - COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/buildfsa.py --analyzer -i "${INPUT_DICTIONARY}" -o "${INPUT_DICTIONARY_CPP}" "--tagset-file=${INPUT_TAGSET}" --output-format=CPP --serialization-method=SIMPLE
  5 + COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/buildfsa.py --analyzer -i "${INPUT_DICTIONARY}" -o "${INPUT_DICTIONARY_CPP}" "--tagset-file=${INPUT_TAGSET}" --cpp --serialization-method=SIMPLE
  6 + DEPENDS "${INPUT_DICTIONARY}"
  7 + COMMENT "Building default dictionary C++ file"
  8 +)
  9 +add_custom_command (
  10 + OUTPUT "${INPUT_SYNTH_DICTIONARY_CPP}"
  11 + COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/buildfsa.py --generator -i "${INPUT_DICTIONARY}" -o "${INPUT_SYNTH_DICTIONARY_CPP}" "--tagset-file=${INPUT_TAGSET}" --cpp --serialization-method=SIMPLE
6 12 DEPENDS "${INPUT_DICTIONARY}"
7 13 COMMENT "Building default dictionary C++ file"
8 14 )
... ... @@ -14,10 +20,13 @@ include_directories( ${CMAKE_CURRENT_SOURCE_DIR} )
14 20 #### build #####
15 21  
16 22 set(SRC_FILES
17   - const.cpp
  23 + const.cpp
18 24 ${INPUT_DICTIONARY_CPP}
  25 + ${INPUT_SYNTH_DICTIONARY_CPP}
  26 + Environment.cpp
19 27 MorphDeserializer.cpp
20 28 GeneratorDeserializer.cpp
  29 + Generator.cpp
21 30 Tagset.cpp
22 31 fsa/const.cpp
23 32 MorphInterpretation.cpp
... ... @@ -31,7 +40,9 @@ set(SRC_FILES
31 40 set(INCLUDE_FILES
32 41 const.hpp
33 42 data/default_fsa.hpp
34   - MorphDeserializer.hpp
  43 + MorphDeserializer.hpp
  44 + GeneratorDeserializer.hpp
  45 + Generator.hpp
35 46 Tagset.hpp
36 47 fsa/const.hpp
37 48 MorphInterpretation.hpp
... ... @@ -47,11 +58,13 @@ set_source_files_properties ( SOURCE "${INPUT_DICTIONARY_CPP}" PROPERTIES GENERA
47 58 # add_dependencies (libmorfeusz dupa)
48 59 set_target_properties (libmorfeusz PROPERTIES OUTPUT_NAME "morfeusz")
49 60  
50   -add_executable (morfeusz main.cpp)
  61 +add_executable (morfeusz_analyzer morfeusz_analyzer.cpp)
  62 +add_executable (morfeusz_generator morfeusz_generator.cpp)
51 63 add_executable (test_result_equals test_result_equals.cpp)
52 64 add_executable (test_recognize_dict test_recognize_dict.cpp)
53 65  
54   -target_link_libraries (morfeusz libmorfeusz)
  66 +target_link_libraries (morfeusz_analyzer libmorfeusz)
  67 +target_link_libraries (morfeusz_generator libmorfeusz)
55 68 target_link_libraries (test_result_equals libmorfeusz)
56 69 target_link_libraries (test_recognize_dict libmorfeusz)
57 70  
... ... @@ -67,4 +80,4 @@ add_subdirectory (python)
67 80  
68 81 install (FILES ${INCLUDE_FILES} DESTINATION include/morfeusz)
69 82 install (TARGETS libmorfeusz DESTINATION ${TARGET_LIB_DIR})
70   -install (TARGETS morfeusz DESTINATION bin)
  83 +install (TARGETS morfeusz_analyzer morfeusz_generator DESTINATION bin)
... ...
morfeusz/Environment.cpp 0 → 100644
  1 +/*
  2 + * File: Environment.cpp
  3 + * Author: mlenart
  4 + *
  5 + * Created on 22 styczeń 2014, 12:08
  6 + */
  7 +
  8 +#include "Environment.hpp"
  9 +#include "exceptions.hpp"
  10 +
  11 +Environment::Environment(
  12 + const Tagset& analyzerTagset,
  13 + const Tagset& generatorTagset,
  14 + MorfeuszCharset charset)
  15 +: currentCharsetConverter(getCharsetConverter(charset)),
  16 + analyzerTagset(analyzerTagset),
  17 +generatorTagset(generatorTagset) {
  18 +
  19 +}
  20 +
  21 +const CharsetConverter* Environment::getCharsetConverter(MorfeuszCharset charset) const {
  22 + switch (charset) {
  23 + case UTF8:
  24 + return &this->utf8CharsetConverter;
  25 + case ISO8859_2:
  26 + return &this->isoCharsetConverter;
  27 + case CP1250:
  28 + return &this->cp1250CharsetConverter;
  29 + case CP852:
  30 + return &this->cp852CharsetConverter;
  31 + default:
  32 + throw MorfeuszException("invalid charset");
  33 + }
  34 +}
  35 +
  36 +Environment::~Environment() {
  37 +}
  38 +
  39 +void Environment::setCharset(MorfeuszCharset charset) {
  40 + this->currentCharsetConverter = this->getCharsetConverter(charset);
  41 +}
  42 +
  43 +const CharsetConverter& Environment::getCharsetConverter() const {
  44 + return *this->currentCharsetConverter;
  45 +}
  46 +
  47 +void Environment::setAnalyzerTagset(const Tagset& tagset) {
  48 + this->analyzerTagset = tagset;
  49 +}
  50 +
  51 +const Tagset& Environment::getAnalyzerTagset() const {
  52 + return this->analyzerTagset;
  53 +}
  54 +
  55 +void Environment::setGeneratorTagset(const Tagset& tagset) {
  56 + this->generatorTagset = tagset;
  57 +}
  58 +
  59 +const Tagset& Environment::getGeneratorTagset() const {
  60 + return this->generatorTagset;
  61 +}
  62 +
  63 +const CaseConverter& Environment::getCaseConverter() const {
  64 + return this->caseConverter;
  65 +}
... ...
morfeusz/Environment.hpp 0 → 100644
  1 +/*
  2 + * File: Environment.hpp
  3 + * Author: mlenart
  4 + *
  5 + * Created on 22 styczeń 2014, 12:08
  6 + */
  7 +
  8 +#ifndef ENVIRONMENT_HPP
  9 +#define ENVIRONMENT_HPP
  10 +
  11 +#include "charset/CaseConverter.hpp"
  12 +#include "charset/CharsetConverter.hpp"
  13 +#include "const.hpp"
  14 +#include "Tagset.hpp"
  15 +
  16 +
  17 +class Environment {
  18 +public:
  19 + Environment(
  20 + const Tagset& analyzerTagset,
  21 + const Tagset& generatorTagset,
  22 + MorfeuszCharset charset);
  23 + void setCharset(MorfeuszCharset charset);
  24 + const CharsetConverter& getCharsetConverter() const;
  25 +
  26 + void setAnalyzerTagset(const Tagset& tagset);
  27 + const Tagset& getAnalyzerTagset() const;
  28 +
  29 + void setGeneratorTagset(const Tagset& tagset);
  30 + const Tagset& getGeneratorTagset() const;
  31 +
  32 + const CaseConverter& getCaseConverter() const;
  33 +
  34 + virtual ~Environment();
  35 +private:
  36 + const CharsetConverter* currentCharsetConverter;
  37 + const UTF8CharsetConverter utf8CharsetConverter;
  38 + const ISO8859_2_CharsetConverter isoCharsetConverter;
  39 + const Windows_1250_CharsetConverter cp1250CharsetConverter;
  40 + const CP852_CharsetConverter cp852CharsetConverter;
  41 + Tagset analyzerTagset;
  42 + Tagset generatorTagset;
  43 + const CaseConverter caseConverter;
  44 +
  45 + const CharsetConverter* getCharsetConverter(MorfeuszCharset charset) const;
  46 +};
  47 +
  48 +#endif /* ENVIRONMENT_HPP */
  49 +
... ...
morfeusz/Generator.cpp 0 → 100644
  1 +/*
  2 + * File: Generator.cpp
  3 + * Author: mlenart
  4 + *
  5 + * Created on 21 styczeń 2014, 14:38
  6 + */
  7 +
  8 +#include <string>
  9 +#include <iostream>
  10 +#include "charset/charset_utils.hpp"
  11 +#include "MorphInterpretation.hpp"
  12 +#include "Generator.hpp"
  13 +#include "Environment.hpp"
  14 +
  15 +
  16 +using namespace std;
  17 +
  18 +Generator::Generator(
  19 + const unsigned char* ptr,
  20 + const Environment& env)
  21 +: deserializer(env),
  22 +fsa(SynthFSAType::getFSA(ptr, deserializer)),
  23 +env(env) {
  24 +}
  25 +
  26 +Generator::~Generator() {
  27 +}
  28 +
  29 +std::string Generator::decodeOrth(
  30 + const EncodedOrth& orth,
  31 + const std::vector<uint32_t>& lemma) const {
  32 + string res;
  33 + for (unsigned int i = 0; i < lemma.size() - orth.suffixToCut; i++) {
  34 + uint32_t cp = lemma[i];
  35 + env.getCharsetConverter().append(cp, res);
  36 + }
  37 + const char* suffixPtr = orth.suffixToAdd.c_str();
  38 + const char* suffixEnd = suffixPtr + orth.suffixToAdd.length();
  39 + while (suffixPtr != suffixEnd) {
  40 + uint32_t cp = UTF8CharsetConverter().next(suffixPtr, suffixEnd);
  41 + env.getCharsetConverter().append(cp, res);
  42 + }
  43 + return res;
  44 +}
  45 +
  46 +void Generator::decodeRes(
  47 + const std::vector<EncodedGeneratorInterpretation>& encodedRes,
  48 + const std::string& lemma,
  49 + const std::vector<uint32_t>& lemmaCodepoints,
  50 + std::vector<MorphInterpretation>& result) const {
  51 +
  52 + for (unsigned int i = 0; i < encodedRes.size(); i++) {
  53 + EncodedGeneratorInterpretation egi = encodedRes[i];
  54 + string decodedOrth = this->decodeOrth(egi.orth, lemmaCodepoints);
  55 + MorphInterpretation mi(
  56 + 0, 0,
  57 + decodedOrth, lemma,
  58 + egi.tag,
  59 + egi.nameClassifier,
  60 + env.getAnalyzerTagset(),
  61 + env.getCharsetConverter());
  62 + result.push_back(mi);
  63 + }
  64 +}
  65 +
  66 +void Generator::generate(const string& lemma, vector<MorphInterpretation>& result) const {
  67 + const char* currInput = lemma.c_str();
  68 + const char* inputEnd = currInput + lemma.length();
  69 + vector<uint32_t> codepoints;
  70 + SynthStateType state = this->fsa->getInitialState();
  71 + while (currInput != inputEnd && !state.isSink()) {
  72 + uint32_t codepoint = this->env.getCharsetConverter().next(currInput, inputEnd);
  73 + feedState(state, codepoint, this->env.getCharsetConverter());
  74 + codepoints.push_back(codepoint);
  75 + }
  76 + if (state.isAccepting()) {
  77 + vector<EncodedGeneratorInterpretation> encodedRes = state.getValue();
  78 + decodeRes(encodedRes, lemma, codepoints, result);
  79 + }
  80 +}
... ...
morfeusz/Generator.hpp 0 → 100644
  1 +/*
  2 + * File: Generator.hpp
  3 + * Author: mlenart
  4 + *
  5 + * Created on 21 styczeń 2014, 14:38
  6 + */
  7 +
  8 +#ifndef GENERATOR_HPP
  9 +#define GENERATOR_HPP
  10 +
  11 +#include <string>
  12 +#include <vector>
  13 +#include "charset/CharsetConverter.hpp"
  14 +#include "MorphInterpretation.hpp"
  15 +#include "Tagset.hpp"
  16 +#include "GeneratorDeserializer.hpp"
  17 +
  18 +typedef FSA< std::vector<EncodedGeneratorInterpretation > > SynthFSAType;
  19 +typedef State< std::vector<EncodedGeneratorInterpretation > > SynthStateType;
  20 +
  21 +class Generator {
  22 +public:
  23 + Generator(
  24 + const unsigned char* ptr,
  25 + const Environment& env);
  26 + void generate(const std::string& lemma, std::vector<MorphInterpretation>& result) const;
  27 + virtual ~Generator();
  28 +private:
  29 +// Generator(const SynthDeserializer& deserializer);
  30 + GeneratorDeserializer deserializer;
  31 + const SynthFSAType* fsa;
  32 + const Environment& env;
  33 +
  34 + std::string decodeOrth(
  35 + const EncodedOrth& orth,
  36 + const std::vector<uint32_t>& lemmaCodepoints) const;
  37 +
  38 + void decodeRes(
  39 + const std::vector<EncodedGeneratorInterpretation>& encodedRes,
  40 + const std::string& lemma,
  41 + const std::vector<uint32_t>& lemmaCodepoints,
  42 + std::vector<MorphInterpretation>& result) const;
  43 +};
  44 +
  45 +#endif /* GENERATOR_HPP */
  46 +
... ...
morfeusz/GeneratorDeserializer.cpp
... ... @@ -6,24 +6,47 @@
6 6 */
7 7  
8 8 #include "GeneratorDeserializer.hpp"
  9 +#include "EncodedGeneratorInterpretation.hpp"
9 10  
10 11 using namespace std;
11 12  
12   -GeneratorDeserializer::GeneratorDeserializer(const string& lemma)
13   -: lemma(&lemma) {
14   -
  13 +GeneratorDeserializer::GeneratorDeserializer(const Environment& env)
  14 +: env(env) {
  15 +
  16 +}
  17 +
  18 +void GeneratorDeserializer::deserializeOrth(const unsigned char*& ptr, EncodedOrth& orth) const {
  19 + // XXX uważać na poprawność danych
  20 + orth.suffixToCut = *ptr;
  21 + ptr++;
  22 + orth.suffixToAdd = (const char*) ptr;
  23 + ptr += strlen((const char*) ptr) + 1;
15 24 }
16 25  
17   -void GeneratorDeserializer::setCurrentLemma(const string& lemma) {
18   - this->lemma = &lemma;
  26 +void GeneratorDeserializer::deserializeInterp(const unsigned char*& ptr, EncodedGeneratorInterpretation& interp) const {
  27 + deserializeOrth(ptr, interp.orth);
  28 + interp.tag = ntohs(*(reinterpret_cast<const uint16_t*> (ptr)));
  29 + ptr += 2;
  30 + interp.nameClassifier = *ptr;
  31 + ptr++;
19 32 }
20 33  
21 34 long GeneratorDeserializer::deserialize(
22 35 const unsigned char* ptr,
23   - std::vector<MorphInterpretation>& interps) const {
24   -
  36 + std::vector<EncodedGeneratorInterpretation>& interps) const {
  37 + const unsigned char* currPtr = ptr;
  38 + uint8_t interpsNum = *ptr;
  39 + interps.clear();
  40 + interps.reserve(interpsNum);
  41 + currPtr++;
  42 + for (unsigned int i = 0; i < interpsNum; ++i) {
  43 + EncodedGeneratorInterpretation interp;
  44 + this->deserializeInterp(currPtr, interp);
  45 + interps.push_back(interp);
  46 + }
  47 + return currPtr - ptr;
25 48 }
26 49  
27 50 GeneratorDeserializer::~GeneratorDeserializer() {
28   -
  51 +
29 52 }
... ...
morfeusz/GeneratorDeserializer.hpp
... ... @@ -5,25 +5,29 @@
5 5 * Created on 20 styczeń 2014, 17:14
6 6 */
7 7  
8   -#ifndef GENERATORDESERIALIZER_HPP
9   -#define GENERATORDESERIALIZER_HPP
  8 +#ifndef SYNTHDESERIALIZER_HPP
  9 +#define SYNTHDESERIALIZER_HPP
10 10  
11 11 #include <string>
12 12 #include <vector>
13 13 #include "fsa/fsa.hpp"
14   -#include "MorphInterpretation.hpp"
  14 +#include "Tagset.hpp"
  15 +#include "EncodedGeneratorInterpretation.hpp"
  16 +#include "Environment.hpp"
15 17  
16   -class GeneratorDeserializer: public Deserializer< std::vector<MorphInterpretation> > {
  18 +class GeneratorDeserializer: public Deserializer< std::vector<EncodedGeneratorInterpretation> > {
17 19 public:
18   - GeneratorDeserializer(const std::string& lemma);
19   - void setCurrentLemma(const std::string& lemma);
  20 + explicit GeneratorDeserializer(const Environment& env);
20 21 long deserialize(
21 22 const unsigned char* ptr,
22   - std::vector<MorphInterpretation>& interps) const;
  23 + std::vector<EncodedGeneratorInterpretation>& interps) const;
23 24 virtual ~GeneratorDeserializer();
24 25 private:
25   - const std::string* lemma;
  26 + const Environment& env;
  27 +
  28 + void deserializeInterp(const unsigned char*& ptr, EncodedGeneratorInterpretation& interp) const;
  29 + void deserializeOrth(const unsigned char*& ptr, EncodedOrth& orth) const;
26 30 };
27 31  
28   -#endif /* GENERATORDESERIALIZER_HPP */
  32 +#endif /* SYNTHDESERIALIZER_HPP */
29 33  
... ...
morfeusz/InterpretedChunksDecoder.hpp
... ... @@ -13,18 +13,13 @@
13 13 #include "InterpretedChunk.hpp"
14 14 #include "EncodedInterpretation.hpp"
15 15 #include "charset/CaseConverter.hpp"
  16 +#include "Environment.hpp"
16 17  
17 18 class InterpretedChunksDecoder {
18 19 public:
19 20  
20   - InterpretedChunksDecoder(
21   - const Tagset& tagset,
22   - const CharsetConverter& charsetConverter,
23   - const CaseConverter& caseConverter)
24   - : tagset(tagset),
25   - charsetConverter(charsetConverter),
26   - utf8CharsetConverter(),
27   - caseConverter(caseConverter) {
  21 + InterpretedChunksDecoder(const Environment& env)
  22 + : env(env) {
28 23  
29 24 }
30 25  
... ... @@ -34,7 +29,7 @@ public:
34 29 unsigned int endNode,
35 30 const InterpretedChunk& interpretedChunk,
36 31 OutputIterator out) {
37   - string orth = charsetConverter.toString(interpretedChunk.originalCodepoints);
  32 + string orth = env.getCharsetConverter().toString(interpretedChunk.originalCodepoints);
38 33 for (unsigned int i = 0; i < interpretedChunk.interpsGroup.interps.size(); i++) {
39 34 const EncodedInterpretation& ei = interpretedChunk.interpsGroup.interps[i];
40 35 string lemma = convertLemma(
... ... @@ -45,8 +40,8 @@ public:
45 40 orth, lemma,
46 41 ei.tag,
47 42 ei.nameClassifier,
48   - tagset,
49   - charsetConverter);
  43 + env.getAnalyzerTagset(),
  44 + env.getCharsetConverter());
50 45 ++out;
51 46 }
52 47 return out;
... ... @@ -61,28 +56,20 @@ private:
61 56 for (unsigned int i = 0; i < orth.size() - lemma.suffixToCut; i++) {
62 57 uint32_t cp =
63 58 (i < lemma.casePattern.size() && lemma.casePattern[i])
64   - ? this->caseConverter.toTitle(orth[i])
  59 + ? env.getCaseConverter().toTitle(orth[i])
65 60 : orth[i];
66   - charsetConverter.append(cp, res);
  61 + env.getCharsetConverter().append(cp, res);
67 62 }
68 63 const char* suffixPtr = lemma.suffixToAdd.c_str();
69 64 const char* suffixEnd = suffixPtr + lemma.suffixToAdd.length();
70 65 while (suffixPtr != suffixEnd) {
71   - uint32_t cp = utf8CharsetConverter.next(suffixPtr, suffixEnd);
72   - charsetConverter.append(cp, res);
  66 + uint32_t cp = UTF8CharsetConverter().next(suffixPtr, suffixEnd);
  67 + env.getCharsetConverter().append(cp, res);
73 68 }
74   - // string res(orth);
75   - // res.erase(
76   - // res.end() - lemma.suffixToCut,
77   - // res.end());
78   - // res.append(lemma.suffixToAdd);
79 69 return res;
80 70 }
81 71  
82   - const Tagset& tagset;
83   - const CharsetConverter& charsetConverter;
84   - const UTF8CharsetConverter utf8CharsetConverter;
85   - const CaseConverter& caseConverter;
  72 + const Environment& env;
86 73 };
87 74  
88 75 #endif /* INTERPSGROUPDECODER_HPP */
... ...
morfeusz/Morfeusz.cpp
... ... @@ -12,6 +12,7 @@
12 12 #include "data/default_fsa.hpp"
13 13 #include "Morfeusz.hpp"
14 14 #include "MorphDeserializer.hpp"
  15 +#include "GeneratorDeserializer.hpp"
15 16 #include "InterpretedChunksDecoder.hpp"
16 17 #include "charset/CharsetConverter.hpp"
17 18 #include "charset/charset_utils.hpp"
... ... @@ -22,56 +23,61 @@
22 23  
23 24 using namespace std;
24 25  
25   -static Deserializer<vector<InterpsGroup> >* initializeDeserializer() {
  26 +static Deserializer<vector<InterpsGroup> >* initializeAnalyzerDeserializer() {
26 27 static Deserializer < vector < InterpsGroup > > *deserializer
27 28 = new MorphDeserializer();
28 29 return deserializer;
29 30 }
30 31  
31   -static FSA<vector<InterpsGroup > > *initializeFSA(const string& filename) {
  32 +static FSA<vector<InterpsGroup > > *initializeAnalyzerFSA(const string& filename) {
32 33 cerr << "initialize FSA" << endl;
33   - return FSA < vector < InterpsGroup > > ::getFSA(filename, *initializeDeserializer());
34   -}
35   -
36   -static CharsetConverter* getCharsetConverter(MorfeuszCharset charset) {
37   - cerr << "initialize charset converter for " << charset << endl;
38   - static CharsetConverter* utf8Converter = new UTF8CharsetConverter();
39   -// static CharsetConverter* utf16LEConverter = new UTF16CharsetConverter(UTF16CharsetConverter::UTF16CharsetConverter::LE);
40   -// static CharsetConverter* utf16BEConverter = new UTF16CharsetConverter(UTF16CharsetConverter::Endianness::BE);
41   - static CharsetConverter* iso8859_2Converter = new ISO8859_2_CharsetConverter();
42   - static CharsetConverter* windows1250Converter = new Windows_1250_CharsetConverter();
43   - static CharsetConverter* cp852Converter = new CP852_CharsetConverter();
44   - switch (charset) {
45   - case UTF8:
46   - return utf8Converter;
47   - case ISO8859_2:
48   - return iso8859_2Converter;
49   - case CP1250:
50   - return windows1250Converter;
51   - case CP852:
52   - return cp852Converter;
53   - default:
54   - throw MorfeuszException("invalid charset");
55   - }
56   -}
57   -
58   -static Tagset* initializeTagset(const string& filename) {
59   - cerr << "initialize tagset" << endl;
60   - static Tagset* tagset = new Tagset(readFile<unsigned char>(filename.c_str()));
61   - return tagset;
62   -}
63   -
64   -static Tagset* initializeTagset(const unsigned char* data) {
65   - cerr << "initialize tagset" << endl;
66   - static Tagset* tagset = new Tagset(data);
67   - return tagset;
68   -}
69   -
70   -static CaseConverter* initializeCaseConverter() {
71   - cerr << "initialize case converter" << endl;
72   - static CaseConverter* cc = new CaseConverter();
73   - return cc;
74   -}
  34 + return FSA < vector < InterpsGroup > > ::getFSA(filename, *initializeAnalyzerDeserializer());
  35 +}
  36 +
  37 +//static FSA<vector<MorphInterpretation > > *initializeSynthFSA(const string& filename, const SynthDeserializer& deserializer) {
  38 +// cerr << "initialize synth FSA" << endl;
  39 +// return FSA < vector < EncodedGeneratorInterpretation > > ::getFSA(filename, deserializer);
  40 +//}
  41 +//
  42 +//static CharsetConverter* getCharsetConverter(MorfeuszCharset charset) {
  43 +// cerr << "initialize charset converter for " << charset << endl;
  44 +// static CharsetConverter* utf8Converter = new UTF8CharsetConverter();
  45 +//// static CharsetConverter* utf16LEConverter = new UTF16CharsetConverter(UTF16CharsetConverter::UTF16CharsetConverter::LE);
  46 +//// static CharsetConverter* utf16BEConverter = new UTF16CharsetConverter(UTF16CharsetConverter::Endianness::BE);
  47 +// static CharsetConverter* iso8859_2Converter = new ISO8859_2_CharsetConverter();
  48 +// static CharsetConverter* windows1250Converter = new Windows_1250_CharsetConverter();
  49 +// static CharsetConverter* cp852Converter = new CP852_CharsetConverter();
  50 +// switch (charset) {
  51 +// case UTF8:
  52 +// return utf8Converter;
  53 +// case ISO8859_2:
  54 +// return iso8859_2Converter;
  55 +// case CP1250:
  56 +// return windows1250Converter;
  57 +// case CP852:
  58 +// return cp852Converter;
  59 +// default:
  60 +// throw MorfeuszException("invalid charset");
  61 +// }
  62 +//}
  63 +//
  64 +//static Tagset* initializeTagset(const string& filename) {
  65 +// cerr << "initialize tagset" << endl;
  66 +// static Tagset* tagset = new Tagset(readFile<unsigned char>(filename.c_str()));
  67 +// return tagset;
  68 +//}
  69 +//
  70 +//static Tagset* initializeTagset(const unsigned char* data) {
  71 +// cerr << "initialize tagset" << endl;
  72 +// static Tagset* tagset = new Tagset(data);
  73 +// return tagset;
  74 +//}
  75 +//
  76 +//static CaseConverter* initializeCaseConverter() {
  77 +// cerr << "initialize case converter" << endl;
  78 +// static CaseConverter* cc = new CaseConverter();
  79 +// return cc;
  80 +//}
75 81  
76 82 static MorfeuszOptions createDefaultOptions() {
77 83 MorfeuszOptions res;
... ... @@ -81,44 +87,44 @@ static MorfeuszOptions createDefaultOptions() {
81 87 }
82 88  
83 89 Morfeusz::Morfeusz()
84   -: fsa(FSAType::getFSA(DEFAULT_FSA, *initializeDeserializer())),
85   -charsetConverter(getCharsetConverter(DEFAULT_MORFEUSZ_CHARSET)),
86   -tagset(initializeTagset(DEFAULT_FSA)),
87   -caseConverter(initializeCaseConverter()),
  90 +: env(Tagset(DEFAULT_FSA), Tagset(DEFAULT_SYNTH_FSA), DEFAULT_MORFEUSZ_CHARSET),
  91 +analyzerFSA(FSAType::getFSA(DEFAULT_FSA, *initializeAnalyzerDeserializer())),
  92 +isAnalyzerFSAFromFile(false),
  93 +generator(DEFAULT_SYNTH_FSA, env),
88 94 options(createDefaultOptions()) {
89 95  
90 96 }
91 97  
92   -Morfeusz::Morfeusz(const string& filename)
93   -: fsa(initializeFSA(filename)),
94   -charsetConverter(getCharsetConverter(DEFAULT_MORFEUSZ_CHARSET)),
95   -tagset(initializeTagset(filename)),
96   -caseConverter(initializeCaseConverter()),
97   -options(createDefaultOptions()) {
98   -
  98 +void Morfeusz::setAnalyzerFile(const string& filename) {
  99 + if (this->isAnalyzerFSAFromFile) {
  100 + delete this->analyzerFSA;
  101 + }
  102 + this->analyzerFSA = initializeAnalyzerFSA(filename);
  103 + this->isAnalyzerFSAFromFile = true;
99 104 }
100 105  
101 106 Morfeusz::~Morfeusz() {
102   - // delete &this->fsa;
103   - // delete &this->charsetConverter;
  107 + if (this->isAnalyzerFSAFromFile) {
  108 + delete this->analyzerFSA;
  109 + }
104 110 }
105 111  
106   -void Morfeusz::processOneWord(
  112 +void Morfeusz::analyzeOneWord(
107 113 const char*& inputData,
108 114 const char* inputEnd,
109 115 int startNodeNum,
110 116 std::vector<MorphInterpretation>& results) const {
111 117 while (inputData != inputEnd
112   - && isEndOfWord(this->charsetConverter->peek(inputData, inputEnd))) {
113   - this->charsetConverter->next(inputData, inputEnd);
  118 + && isEndOfWord(this->env.getCharsetConverter().peek(inputData, inputEnd))) {
  119 + this->env.getCharsetConverter().next(inputData, inputEnd);
114 120 }
115 121 const char* wordStart = inputData;
116 122 vector<InterpretedChunk> accum;
117 123 FlexionGraph graph;
118 124 const char* currInput = inputData;
119   - doProcessOneWord(currInput, inputEnd, accum, graph);
  125 + doAnalyzeOneWord(currInput, inputEnd, accum, graph);
120 126 if (!graph.empty()) {
121   - InterpretedChunksDecoder interpretedChunksDecoder(*tagset, *charsetConverter, *caseConverter);
  127 + InterpretedChunksDecoder interpretedChunksDecoder(env);
122 128 int srcNode = startNodeNum;
123 129 for (unsigned int i = 0; i < graph.getTheGraph().size(); i++) {
124 130 vector<FlexionGraph::Edge>& edges = graph.getTheGraph()[i];
... ... @@ -136,25 +142,25 @@ void Morfeusz::processOneWord(
136 142 inputData = currInput;
137 143 }
138 144  
139   -void Morfeusz::doProcessOneWord(
  145 +void Morfeusz::doAnalyzeOneWord(
140 146 const char*& inputData,
141 147 const char* inputEnd,
142 148 vector<InterpretedChunk>& accum,
143 149 FlexionGraph& graph) const {
144 150 bool endOfWord = inputData == inputEnd;
145 151 const char* currInput = inputData;
146   - uint32_t codepoint = endOfWord ? 0 : this->charsetConverter->next(currInput, inputEnd);
  152 + uint32_t codepoint = endOfWord ? 0 : this->env.getCharsetConverter().next(currInput, inputEnd);
147 153 // UnicodeChunk uchunk(*(this->charsetConverter), *(this->caseConverter));
148 154 vector<uint32_t> originalCodepoints;
149 155 vector<uint32_t> lowercaseCodepoints;
150 156  
151   - StateType state = this->fsa->getInitialState();
  157 + StateType state = this->analyzerFSA->getInitialState();
152 158  
153 159 while (!isEndOfWord(codepoint)) {
154   - uint32_t lowerCP = this->caseConverter->toLower(codepoint);
  160 + uint32_t lowerCP = this->env.getCaseConverter().toLower(codepoint);
155 161 originalCodepoints.push_back(codepoint);
156 162 lowercaseCodepoints.push_back(lowerCP);
157   - this->feedState(state, lowerCP);
  163 + feedState(state, lowerCP, UTF8CharsetConverter());
158 164 if (state.isAccepting()) {
159 165 vector< InterpsGroup > val(state.getValue());
160 166 for (unsigned int i = 0; i < val.size(); i++) {
... ... @@ -162,13 +168,13 @@ void Morfeusz::doProcessOneWord(
162 168 InterpretedChunk ic = {inputData, originalCodepoints, lowercaseCodepoints, ig};
163 169 accum.push_back(ic);
164 170 const char* newCurrInput = currInput;
165   - doProcessOneWord(newCurrInput, inputEnd, accum, graph);
  171 + doAnalyzeOneWord(newCurrInput, inputEnd, accum, graph);
166 172 accum.pop_back();
167 173 }
168 174 }
169   - codepoint = currInput == inputEnd ? 0 : this->charsetConverter->peek(currInput, inputEnd);
  175 + codepoint = currInput == inputEnd ? 0 : this->env.getCharsetConverter().peek(currInput, inputEnd);
170 176 if (!isEndOfWord(codepoint)) {
171   - this->charsetConverter->next(currInput, inputEnd);
  177 + this->env.getCharsetConverter().next(currInput, inputEnd);
172 178 }
173 179 }
174 180 if (state.isAccepting()) {
... ... @@ -184,28 +190,20 @@ void Morfeusz::doProcessOneWord(
184 190 inputData = currInput;
185 191 }
186 192  
187   -void Morfeusz::feedState(
188   - StateType& state,
189   - int codepoint) const {
190   - string chars;
191   - this->utf8CharsetConverter.append(codepoint, chars);
192   - for (unsigned int i = 0; i < chars.length(); i++) {
193   - state.proceedToNext(chars[i]);
194   - }
195   -}
196   -
197 193 void Morfeusz::appendIgnotiumToResults(
198 194 const string& word,
199 195 int startNodeNum,
200 196 std::vector<MorphInterpretation>& results) const {
201   - MorphInterpretation interp = MorphInterpretation::createIgn(startNodeNum, word, *this->tagset, *this->charsetConverter);
  197 + MorphInterpretation interp = MorphInterpretation::createIgn(startNodeNum, word, env.getAnalyzerTagset(), env.getCharsetConverter());
202 198 results.push_back(interp);
203 199 }
204 200  
205 201 ResultsIterator Morfeusz::analyze(const string& text) const {
206 202 // const char* textStart = text.c_str();
207 203 // const char* textEnd = text.c_str() + text.length();
208   - return ResultsIterator(text, *this);
  204 + vector<MorphInterpretation> res;
  205 + this->analyze(text, res);
  206 + return ResultsIterator(res);
209 207 }
210 208  
211 209 void Morfeusz::analyze(const string& text, vector<MorphInterpretation>& results) const {
... ... @@ -213,21 +211,28 @@ void Morfeusz::analyze(const string& text, vector<MorphInterpretation>& results)
213 211 const char* inputEnd = input + text.length();
214 212 while (input != inputEnd) {
215 213 int startNode = results.empty() ? 0 : results.back().getEndNode();
216   - DEBUG("process " + string(input, inputEnd));
217   - this->processOneWord(input, inputEnd, startNode, results);
  214 + this->analyzeOneWord(input, inputEnd, startNode, results);
218 215 }
219 216 }
220 217  
221   -void Morfeusz::setEncoding(MorfeuszCharset encoding) {
222   - this->options.encoding = encoding;
223   - this->charsetConverter = getCharsetConverter(encoding);
  218 +ResultsIterator Morfeusz::generate(const string& text) const {
  219 + // const char* textStart = text.c_str();
  220 + // const char* textEnd = text.c_str() + text.length();
  221 + vector<MorphInterpretation> res;
  222 + this->generate(text, res);
  223 + return ResultsIterator(res);
  224 +}
  225 +
  226 +void Morfeusz::generate(const string& text, vector<MorphInterpretation>& results) const {
  227 + this->generator.generate(text, results);
224 228 }
225 229  
226   -ResultsIterator::ResultsIterator(const string& text, const Morfeusz& morfeusz)
227   -: rawInput(text.c_str()),
228   -morfeusz(morfeusz) {
229   - vector<MorphInterpretation> res;
230   - morfeusz.analyze(text, res);
  230 +void Morfeusz::setCharset(MorfeuszCharset charset) {
  231 + this->options.encoding = charset;
  232 + this->env.setCharset(charset);
  233 +}
  234 +
  235 +ResultsIterator::ResultsIterator(vector<MorphInterpretation>& res) {
231 236 resultsBuffer.insert(resultsBuffer.begin(), res.begin(), res.end());
232 237 }
233 238  
... ...
morfeusz/Morfeusz.hpp
... ... @@ -21,71 +21,61 @@
21 21 #include "FlexionGraph.hpp"
22 22 #include "MorfeuszOptions.hpp"
23 23 #include "const.hpp"
  24 +#include "exceptions.hpp"
  25 +#include "Generator.hpp"
  26 +#include "Environment.hpp"
24 27  
25 28 class Morfeusz;
26 29 class ResultsIterator;
27 30  
28   -typedef FSA<std::vector<InterpsGroup > > FSAType;
29   -typedef State<std::vector<InterpsGroup > > StateType;
30   -
31   -class MorfeuszException : public std::exception {
32   -public:
33   -
34   - MorfeuszException(const std::string& what) : msg(what.c_str()) {
35   - }
36   -
37   - virtual ~MorfeuszException() throw () {
38   - }
39   -
40   - virtual const char* what() const throw () {
41   - return this->msg.c_str();
42   - }
43   -private:
44   - const std::string msg;
45   -};
  31 +typedef FSA< std::vector<InterpsGroup > > FSAType;
  32 +typedef State< std::vector<InterpsGroup > > StateType;
46 33  
47 34 class Morfeusz {
48 35 public:
49 36 Morfeusz();
50   - explicit Morfeusz(const std::string& filename);
  37 + // explicit Morfeusz(const std::string& filename);
  38 + void setAnalyzerFile(const std::string& filename);
  39 + void setSynthesizerFile(const std::string& filename);
51 40 virtual ~Morfeusz();
52 41 // Morfeusz(const Morfeusz& orig);
53 42 ResultsIterator analyze(const std::string& text) const;
54 43 void analyze(const std::string& text, std::vector<MorphInterpretation>& result) const;
55 44  
56   - void setEncoding(MorfeuszCharset encoding);
  45 + void generate(const std::string& lemma, std::vector<MorphInterpretation>& result) const;
  46 + ResultsIterator generate(const std::string& lemma) const;
  47 +
  48 + void setCharset(MorfeuszCharset encoding);
57 49  
58 50 // Morfeusz();
59 51 friend class ResultsIterator;
60 52 private:
61 53  
62   - void processOneWord(
  54 + void analyzeOneWord(
63 55 const char*& inputData,
64 56 const char* inputEnd,
65 57 int startNodeNum,
66 58 std::vector<MorphInterpretation>& result) const;
67 59  
68   - void doProcessOneWord(
  60 + void doAnalyzeOneWord(
69 61 const char*& inputData,
70 62 const char* inputEnd,
71 63 std::vector<InterpretedChunk>& accum,
72 64 FlexionGraph& graph) const;
73 65  
74   - void feedState(
75   - StateType& state,
76   - int codepoint) const;
77   -
78 66 void appendIgnotiumToResults(
79 67 const std::string& word,
80 68 int startNodeNum,
81 69 std::vector<MorphInterpretation>& results) const;
82   -
83   - FSAType* fsa;
84   - CharsetConverter* charsetConverter;
85   - Tagset* tagset;
86   - CaseConverter* caseConverter;
87   -
88   - UTF8CharsetConverter utf8CharsetConverter;
  70 + Environment env;
  71 + FSAType* analyzerFSA;
  72 + bool isAnalyzerFSAFromFile;
  73 + Generator generator;
  74 +// const CharsetConverter* charsetConverter;
  75 +// const Tagset* tagset;
  76 +// const CaseConverter* caseConverter;
  77 +//
  78 +// UTF8CharsetConverter utf8CharsetConverter;
89 79  
90 80 MorfeuszOptions options;
91 81 };
... ... @@ -96,9 +86,8 @@ public:
96 86 bool hasNext();
97 87 friend class Morfeusz;
98 88 private:
99   - ResultsIterator(const std::string& text, const Morfeusz& morfeusz);
  89 + ResultsIterator(vector<MorphInterpretation>& res);
100 90 const char* rawInput;
101   - const Morfeusz& morfeusz;
102 91 std::list<MorphInterpretation> resultsBuffer;
103 92 int startNode;
104 93 };
... ...
morfeusz/Tagset.cpp
... ... @@ -36,6 +36,11 @@ Tagset::Tagset(const unsigned char* fsaData) {
36 36 readTags(currPtr, this->names);
37 37 }
38 38  
  39 +//Tagset::Tagset(const Tagset& tagset)
  40 +//: tags(tagset.tags), names(tagset.names) {
  41 +//
  42 +//}
  43 +
39 44 const string Tagset::getTag(const int tagNum, const CharsetConverter& charsetConverter) const {
40 45 return charsetConverter.fromUTF8(this->tags.at(tagNum));
41 46 }
... ...
morfeusz/Tagset.hpp
... ... @@ -15,6 +15,7 @@
15 15 class Tagset {
16 16 public:
17 17 explicit Tagset(const unsigned char* fsaData);
  18 +// Tagset(const Tagset& tagset);
18 19 const std::string getTag(const int tagNum, const CharsetConverter& charsetConverter) const;
19 20 const std::string getName(const int nameNum, const CharsetConverter& charsetConverter) const;
20 21 private:
... ...
morfeusz/charset/charset_utils.hpp
... ... @@ -8,7 +8,9 @@
8 8 #ifndef CHARSET_UTILS_HPP
9 9 #define CHARSET_UTILS_HPP
10 10  
  11 +#include <string>
11 12 #include <set>
  13 +#include "CharsetConverter.hpp"
12 14  
13 15 static inline std::set<int> initializeWhitespaces() {
14 16 std::set<int> res;
... ... @@ -18,10 +20,22 @@ static inline std::set<int> initializeWhitespaces() {
18 20 return res;
19 21 }
20 22  
21   -bool isEndOfWord(int codepoint) {
  23 +inline bool isEndOfWord(int codepoint) {
22 24 static std::set<int> whitespaces(initializeWhitespaces());
23 25 return whitespaces.count(codepoint);
24 26 }
25 27  
  28 +template <class StateClass>
  29 +void feedState(
  30 + StateClass& state,
  31 + int codepoint,
  32 + const CharsetConverter& charsetConverter) {
  33 + std::string chars;
  34 + charsetConverter.append(codepoint, chars);
  35 + for (unsigned int i = 0; i < chars.length(); i++) {
  36 + state.proceedToNext(chars[i]);
  37 + }
  38 +}
  39 +
26 40 #endif /* CHARSET_UTILS_HPP */
27 41  
... ...
morfeusz/data/default_fsa.hpp
... ... @@ -9,6 +9,7 @@
9 9 #define DEFAULT_FSA_HPP
10 10  
11 11 extern const unsigned char DEFAULT_FSA[];
  12 +extern const unsigned char DEFAULT_SYNTH_FSA[];
12 13  
13 14 #endif /* DEFAULT_FSA_HPP */
14 15  
... ...
morfeusz/exceptions.hpp 0 → 100644
  1 +/*
  2 + * File: exceptions.hpp
  3 + * Author: mlenart
  4 + *
  5 + * Created on 22 styczeń 2014, 13:16
  6 + */
  7 +
  8 +#ifndef EXCEPTIONS_HPP
  9 +#define EXCEPTIONS_HPP
  10 +
  11 +class MorfeuszException : public std::exception {
  12 +public:
  13 +
  14 + MorfeuszException(const std::string& what) : msg(what.c_str()) {
  15 + }
  16 +
  17 + virtual ~MorfeuszException() throw () {
  18 + }
  19 +
  20 + virtual const char* what() const throw () {
  21 + return this->msg.c_str();
  22 + }
  23 +private:
  24 + const std::string msg;
  25 +};
  26 +
  27 +#endif /* EXCEPTIONS_HPP */
  28 +
... ...
morfeusz/fsa/cfsa1_impl.hpp
... ... @@ -118,10 +118,10 @@ void CompressedFSA1<T>::doProceedToNextByList(
118 118 currPtr += *currPtr + 1;
119 119 break;
120 120 case 2:
121   - currPtr += ntohs(*((uint16_t*) currPtr)) + 2;
  121 + currPtr += ntohs(*((const uint16_t*) currPtr)) + 2;
122 122 break;
123 123 case 3:
124   - currPtr += (((unsigned int) ntohs(*((uint16_t*) currPtr))) << 8) + currPtr[2] + 3;
  124 + currPtr += (((const unsigned int) ntohs(*((const uint16_t*) currPtr))) << 8) + currPtr[2] + 3;
125 125 break;
126 126 }
127 127 // cerr << "FOUND " << c << " " << currPtr - this->startPtr << endl;
... ...
morfeusz/fsa/fsa_impl.hpp
... ... @@ -64,7 +64,7 @@ FSA<T>* FSA<T>::getFSA(const std::string& filename, const Deserializer<T>& deser
64 64 template <class T>
65 65 FSA<T>* FSA<T>::getFSA(const unsigned char* ptr, const Deserializer<T>& deserializer) {
66 66  
67   - uint32_t magicNumber = ntohl(*((uint32_t*) ptr));
  67 + uint32_t magicNumber = ntohl(*((const uint32_t*) ptr));
68 68 if (magicNumber != MAGIC_NUMBER) {
69 69 throw FSAException("Invalid magic number");
70 70 }
... ...
morfeusz/fsa/simplefsa_impl.hpp
... ... @@ -10,6 +10,8 @@
10 10  
11 11 #include <iostream>
12 12  
  13 +#include "fsa.hpp"
  14 +
13 15 //#pragma pack(push, 1) /* push current alignment to stack */
14 16  
15 17 struct StateData {
... ...
morfeusz/morfeusz.i
... ... @@ -12,6 +12,7 @@
12 12 %{
13 13 #include "Morfeusz.hpp"
14 14 #include "MorphInterpretation.hpp"
  15 +#include "exceptions.hpp"
15 16 #include "const.hpp"
16 17 %}
17 18  
... ... @@ -49,6 +50,7 @@
49 50 %include "Morfeusz.hpp"
50 51 %include "MorphInterpretation.hpp"
51 52 %include "const.hpp"
  53 +%include "exceptions.hpp"
52 54  
53 55 // instantiate vector of interpretations
54 56 namespace std {
... ...
morfeusz/main.cpp renamed to morfeusz/morfeusz_analyzer.cpp
... ... @@ -18,10 +18,10 @@ using namespace std;
18 18 int main(int argc, char** argv) {
19 19 Morfeusz morfeusz;
20 20 #ifdef _WIN32
21   - morfeusz.setEncoding(CP852);
  21 + morfeusz.setCharset(CP852);
22 22 #endif
23 23 #ifdef _WIN64
24   - morfeusz.setEncoding(CP852);
  24 + morfeusz.setCharset(CP852);
25 25 #endif
26 26 string line;
27 27 while (getline(cin, line)) {
... ... @@ -51,5 +51,3 @@ int main(int argc, char** argv) {
51 51 printf("\n");
52 52 return 0;
53 53 }
54   -
55   -
... ...
morfeusz/morfeusz_generator.cpp 0 → 100644
  1 +/*
  2 + * File: morfeusz_generator.cpp
  3 + * Author: mlenart
  4 + *
  5 + * Created on 21 styczeń 2014, 12:02
  6 + */
  7 +
  8 +#include <cstdlib>
  9 +#include <iostream>
  10 +#include <vector>
  11 +#include "fsa/fsa.hpp"
  12 +#include "Tagset.hpp"
  13 +#include "Morfeusz.hpp"
  14 +#include "const.hpp"
  15 +
  16 +using namespace std;
  17 +
  18 +int main(int argc, char** argv) {
  19 + Morfeusz morfeusz;
  20 +#ifdef _WIN32
  21 + morfeusz.setCharset(CP852);
  22 +#endif
  23 +#ifdef _WIN64
  24 + morfeusz.setCharset(CP852);
  25 +#endif
  26 + string line;
  27 + while (getline(cin, line)) {
  28 + // printf("%s\n", line.c_str());
  29 + vector<MorphInterpretation> res;
  30 + morfeusz.generate(line, res);
  31 + printf("[");
  32 + for (unsigned int i = 0; i < res.size(); i++) {
  33 + if (i > 0) {
  34 + printf("; ");
  35 + }
  36 + MorphInterpretation& mi = res[i];
  37 + printf("%s,%s,%s,%s",
  38 + mi.getOrth().c_str(), mi.getLemma().c_str(),
  39 + mi.getTag().c_str(), mi.getName().c_str());
  40 + }
  41 + printf("]\n");
  42 + }
  43 + printf("\n");
  44 + return 0;
  45 +}
... ...
morfeusz/test_recognize_dict.cpp
... ... @@ -16,53 +16,12 @@
16 16  
17 17 using namespace std;
18 18  
19   -//void doTest(
20   -// const FSA<vector<InterpsGroup >> &fsa,
21   -// const Tagset& tagset,
22   -// // const InterpretationsDecoder<TaggedInterpretation>& interpsConverter,
23   -// const char* fname) {
24   -// ifstream ifs;
25   -// // ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
26   -// ifs.open(fname, ios::binary);
27   -// string line;
28   -// while (getline(ifs, line)) {
29   -// vector<string> splitVector(split(line, '\t'));
30   -// string orth = splitVector[0];
31   -// string lemma = splitVector[1];
32   -// string tag = splitVector[2];
33   -// string name = splitVector[3];
34   -// vector<InterpsGroup> value2;
35   -// fsa.tryToRecognize(orth.c_str(), value2);
36   -// DEBUG("recognized " + to_string(value2.size()));
37   -// // vector<TaggedInterpretation> parsedValues;
38   -// bool found = false;
39   -//
40   -// for (InterpsGroup ig : value2)
41   -// for (MorphInterpretation interp : ig.getRealInterps(orth, 0, 0, tagset)) {
42   -// // TaggedInterpretation parsedValue = interpsConverter.getInterpretation(key, interp);
43   -// // (0, 0, orth, encodedInterp, tagset);
44   -// // parsedValues.push_back(parsedValue);
45   -// // debug(orth, parsedValue);
46   -// if (lemma == interp.getLemma() && tag == interp.getTag() && name == interp.getName()) {
47   -// DEBUG("RECOGNIZED");
48   -// found = true;
49   -// }
50   -// else {
51   -// DEBUG("not matching " + interp.getLemma() + " " + interp.getTag() + " " + interp.getName());
52   -// }
53   -// }
54   -// validate(found, "Failed to recognize " + orth + " " + lemma + ":" + tag + ":" + name);
55   -// // debug(key, value2);
56   -// // validate(fsa.tryToRecognize(key.c_str(), value2), "Failed to recognize " + key);
57   -// }
58   -// validate(ifs.eof(), "Failed to read the input file to the end");
59   -//}
60   -
61 19 int main(int argc, char** argv) {
62 20 validate(argc == 3, "Must provide exactly 2 arguments - input FSA filename and dictionary filename.");
63 21 string fsaFilename = argv[1];
64 22 string dictFilename = argv[2];
65   - Morfeusz morfeusz(fsaFilename);
  23 + Morfeusz morfeusz;
  24 + morfeusz.setAnalyzerFile(fsaFilename);
66 25 ifstream in;
67 26 in.open(dictFilename.c_str());
68 27 string line;
... ...
morfeusz/test_result_equals.cpp
... ... @@ -48,7 +48,7 @@ int main(int argc, char** argv) {
48 48 Morfeusz morfeusz;
49 49 if (argc == 4) {
50 50 MorfeuszCharset encoding = getEncoding(argv[3]);
51   - morfeusz.setEncoding(encoding);
  51 + morfeusz.setCharset(encoding);
52 52 }
53 53 string line;
54 54 while (getline(in, line)) {
... ...
morfeusz/test_synth_dict.cpp 0 → 100644
  1 +/*
  2 + * File: test_synth_dict.cpp
  3 + * Author: mlenart
  4 + *
  5 + * Created on 21 styczeń 2014, 12:00
  6 + */
  7 +
  8 +#include <cstdlib>
  9 +
  10 +using namespace std;
  11 +
  12 +/*
  13 + *
  14 + */
  15 +int main(int argc, char** argv) {
  16 +
  17 + return 0;
  18 +}
  19 +
... ...
morfeusz/utils.hpp
... ... @@ -81,7 +81,8 @@ void appendMorfeuszResults(const std::vector<MorphInterpretation>& res, OutputSt
81 81 if (prevStart != -1
82 82 && (prevStart != mi.getStartNode() || prevEnd != mi.getEndNode())) {
83 83 out << "]\n[";
84   - } else if (prevStart != -1) {
  84 + }
  85 + else if (prevStart != -1) {
85 86 out << "; ";
86 87 }
87 88 out << mi.getStartNode() << ","
... ...
nbproject/configurations.xml
1 1 <?xml version="1.0" encoding="UTF-8"?>
2 2 <configurationDescriptor version="90">
3 3 <logicalFolder name="root" displayName="root" projectFiles="true" kind="ROOT">
  4 + <logicalFolder name="build"
  5 + displayName="build"
  6 + projectFiles="true"
  7 + root="build">
  8 + <itemPath>build/default_fsa.cpp</itemPath>
  9 + <itemPath>build/default_synth_fsa.cpp</itemPath>
  10 + </logicalFolder>
4 11 <logicalFolder name="f1" displayName="input" projectFiles="true">
5 12 </logicalFolder>
6 13 <df root="morfeusz" name="0">
... ... @@ -19,21 +26,21 @@
19 26 <in>test_recognize.cpp</in>
20 27 <in>test_speed.cpp</in>
21 28 </df>
22   - <df name="generator">
23   - <in>EncodedGeneratorInterpretation.hpp</in>
24   - <in>GeneratorDeserializer.cpp</in>
25   - <in>GeneratorDeserializer.hpp</in>
26   - </df>
  29 + <in>Environment.cpp</in>
27 30 <in>FlexionGraph.cpp</in>
  31 + <in>Generator.cpp</in>
  32 + <in>GeneratorDeserializer.cpp</in>
28 33 <in>Morfeusz.cpp</in>
29 34 <in>MorphDeserializer.cpp</in>
30 35 <in>MorphInterpretation.cpp</in>
31 36 <in>Tagset.cpp</in>
32   - <in>Toolchain-Linux-amd64.cmake</in>
33 37 <in>const.cpp</in>
34   - <in>main.cpp</in>
  38 + <in>exceptions.hpp</in>
  39 + <in>morfeusz_analyzer.cpp</in>
  40 + <in>morfeusz_generator.cpp</in>
35 41 <in>test_recognize_dict.cpp</in>
36 42 <in>test_result_equals.cpp</in>
  43 + <in>test_synth_dict.cpp</in>
37 44 </df>
38 45 <logicalFolder name="morfeusz"
39 46 displayName="morfeusz"
... ... @@ -76,9 +83,17 @@
76 83 <buildCommandWorkingDir>build</buildCommandWorkingDir>
77 84 <buildCommand>${MAKE} -f Makefile</buildCommand>
78 85 <cleanCommand>${MAKE} -f Makefile clean</cleanCommand>
79   - <executablePath>build/morfeusz/test_result_equals</executablePath>
  86 + <executablePath>build/morfeusz/morfeusz_generator</executablePath>
80 87 </makeTool>
81 88 </makefileType>
  89 + <item path="build/default_fsa.cpp" ex="false" tool="1" flavor2="4">
  90 + <ccTool>
  91 + </ccTool>
  92 + </item>
  93 + <item path="build/default_synth_fsa.cpp" ex="false" tool="1" flavor2="4">
  94 + <ccTool>
  95 + </ccTool>
  96 + </item>
82 97 <item path="build/morfeusz/java/swigJAVA.cpp" ex="false" tool="1" flavor2="4">
83 98 <ccTool>
84 99 </ccTool>
... ... @@ -94,8 +109,13 @@
94 109 <pElem>build/morfeusz/java</pElem>
95 110 </incDir>
96 111 <preprocessorList>
  112 + <Elem>NDEBUG</Elem>
  113 + <Elem>_OPTIMIZE__=1</Elem>
97 114 <Elem>jmorfeusz_EXPORTS</Elem>
98 115 </preprocessorList>
  116 + <undefinedList>
  117 + <Elem>__NO_INLINE__</Elem>
  118 + </undefinedList>
99 119 </ccTool>
100 120 </item>
101 121 <item path="build/morfeusz/morfeuszPYTHON_wrap.cxx"
... ... @@ -109,8 +129,13 @@
109 129 <pElem>build/morfeusz/python</pElem>
110 130 </incDir>
111 131 <preprocessorList>
  132 + <Elem>NDEBUG</Elem>
  133 + <Elem>_OPTIMIZE__=1</Elem>
112 134 <Elem>_morfeusz_EXPORTS</Elem>
113 135 </preprocessorList>
  136 + <undefinedList>
  137 + <Elem>__NO_INLINE__</Elem>
  138 + </undefinedList>
114 139 </ccTool>
115 140 </item>
116 141 <item path="build/morfeusz/python/swigPYTHON.cpp"
... ... @@ -124,18 +149,16 @@
124 149 <ccTool>
125 150 <incDir>
126 151 <pElem>build</pElem>
  152 + <pElem>morfeusz</pElem>
127 153 <pElem>build/morfeusz</pElem>
128 154 </incDir>
129 155 <preprocessorList>
130   - <Elem>NDEBUG</Elem>
131   - <Elem>_OPTIMIZE__=1</Elem>
132 156 <Elem>__PIC__=2</Elem>
133 157 <Elem>__pic__=2</Elem>
134 158 <Elem>libmorfeusz_EXPORTS</Elem>
135 159 </preprocessorList>
136 160 <undefinedList>
137 161 <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
138   - <Elem>__NO_INLINE__</Elem>
139 162 </undefinedList>
140 163 </ccTool>
141 164 </folder>
... ... @@ -158,28 +181,42 @@
158 181 </undefinedList>
159 182 </ccTool>
160 183 </folder>
  184 + <folder path="build">
  185 + <ccTool>
  186 + <incDir>
  187 + <pElem>build</pElem>
  188 + <pElem>morfeusz</pElem>
  189 + <pElem>build/morfeusz</pElem>
  190 + </incDir>
  191 + <preprocessorList>
  192 + <Elem>__PIC__=2</Elem>
  193 + <Elem>__pic__=2</Elem>
  194 + <Elem>libmorfeusz_EXPORTS</Elem>
  195 + </preprocessorList>
  196 + <undefinedList>
  197 + <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
  198 + </undefinedList>
  199 + </ccTool>
  200 + </folder>
161 201 <folder path="morfeusz">
162 202 <ccTool>
163 203 <incDir>
164 204 <pElem>build</pElem>
165 205 </incDir>
166 206 <preprocessorList>
167   - <Elem>NDEBUG</Elem>
168   - <Elem>_OPTIMIZE__=1</Elem>
169 207 <Elem>__PIC__=2</Elem>
170 208 <Elem>__pic__=2</Elem>
171 209 </preprocessorList>
172 210 <undefinedList>
173 211 <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
174   - <Elem>__NO_INLINE__</Elem>
175 212 </undefinedList>
176 213 </ccTool>
177 214 </folder>
178 215 <folder path="morfeusz/java">
179 216 <ccTool>
180 217 <incDir>
181   - <pElem>/usr/lib/jvm/default-java/include</pElem>
182 218 <pElem>morfeusz</pElem>
  219 + <pElem>/usr/lib/jvm/default-java/include</pElem>
183 220 </incDir>
184 221 <preprocessorList>
185 222 <Elem>jmorfeusz_EXPORTS</Elem>
... ... @@ -193,26 +230,80 @@
193 230 <pElem>morfeusz</pElem>
194 231 </incDir>
195 232 <preprocessorList>
  233 + <Elem>NDEBUG</Elem>
  234 + <Elem>_OPTIMIZE__=1</Elem>
196 235 <Elem>pymorfeusz_EXPORTS</Elem>
197 236 </preprocessorList>
  237 + <undefinedList>
  238 + <Elem>__NO_INLINE__</Elem>
  239 + </undefinedList>
198 240 </ccTool>
199 241 </folder>
  242 + <item path="morfeusz/Environment.cpp" ex="false" tool="1" flavor2="4">
  243 + <ccTool>
  244 + <incDir>
  245 + <pElem>build</pElem>
  246 + <pElem>morfeusz</pElem>
  247 + <pElem>build/morfeusz</pElem>
  248 + </incDir>
  249 + <preprocessorList>
  250 + <Elem>__PIC__=2</Elem>
  251 + <Elem>__pic__=2</Elem>
  252 + <Elem>libmorfeusz_EXPORTS</Elem>
  253 + </preprocessorList>
  254 + <undefinedList>
  255 + <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
  256 + </undefinedList>
  257 + </ccTool>
  258 + </item>
200 259 <item path="morfeusz/FlexionGraph.cpp" ex="false" tool="1" flavor2="4">
201 260 <ccTool>
202 261 <incDir>
203 262 <pElem>build</pElem>
  263 + <pElem>morfeusz</pElem>
  264 + <pElem>build/morfeusz</pElem>
  265 + </incDir>
  266 + <preprocessorList>
  267 + <Elem>__PIC__=2</Elem>
  268 + <Elem>__pic__=2</Elem>
  269 + <Elem>libmorfeusz_EXPORTS</Elem>
  270 + </preprocessorList>
  271 + <undefinedList>
  272 + <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
  273 + </undefinedList>
  274 + </ccTool>
  275 + </item>
  276 + <item path="morfeusz/Generator.cpp" ex="false" tool="1" flavor2="4">
  277 + <ccTool>
  278 + <incDir>
  279 + <pElem>build</pElem>
  280 + <pElem>morfeusz</pElem>
  281 + <pElem>build/morfeusz</pElem>
  282 + </incDir>
  283 + <preprocessorList>
  284 + <Elem>__PIC__=2</Elem>
  285 + <Elem>__pic__=2</Elem>
  286 + <Elem>libmorfeusz_EXPORTS</Elem>
  287 + </preprocessorList>
  288 + <undefinedList>
  289 + <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
  290 + </undefinedList>
  291 + </ccTool>
  292 + </item>
  293 + <item path="morfeusz/GeneratorDeserializer.cpp" ex="false" tool="1" flavor2="4">
  294 + <ccTool>
  295 + <incDir>
  296 + <pElem>build</pElem>
  297 + <pElem>morfeusz</pElem>
204 298 <pElem>build/morfeusz</pElem>
205 299 </incDir>
206 300 <preprocessorList>
207   - <Elem>NDEBUG</Elem>
208   - <Elem>_OPTIMIZE__=1</Elem>
209 301 <Elem>__PIC__=2</Elem>
210 302 <Elem>__pic__=2</Elem>
211 303 <Elem>libmorfeusz_EXPORTS</Elem>
212 304 </preprocessorList>
213 305 <undefinedList>
214 306 <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
215   - <Elem>__NO_INLINE__</Elem>
216 307 </undefinedList>
217 308 </ccTool>
218 309 </item>
... ... @@ -220,18 +311,16 @@
220 311 <ccTool>
221 312 <incDir>
222 313 <pElem>build</pElem>
  314 + <pElem>morfeusz</pElem>
223 315 <pElem>build/morfeusz</pElem>
224 316 </incDir>
225 317 <preprocessorList>
226   - <Elem>NDEBUG</Elem>
227   - <Elem>_OPTIMIZE__=1</Elem>
228 318 <Elem>__PIC__=2</Elem>
229 319 <Elem>__pic__=2</Elem>
230 320 <Elem>libmorfeusz_EXPORTS</Elem>
231 321 </preprocessorList>
232 322 <undefinedList>
233 323 <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
234   - <Elem>__NO_INLINE__</Elem>
235 324 </undefinedList>
236 325 </ccTool>
237 326 </item>
... ... @@ -239,18 +328,16 @@
239 328 <ccTool>
240 329 <incDir>
241 330 <pElem>build</pElem>
  331 + <pElem>morfeusz</pElem>
242 332 <pElem>build/morfeusz</pElem>
243 333 </incDir>
244 334 <preprocessorList>
245   - <Elem>NDEBUG</Elem>
246   - <Elem>_OPTIMIZE__=1</Elem>
247 335 <Elem>__PIC__=2</Elem>
248 336 <Elem>__pic__=2</Elem>
249 337 <Elem>libmorfeusz_EXPORTS</Elem>
250 338 </preprocessorList>
251 339 <undefinedList>
252 340 <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
253   - <Elem>__NO_INLINE__</Elem>
254 341 </undefinedList>
255 342 </ccTool>
256 343 </item>
... ... @@ -258,18 +345,16 @@
258 345 <ccTool>
259 346 <incDir>
260 347 <pElem>build</pElem>
  348 + <pElem>morfeusz</pElem>
261 349 <pElem>build/morfeusz</pElem>
262 350 </incDir>
263 351 <preprocessorList>
264   - <Elem>NDEBUG</Elem>
265   - <Elem>_OPTIMIZE__=1</Elem>
266 352 <Elem>__PIC__=2</Elem>
267 353 <Elem>__pic__=2</Elem>
268 354 <Elem>libmorfeusz_EXPORTS</Elem>
269 355 </preprocessorList>
270 356 <undefinedList>
271 357 <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
272   - <Elem>__NO_INLINE__</Elem>
273 358 </undefinedList>
274 359 </ccTool>
275 360 </item>
... ... @@ -277,26 +362,19 @@
277 362 <ccTool>
278 363 <incDir>
279 364 <pElem>build</pElem>
  365 + <pElem>morfeusz</pElem>
280 366 <pElem>build/morfeusz</pElem>
281 367 </incDir>
282 368 <preprocessorList>
283   - <Elem>NDEBUG</Elem>
284   - <Elem>_OPTIMIZE__=1</Elem>
285 369 <Elem>__PIC__=2</Elem>
286 370 <Elem>__pic__=2</Elem>
287 371 <Elem>libmorfeusz_EXPORTS</Elem>
288 372 </preprocessorList>
289 373 <undefinedList>
290 374 <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
291   - <Elem>__NO_INLINE__</Elem>
292 375 </undefinedList>
293 376 </ccTool>
294 377 </item>
295   - <item path="morfeusz/Toolchain-Linux-amd64.cmake"
296   - ex="false"
297   - tool="3"
298   - flavor2="0">
299   - </item>
300 378 <item path="morfeusz/charset/CaseConverter.cpp" ex="false" tool="1" flavor2="4">
301 379 <ccTool>
302 380 </ccTool>
... ... @@ -323,18 +401,16 @@
323 401 <ccTool>
324 402 <incDir>
325 403 <pElem>build</pElem>
  404 + <pElem>morfeusz</pElem>
326 405 <pElem>build/morfeusz</pElem>
327 406 </incDir>
328 407 <preprocessorList>
329   - <Elem>NDEBUG</Elem>
330   - <Elem>_OPTIMIZE__=1</Elem>
331 408 <Elem>__PIC__=2</Elem>
332 409 <Elem>__pic__=2</Elem>
333 410 <Elem>libmorfeusz_EXPORTS</Elem>
334 411 </preprocessorList>
335 412 <undefinedList>
336 413 <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
337   - <Elem>__NO_INLINE__</Elem>
338 414 </undefinedList>
339 415 </ccTool>
340 416 </item>
... ... @@ -342,22 +418,22 @@
342 418 <ccTool>
343 419 </ccTool>
344 420 </item>
  421 + <item path="morfeusz/exceptions.hpp" ex="false" tool="3" flavor2="0">
  422 + </item>
345 423 <item path="morfeusz/fsa/const.cpp" ex="false" tool="1" flavor2="4">
346 424 <ccTool>
347 425 <incDir>
348 426 <pElem>build</pElem>
  427 + <pElem>morfeusz</pElem>
349 428 <pElem>build/morfeusz</pElem>
350 429 </incDir>
351 430 <preprocessorList>
352   - <Elem>NDEBUG</Elem>
353   - <Elem>_OPTIMIZE__=1</Elem>
354 431 <Elem>__PIC__=2</Elem>
355 432 <Elem>__pic__=2</Elem>
356 433 <Elem>libmorfeusz_EXPORTS</Elem>
357 434 </preprocessorList>
358 435 <undefinedList>
359 436 <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
360   - <Elem>__NO_INLINE__</Elem>
361 437 </undefinedList>
362 438 </ccTool>
363 439 </item>
... ... @@ -385,64 +461,44 @@
385 461 </incDir>
386 462 </ccTool>
387 463 </item>
388   - <item path="morfeusz/generator/EncodedGeneratorInterpretation.hpp"
389   - ex="false"
390   - tool="3"
391   - flavor2="0">
392   - </item>
393   - <item path="morfeusz/generator/GeneratorDeserializer.cpp"
394   - ex="false"
395   - tool="1"
396   - flavor2="0">
397   - </item>
398   - <item path="morfeusz/generator/GeneratorDeserializer.hpp"
399   - ex="false"
400   - tool="3"
401   - flavor2="0">
  464 + <item path="morfeusz/morfeusz_analyzer.cpp" ex="false" tool="1" flavor2="4">
  465 + <ccTool>
  466 + <incDir>
  467 + <pElem>build</pElem>
  468 + <pElem>morfeusz</pElem>
  469 + <pElem>build/morfeusz</pElem>
  470 + </incDir>
  471 + </ccTool>
402 472 </item>
403   - <item path="morfeusz/main.cpp" ex="false" tool="1" flavor2="4">
  473 + <item path="morfeusz/morfeusz_generator.cpp" ex="false" tool="1" flavor2="4">
404 474 <ccTool>
405 475 <incDir>
406 476 <pElem>build</pElem>
  477 + <pElem>morfeusz</pElem>
407 478 <pElem>build/morfeusz</pElem>
408 479 </incDir>
409   - <preprocessorList>
410   - <Elem>NDEBUG</Elem>
411   - <Elem>_OPTIMIZE__=1</Elem>
412   - </preprocessorList>
413   - <undefinedList>
414   - <Elem>__NO_INLINE__</Elem>
415   - </undefinedList>
416 480 </ccTool>
417 481 </item>
418 482 <item path="morfeusz/test_recognize_dict.cpp" ex="false" tool="1" flavor2="4">
419 483 <ccTool>
420 484 <incDir>
421 485 <pElem>build</pElem>
  486 + <pElem>morfeusz</pElem>
422 487 <pElem>build/morfeusz</pElem>
423 488 </incDir>
424   - <preprocessorList>
425   - <Elem>NDEBUG</Elem>
426   - <Elem>_OPTIMIZE__=1</Elem>
427   - </preprocessorList>
428   - <undefinedList>
429   - <Elem>__NO_INLINE__</Elem>
430   - </undefinedList>
431 489 </ccTool>
432 490 </item>
433 491 <item path="morfeusz/test_result_equals.cpp" ex="false" tool="1" flavor2="4">
434 492 <ccTool>
435 493 <incDir>
436 494 <pElem>build</pElem>
  495 + <pElem>morfeusz</pElem>
437 496 <pElem>build/morfeusz</pElem>
438 497 </incDir>
439   - <preprocessorList>
440   - <Elem>NDEBUG</Elem>
441   - <Elem>_OPTIMIZE__=1</Elem>
442   - </preprocessorList>
443   - <undefinedList>
444   - <Elem>__NO_INLINE__</Elem>
445   - </undefinedList>
  498 + </ccTool>
  499 + </item>
  500 + <item path="morfeusz/test_synth_dict.cpp" ex="false" tool="1" flavor2="4">
  501 + <ccTool>
446 502 </ccTool>
447 503 </item>
448 504 </conf>
... ...