- generator w zasadzie już działa

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@82 ff4e3ee1-f430-4e82-ade0-24591c43f1fd

- generator w zasadzie już działa
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@82 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Michał Lenart
1 parent 0133e003
Showing 35 changed files with 723 additions and 357 deletions
CMakeLists.txt
fsabuilder/buildfsa.py
fsabuilder/morfeuszbuilder/fsa/common.py
fsabuilder/morfeuszbuilder/fsa/convertinput.py
fsabuilder/morfeuszbuilder/fsa/encode.py
fsabuilder/morfeuszbuilder/fsa/fsa.py
fsabuilder/morfeuszbuilder/fsa/fsa.pyc
fsabuilder/morfeuszbuilder/fsa/serializer.py
fsabuilder/morfeuszbuilder/fsa/serializer.pyc
morfeusz/CMakeLists.txt
morfeusz/Environment.cpp
morfeusz/Environment.hpp
morfeusz/Generator.cpp
morfeusz/Generator.hpp
morfeusz/GeneratorDeserializer.cpp
morfeusz/GeneratorDeserializer.hpp
morfeusz/InterpretedChunksDecoder.hpp
morfeusz/Morfeusz.cpp
morfeusz/Morfeusz.hpp
morfeusz/Tagset.cpp
@@ -29,6 +29,7 @@ set (PROJECT_VERSION &quot;${Morfeusz_VERSION_MAJOR}.${Morfeusz_VERSION_MINOR}.${Morf
  
 # INPUT_DICTIONARY_CPP
 set (INPUT_DICTIONARY_CPP "${CMAKE_CURRENT_BINARY_DIR}/default_fsa.cpp")
+set (INPUT_SYNTH_DICTIONARY_CPP "${CMAKE_CURRENT_BINARY_DIR}/default_synth_fsa.cpp")
 if ("${INPUT_DICTIONARY}" STREQUAL "")
    if ("${EMPTY_INPUT_DICTIONARY}" STREQUAL "TRUE")
     set (INPUT_DICTIONARY ${PROJECT_SOURCE_DIR}/input/empty.txt)
@@ -52,7 +53,10 @@ endif ()
 ### Compilation and linking flags
  
 if (${CMAKE_SYSTEM_NAME} MATCHES "Linux")
-    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++98 -Wall -pedantic -Wcast-align -Wextra -Wmissing-noreturn -Wconversion -Wcast-qual -Wcast-align -O2")
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++98 -Wall -pedantic -Wcast-align -Wextra -Wmissing-noreturn -Wconversion -Wcast-qual -Wcast-align")
+    # Only optimize Release builds. The expansion is quoted because CMAKE_BUILD_TYPE
+    # is empty unless the user sets it, and an unquoted empty expansion would leave
+    # if() with the malformed expression `STREQUAL "Release"`.
+    if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
+        set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
+    endif ()
 elseif (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
     set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x -Wall -O2")
     set (CMAKE_SHARED_LIBRARY_PREFIX "")
@@ -107,7 +111,8 @@ add_subdirectory (fsabuilder)
 ########## add tests ##########
  
 macro (test_build_and_recognize fname method)
-    add_test (TestBuild-${method}-${fname} python fsabuilder/buildfsa.py --analyzer -i testfiles/${fname} -o /tmp/test-${method}-${fname}.fsa --tagset-file=testfiles/polimorf.tagset --output-format=BINARY --serialization-method=${method})
+    add_test (TestBuild-${method}-${fname} python fsabuilder/buildfsa.py --analyzer -i testfiles/${fname} -o /tmp/test-${method}-${fname}.fsa --tagset-file=testfiles/polimorf.tagset --serialization-method=${method})
+    add_test (TestBuild4Synth-${method}-${fname} python fsabuilder/buildfsa.py --generator -i testfiles/${fname} -o /tmp/test-synth-${method}-${fname}.fsa --tagset-file=testfiles/polimorf.tagset --serialization-method=${method})
     add_test (TestRecognize-${method}-${fname} morfeusz/test_recognize_dict /tmp/test-${method}-${fname}.fsa testfiles/${fname})
     # add_test (TestNOTRecognize-${method}-${fname} fsa/test_not_recognize /tmp/test-${method}-${fname}.fsa testfiles/out_of_dict)
     # add_test (TestSpeed-${method}-${fname} fsa/test_speed /tmp/test-${method}-${fname}.fsa testfiles/speed_test_data)
@@ -162,7 +162,7 @@ def buildGeneratorFromPoliMorf(inputFile, tagsetFile):
     encoder = encode.Encoder4Generator()
     tagset = common.Tagset(tagsetFile)
     fsa = FSA(encoder, tagset)
-    inputData = _readPolimorfInput4Analyzer(inputFile, tagset, encoder)
+    inputData = _readPolimorfInput4Generator(inputFile, tagset, encoder)
     for word, data in inputData:
         fsa.addEntry(word, data)
     fsa.close()
@@ -192,7 +192,7 @@ def main(opts):
                   }[opts.serializationMethod](fsa)
  
     if opts.cpp:
-        serializer.serialize2CppFile(opts.outputFile)
+        serializer.serialize2CppFile(opts.outputFile, generator=opts.generator)
     else:
         serializer.serialize2BinaryFile(opts.outputFile)
 #     {
@@ -54,7 +54,6 @@ class Interpretation4Generator(object):
         self.orth = EncodedForm(base, orth)
         self.tagnum = tagnum
         self.namenum = namenum
-        logging.warn(self)
  
     def getSortKey(self):
         return (
@@ -74,7 +73,10 @@ class Interpretation4Generator(object):
         return hash(self.getSortKey())
  
     def __unicode__(self):
-        return u'%s %d %s %d %d' % (self.lemma, self.orth.cutLength, self.orth.suffixToAdd, self.tagnum, self.namenum)
+        return u'%s,(%d %s),%d,%d' % (self.lemma, self.orth.cutLength, self.orth.suffixToAdd, self.tagnum, self.namenum)
+    
+    def __repr__(self):
+        return unicode(self)
  
 class Tagset(object):
  
@@ -86,8 +88,8 @@ class Tagset(object):
         self.tag2tagnum = {}
         self.name2namenum = {}
         self._doInit(filename, encoding)
-        print self.tag2tagnum
-        print self.name2namenum
+#         print self.tag2tagnum
+#         print self.name2namenum
  
     def _doInit(self, filename, encoding):
         addingTo = None
@@ -480,10 +480,9 @@ class PolimorfConverter4Generator(object):
             line = line.decode(self.inputEncoding).strip(u'\n')
             if line:
     #             print line
-                orth, base, tagnum, namenum, typenum = line.split(u' ')
+                orth, base, tagnum, namenum = line.split(u' ')
                 tagnum = int(tagnum)
                 namenum = int(namenum)
-                typenum = int(typenum)
                 yield (base, Interpretation4Generator(orth, base, tagnum, namenum))
  
     def convert(self, inputLines):
@@ -29,9 +29,6 @@ class Encoder(object):
  
     def decodeData(self, rawData):
         return NotImplementedError()
-#         print unicode(str(rawData), self.encoding)[:-1]
-#         print unicode(str(rawData), self.encoding)[:-1].split(u'|')
-#         return unicode(str(rawData), self.encoding)[:-1].split(u'|')
  
     def decodeWord(self, rawWord):
         return unicode(str(rawWord).strip('\x00'), self.encoding)
@@ -49,7 +46,8 @@ class Encoder(object):
         res.append(form.cutLength)
         res.extend(self.encodeWord(form.suffixToAdd, lowercase=False))
         res.append(0)
-        res.extend(self._encodeCasePattern(form.casePattern))
+        if withCasePattern:
+            res.extend(self._encodeCasePattern(form.casePattern))
         return res
  
     def _encodeCasePattern(self, casePattern):
@@ -96,17 +94,6 @@ class Encoder(object):
         assert namenum < 256 and namenum >= 0
         return bytearray([namenum])
  
-# class SimpleEncoder(Encoder):
-#     
-#     def __init__(self, encoding='utf8'):
-#         super(SimpleEncoder, self).__init__(encoding)
-#     
-#     def encodeData(self, data):
-#         return bytearray(data, encoding=self.encoding) + bytearray([0])
-#     
-#     def decodeData(self, rawData):
-#         return unicode(str(rawData)[:-1], self.encoding)
-
 class MorphEncoder(Encoder):
  
     def __init__(self, encoding='utf8'):
@@ -133,11 +120,10 @@ class MorphEncoder(Encoder):
 class Encoder4Generator(Encoder):
  
     def __init__(self, encoding='utf8'):
-        super(MorphEncoder, self).__init__(encoding)
+        super(Encoder4Generator, self).__init__(encoding)
  
     def encodeData(self, interpsList):
         res = bytearray()
-#         print interpsList
         firstByte = len(interpsList)
         assert firstByte < 256
         assert firstByte > 0
@@ -148,3 +134,6 @@ class Encoder4Generator(Encoder):
             res.extend(self._encodeTagNum(interp.tagnum))
             res.extend(self._encodeNameNum(interp.namenum))
         return res
+#     
+#     def decodeData(self, data):
+#         
@@ -54,30 +54,6 @@ class FSA(object):
         self.encodedPrevWord = None
         self.closed = True
  
-#     def feed(self, input):
-#         
-# #         allWords = []
-#         for n, (word, data) in enumerate(input, start=1):
-#             assert data is not None
-#             encodedWord = self.encodeWord(word)
-#             assert encodedWord > self.encodedPrevWord
-#             if encodedWord > self.encodedPrevWord:
-#                 self._addSorted(encodedWord, self.encodeData(data))
-#                 self.encodedPrevWord = encodedWord
-# #                 assert self.tryToRecognize(word) == data
-#                 if n % 10000 == 0:
-#                     logging.info(word)
-#                     logging.info(str(self.register.getStatesNum()))
-#     #             allWords.append(word)
-#                 for label in encodedWord:
-#                     self.label2Freq[label] = self.label2Freq.get(label, 0) + 1
-#         
-#         self.initialState = self._replaceOrRegister(self.initialState, self.encodeWord(word))
-#         self.encodedPrevWord = None
-        
-#         for w in allWords:
-#             self.tryToRecognize(w, True)
-    
     def train(self, trainData):
         self.label2Freq = {}
         for idx, word in enumerate(trainData):
@@ -22,14 +22,17 @@ class Serializer(object):
     def getVersion(self):
         return 9
  
-    def serialize2CppFile(self, fname):
+    def serialize2CppFile(self, fname, generator):
         res = []
 #         self.fsa.calculateOffsets(sizeCounter=lambda state: self.getStateSize(state))
         res.append('\n')
         res.append('#include "%s"' % self.headerFilename)
         res.append('\n')
         res.append('\n')
-        res.append('extern const unsigned char DEFAULT_FSA[] = {')
+        if generator:
+            res.append('extern const unsigned char DEFAULT_SYNTH_FSA[] = {')
+        else:
+            res.append('extern const unsigned char DEFAULT_FSA[] = {')
         res.append('\n')
         for byte in self.fsa2bytearray():
             res.append(hex(byte));
@@ -2,7 +2,13 @@
 ########## generate default dictionary data #################
 add_custom_command (
         OUTPUT "${INPUT_DICTIONARY_CPP}"
-        COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/buildfsa.py --analyzer -i "${INPUT_DICTIONARY}" -o "${INPUT_DICTIONARY_CPP}" "--tagset-file=${INPUT_TAGSET}" --output-format=CPP --serialization-method=SIMPLE
+        COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/buildfsa.py --analyzer -i "${INPUT_DICTIONARY}" -o "${INPUT_DICTIONARY_CPP}" "--tagset-file=${INPUT_TAGSET}" --cpp --serialization-method=SIMPLE
+        DEPENDS "${INPUT_DICTIONARY}"
+        COMMENT "Building default dictionary C++ file"
+)
+add_custom_command (
+        OUTPUT "${INPUT_SYNTH_DICTIONARY_CPP}"
+        COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/buildfsa.py --generator -i "${INPUT_DICTIONARY}" -o "${INPUT_SYNTH_DICTIONARY_CPP}" "--tagset-file=${INPUT_TAGSET}" --cpp --serialization-method=SIMPLE
         DEPENDS "${INPUT_DICTIONARY}"
         COMMENT "Building default dictionary C++ file"
 )
@@ -14,10 +20,13 @@ include_directories( ${CMAKE_CURRENT_SOURCE_DIR} )
 #### build #####
  
 set(SRC_FILES 
-    const.cpp 
+    const.cpp
     ${INPUT_DICTIONARY_CPP}
+    ${INPUT_SYNTH_DICTIONARY_CPP}
+    Environment.cpp
     MorphDeserializer.cpp
     GeneratorDeserializer.cpp
+    Generator.cpp
     Tagset.cpp 
     fsa/const.cpp 
     MorphInterpretation.cpp 
@@ -31,7 +40,9 @@ set(SRC_FILES
 set(INCLUDE_FILES 
     const.hpp 
     data/default_fsa.hpp
-    MorphDeserializer.hpp 
+    MorphDeserializer.hpp
+    GeneratorDeserializer.hpp
+    Generator.hpp
     Tagset.hpp 
     fsa/const.hpp 
     MorphInterpretation.hpp 
@@ -47,11 +58,13 @@ set_source_files_properties ( SOURCE &quot;${INPUT_DICTIONARY_CPP}&quot; PROPERTIES GENERA
 # add_dependencies (libmorfeusz dupa)
 set_target_properties (libmorfeusz PROPERTIES OUTPUT_NAME "morfeusz")
  
-add_executable (morfeusz main.cpp)
+add_executable (morfeusz_analyzer morfeusz_analyzer.cpp)
+add_executable (morfeusz_generator morfeusz_generator.cpp)
 add_executable (test_result_equals test_result_equals.cpp)
 add_executable (test_recognize_dict test_recognize_dict.cpp)
  
-target_link_libraries (morfeusz libmorfeusz)
+target_link_libraries (morfeusz_analyzer libmorfeusz)
+target_link_libraries (morfeusz_generator libmorfeusz)
 target_link_libraries (test_result_equals libmorfeusz)
 target_link_libraries (test_recognize_dict libmorfeusz)
  
@@ -67,4 +80,4 @@ add_subdirectory (python)
  
 install (FILES ${INCLUDE_FILES} DESTINATION include/morfeusz)
 install (TARGETS libmorfeusz DESTINATION ${TARGET_LIB_DIR})
-install (TARGETS morfeusz DESTINATION bin)
+install (TARGETS morfeusz_analyzer morfeusz_generator DESTINATION bin)
+/* 
+ * File:   Environment.cpp
+ * Author: mlenart
+ * 
+ * Created on 22 styczeń 2014, 12:08
+ */
+
+#include "Environment.hpp"
+#include "exceptions.hpp"
+
+/**
+ * Stores copies of both tagsets and selects the converter for the requested
+ * charset. Throws MorfeuszException (from getCharsetConverter) when the
+ * charset is not one of the supported values.
+ */
+Environment::Environment(
+        const Tagset& analyzerTagset,
+        const Tagset& generatorTagset,
+        MorfeuszCharset charset)
+    : currentCharsetConverter(this->getCharsetConverter(charset)),
+      analyzerTagset(analyzerTagset),
+      generatorTagset(generatorTagset) {
+}
+
+/**
+ * Returns the address of the converter member that handles the given charset.
+ * Throws MorfeuszException for any other value.
+ */
+const CharsetConverter* Environment::getCharsetConverter(MorfeuszCharset charset) const {
+    const CharsetConverter* selected = 0;
+    switch (charset) {
+        case UTF8:
+            selected = &utf8CharsetConverter;
+            break;
+        case ISO8859_2:
+            selected = &isoCharsetConverter;
+            break;
+        case CP1250:
+            selected = &cp1250CharsetConverter;
+            break;
+        case CP852:
+            selected = &cp852CharsetConverter;
+            break;
+        default:
+            throw MorfeuszException("invalid charset");
+    }
+    return selected;
+}
+
+/** Nothing to release explicitly: every member is held by value or is a non-owning pointer. */
+Environment::~Environment() {}
+
+/** Makes the converter for the given charset the active one; throws MorfeuszException for an unknown charset. */
+void Environment::setCharset(MorfeuszCharset charset) {
+    const CharsetConverter* selected = getCharsetConverter(charset);
+    currentCharsetConverter = selected;
+}
+
+/** Returns the currently active charset converter. */
+const CharsetConverter& Environment::getCharsetConverter() const {
+    const CharsetConverter* active = currentCharsetConverter;
+    return *active;
+}
+
+/** Replaces the stored analyzer tagset with a copy of the given one. */
+void Environment::setAnalyzerTagset(const Tagset& tagset) {
+    analyzerTagset = tagset;
+}
+
+/** Returns the stored analyzer tagset. */
+const Tagset& Environment::getAnalyzerTagset() const {
+    return analyzerTagset;
+}
+
+/** Replaces the stored generator tagset with a copy of the given one. */
+void Environment::setGeneratorTagset(const Tagset& tagset) {
+    generatorTagset = tagset;
+}
+
+/** Returns the stored generator tagset. */
+const Tagset& Environment::getGeneratorTagset() const {
+    return generatorTagset;
+}
+
+/** Returns the case converter owned by this environment. */
+const CaseConverter& Environment::getCaseConverter() const {
+    return caseConverter;
+}
+/* 
+ * File:   Environment.hpp
+ * Author: mlenart
+ *
+ * Created on 22 styczeń 2014, 12:08
+ */
+
+#ifndef ENVIRONMENT_HPP
+#define	ENVIRONMENT_HPP
+
+#include "charset/CaseConverter.hpp"
+#include "charset/CharsetConverter.hpp"
+#include "const.hpp"
+#include "Tagset.hpp"
+
+
+/**
+ * Settings shared by the analyzer and the generator: the active charset
+ * converter, both tagsets (stored by value) and a case converter.
+ *
+ * NOTE(review): currentCharsetConverter is set to the address of one of this
+ * object's own converter members, so a copy of an Environment would keep
+ * pointing into the object it was copied from - confirm instances are never copied.
+ */
+class Environment {
+public:
+    /** Copies both tagsets and selects the converter for the given charset. */
+    Environment(
+            const Tagset& analyzerTagset,
+            const Tagset& generatorTagset,
+            MorfeuszCharset charset);
+    virtual ~Environment();
+
+    /** Selects which converter getCharsetConverter() returns. */
+    void setCharset(MorfeuszCharset charset);
+    const CharsetConverter& getCharsetConverter() const;
+
+    void setAnalyzerTagset(const Tagset& tagset);
+    const Tagset& getAnalyzerTagset() const;
+
+    void setGeneratorTagset(const Tagset& tagset);
+    const Tagset& getGeneratorTagset() const;
+
+    const CaseConverter& getCaseConverter() const;
+
+private:
+    /** Maps a charset value to the matching converter member. */
+    const CharsetConverter* getCharsetConverter(MorfeuszCharset charset) const;
+
+    // Data members stay in this order: it is the order in which they are initialized.
+    const CharsetConverter* currentCharsetConverter;
+    const UTF8CharsetConverter utf8CharsetConverter;
+    const ISO8859_2_CharsetConverter isoCharsetConverter;
+    const Windows_1250_CharsetConverter cp1250CharsetConverter;
+    const CP852_CharsetConverter cp852CharsetConverter;
+    Tagset analyzerTagset;
+    Tagset generatorTagset;
+    const CaseConverter caseConverter;
+};
+
+#endif	/* ENVIRONMENT_HPP */
+
+/* 
+ * File:   Generator.cpp
+ * Author: mlenart
+ * 
+ * Created on 21 styczeń 2014, 14:38
+ */
+
+#include <string>
+#include <iostream>
+#include "charset/charset_utils.hpp"
+#include "MorphInterpretation.hpp"
+#include "Generator.hpp"
+#include "Environment.hpp"
+
+
+using namespace std;
+
+/**
+ * Builds a generator over the automaton data at ptr. The environment is kept
+ * by reference and is also handed to the deserializer used by the automaton.
+ */
+Generator::Generator(const unsigned char* ptr, const Environment& env)
+    : deserializer(env),
+      fsa(SynthFSAType::getFSA(ptr, deserializer)),
+      env(env) {
+}
+
+// NOTE(review): fsa comes from SynthFSAType::getFSA and is not released here -
+// confirm whether the caller of getFSA is expected to own and delete it.
+Generator::~Generator() {}
+
+/**
+ * Rebuilds a surface form from a lemma: writes the lemma's code points except
+ * the last orth.suffixToCut of them, then appends orth.suffixToAdd.
+ * The suffix is decoded as UTF-8; everything is written with the
+ * environment's current charset converter.
+ *
+ * @param orth  number of trailing code points to drop and the suffix to append
+ * @param lemma code points of the lemma
+ * @return the resulting form in the environment's current charset
+ */
+std::string Generator::decodeOrth(
+        const EncodedOrth& orth,
+        const std::vector<uint32_t>& lemma) const {
+    const CharsetConverter& converter = env.getCharsetConverter();
+    string res;
+    // Clamp instead of subtracting blindly: the subtraction is unsigned, so a cut
+    // length larger than the lemma would wrap around and index far past its end.
+    const size_t cutLength = static_cast<size_t> (orth.suffixToCut);
+    const size_t prefixLength = lemma.size() > cutLength ? lemma.size() - cutLength : 0;
+    for (size_t i = 0; i < prefixLength; i++) {
+        converter.append(lemma[i], res);
+    }
+    const char* suffixPtr = orth.suffixToAdd.c_str();
+    const char* suffixEnd = suffixPtr + orth.suffixToAdd.length();
+    while (suffixPtr != suffixEnd) {
+        uint32_t cp = UTF8CharsetConverter().next(suffixPtr, suffixEnd);
+        converter.append(cp, res);
+    }
+    return res;
+}
+
+/**
+ * Turns the encoded interpretations found for a lemma into MorphInterpretation
+ * objects and appends them to result.
+ *
+ * @param encodedRes      interpretations read from the generator automaton
+ * @param lemma           the lemma as given by the caller
+ * @param lemmaCodepoints the same lemma as code points, used to rebuild each form
+ * @param result          output vector; existing elements are kept
+ */
+void Generator::decodeRes(
+        const std::vector<EncodedGeneratorInterpretation>& encodedRes,
+        const std::string& lemma,
+        const std::vector<uint32_t>& lemmaCodepoints,
+        std::vector<MorphInterpretation>& result) const {
+
+    for (size_t i = 0; i < encodedRes.size(); i++) {
+        // Bind by reference: the element is only read, so no copy is needed.
+        const EncodedGeneratorInterpretation& egi = encodedRes[i];
+        string decodedOrth = this->decodeOrth(egi.orth, lemmaCodepoints);
+        // The tag values were read from the generator automaton, so they are
+        // resolved against the generator tagset rather than the analyzer one.
+        MorphInterpretation mi(
+                0, 0,
+                decodedOrth, lemma,
+                egi.tag,
+                egi.nameClassifier,
+                env.getGeneratorTagset(),
+                env.getCharsetConverter());
+        result.push_back(mi);
+    }
+}
+
+/**
+ * Looks the lemma up in the generator automaton and appends every form stored
+ * for it to result. Nothing is appended when the walk does not finish in an
+ * accepting state.
+ */
+void Generator::generate(const string& lemma, vector<MorphInterpretation>& result) const {
+    const CharsetConverter& converter = this->env.getCharsetConverter();
+    const char* cursor = lemma.c_str();
+    const char* end = cursor + lemma.length();
+    vector<uint32_t> lemmaCodepoints;
+    SynthStateType state = this->fsa->getInitialState();
+    // Feed one code point at a time; stop early once the automaton hits a sink.
+    while (cursor != end && !state.isSink()) {
+        uint32_t cp = converter.next(cursor, end);
+        feedState(state, cp, converter);
+        lemmaCodepoints.push_back(cp);
+    }
+    if (!state.isAccepting()) {
+        return;
+    }
+    vector<EncodedGeneratorInterpretation> encodedRes = state.getValue();
+    decodeRes(encodedRes, lemma, lemmaCodepoints, result);
+}
+/* 
+ * File:   Generator.hpp
+ * Author: mlenart
+ *
+ * Created on 21 styczeń 2014, 14:38
+ */
+
+#ifndef GENERATOR_HPP
+#define	GENERATOR_HPP
+
+#include <string>
+#include <vector>
+#include "charset/CharsetConverter.hpp"
+#include "MorphInterpretation.hpp"
+#include "Tagset.hpp"
+#include "GeneratorDeserializer.hpp"
+
+typedef FSA< std::vector<EncodedGeneratorInterpretation > > SynthFSAType;
+typedef State< std::vector<EncodedGeneratorInterpretation > > SynthStateType;
+
+/**
+ * Produces the forms stored for a lemma in a generator automaton.
+ * The Environment is held by reference, so it has to outlive the Generator.
+ */
+class Generator {
+public:
+    /** Builds a generator over the automaton data at ptr. */
+    Generator(const unsigned char* ptr, const Environment& env);
+    virtual ~Generator();
+
+    /** Appends to result every form the automaton stores for the lemma. */
+    void generate(const std::string& lemma, std::vector<MorphInterpretation>& result) const;
+
+private:
+    /** Rebuilds one surface form from the lemma's code points. */
+    std::string decodeOrth(
+            const EncodedOrth& orth,
+            const std::vector<uint32_t>& lemmaCodepoints) const;
+
+    /** Converts encoded interpretations into MorphInterpretation objects. */
+    void decodeRes(
+            const std::vector<EncodedGeneratorInterpretation>& encodedRes,
+            const std::string& lemma,
+            const std::vector<uint32_t>& lemmaCodepoints,
+            std::vector<MorphInterpretation>& result) const;
+
+    // Declaration order matters: the constructor passes deserializer to getFSA
+    // when initializing fsa, so deserializer has to be declared before it.
+    GeneratorDeserializer deserializer;
+    const SynthFSAType* fsa;
+    const Environment& env;
+};
+
+#endif	/* GENERATOR_HPP */
+
@@ -6,24 +6,47 @@
  */
  
 #include "GeneratorDeserializer.hpp"
+#include "EncodedGeneratorInterpretation.hpp"
  
 using namespace std;
  
-GeneratorDeserializer::GeneratorDeserializer(const string& lemma)
-: lemma(&lemma) {
-    
+GeneratorDeserializer::GeneratorDeserializer(const Environment& env)
+: env(env) {
+
+}
+
+/**
+ * Reads the orth part of a record: one byte with the number of code points to
+ * cut, followed by a NUL-terminated suffix. Advances ptr past the terminator.
+ */
+void GeneratorDeserializer::deserializeOrth(const unsigned char*& ptr, EncodedOrth& orth) const {
+    // XXX: the data is trusted here - nothing checks that it is well-formed.
+    orth.suffixToCut = *ptr;
+    ++ptr;
+    const char* suffix = (const char*) ptr;
+    orth.suffixToAdd = suffix;
+    ptr += strlen(suffix) + 1;
 }
  
-void GeneratorDeserializer::setCurrentLemma(const string& lemma) {
-    this->lemma = &lemma;
+/**
+ * Reads one interpretation record at ptr and advances ptr past it: the orth
+ * part, a 2-byte tag stored most-significant byte first, and a 1-byte name
+ * classifier.
+ */
+void GeneratorDeserializer::deserializeInterp(const unsigned char*& ptr, EncodedGeneratorInterpretation& interp) const {
+    deserializeOrth(ptr, interp.orth);
+    // Assemble the big-endian tag byte by byte. Casting ptr to uint16_t* reads
+    // through a possibly misaligned pointer, and ntohs needs a platform-specific
+    // socket header; this form yields the same value without either.
+    interp.tag = static_cast<uint16_t> ((ptr[0] << 8) | ptr[1]);
+    ptr += 2;
+    interp.nameClassifier = *ptr;
+    ptr++;
 }
  
 long GeneratorDeserializer::deserialize(
         const unsigned char* ptr,
-        std::vector<MorphInterpretation>& interps) const {
-    
+        std::vector<EncodedGeneratorInterpretation>& interps) const {
+    // Layout read here: one count byte, then that many interpretation records.
+    // interps is emptied first and filled with the decoded records.
+    const unsigned int count = *ptr;
+    const unsigned char* cursor = ptr + 1;
+    interps.clear();
+    interps.reserve(count);
+    for (unsigned int remaining = count; remaining > 0; --remaining) {
+        EncodedGeneratorInterpretation interp;
+        this->deserializeInterp(cursor, interp);
+        interps.push_back(interp);
+    }
+    // Number of bytes consumed, the count byte included.
+    return cursor - ptr;
 }
  
 GeneratorDeserializer::~GeneratorDeserializer() {
-    
+
 }
@@ -5,25 +5,29 @@
  * Created on 20 styczeń 2014, 17:14
  */
  
-#ifndef GENERATORDESERIALIZER_HPP
-#define	GENERATORDESERIALIZER_HPP
+#ifndef SYNTHDESERIALIZER_HPP
+#define	SYNTHDESERIALIZER_HPP
  
 #include <string>
 #include <vector>
 #include "fsa/fsa.hpp"
-#include "MorphInterpretation.hpp"
+#include "Tagset.hpp"
+#include "EncodedGeneratorInterpretation.hpp"
+#include "Environment.hpp"
  
-class GeneratorDeserializer: public Deserializer< std::vector<MorphInterpretation> > {
+class GeneratorDeserializer: public Deserializer< std::vector<EncodedGeneratorInterpretation> > {
 public:
-    GeneratorDeserializer(const std::string& lemma);
-    void setCurrentLemma(const std::string& lemma);
+    explicit GeneratorDeserializer(const Environment& env);
     long deserialize(
         const unsigned char* ptr, 
-        std::vector<MorphInterpretation>& interps) const;
+        std::vector<EncodedGeneratorInterpretation>& interps) const;
     virtual ~GeneratorDeserializer();
 private:
-    const std::string* lemma;
+    const Environment& env;
+    
+    void deserializeInterp(const unsigned char*& ptr, EncodedGeneratorInterpretation& interp) const;
+    void deserializeOrth(const unsigned char*& ptr, EncodedOrth& orth) const;
 };
  
-#endif	/* GENERATORDESERIALIZER_HPP */
+#endif	/* SYNTHDESERIALIZER_HPP */
  
@@ -13,18 +13,13 @@
 #include "InterpretedChunk.hpp"
 #include "EncodedInterpretation.hpp"
 #include "charset/CaseConverter.hpp"
+#include "Environment.hpp"
  
 class InterpretedChunksDecoder {
 public:
  
-    InterpretedChunksDecoder(
-            const Tagset& tagset,
-            const CharsetConverter& charsetConverter,
-            const CaseConverter& caseConverter)
-    : tagset(tagset),
-    charsetConverter(charsetConverter),
-    utf8CharsetConverter(),
-    caseConverter(caseConverter) {
+    InterpretedChunksDecoder(const Environment& env)
+    : env(env) {
  
     }
  
@@ -34,7 +29,7 @@ public:
             unsigned int endNode,
             const InterpretedChunk& interpretedChunk,
             OutputIterator out) {
-        string orth = charsetConverter.toString(interpretedChunk.originalCodepoints);
+        string orth = env.getCharsetConverter().toString(interpretedChunk.originalCodepoints);
         for (unsigned int i = 0; i < interpretedChunk.interpsGroup.interps.size(); i++) {
             const EncodedInterpretation& ei = interpretedChunk.interpsGroup.interps[i];
             string lemma = convertLemma(
@@ -45,8 +40,8 @@ public:
                     orth, lemma,
                     ei.tag,
                     ei.nameClassifier,
-                    tagset,
-                    charsetConverter);
+                    env.getAnalyzerTagset(),
+                    env.getCharsetConverter());
             ++out;
         }
         return out;
@@ -61,28 +56,20 @@ private:
         for (unsigned int i = 0; i < orth.size() - lemma.suffixToCut; i++) {
             uint32_t cp = 
                     (i < lemma.casePattern.size() && lemma.casePattern[i])
-                    ? this->caseConverter.toTitle(orth[i])
+                    ? env.getCaseConverter().toTitle(orth[i])
                     : orth[i];
-            charsetConverter.append(cp, res);
+            env.getCharsetConverter().append(cp, res);
         }
         const char* suffixPtr = lemma.suffixToAdd.c_str();
         const char* suffixEnd = suffixPtr + lemma.suffixToAdd.length();
         while (suffixPtr != suffixEnd) {
-            uint32_t cp = utf8CharsetConverter.next(suffixPtr, suffixEnd);
-            charsetConverter.append(cp, res);
+            uint32_t cp = UTF8CharsetConverter().next(suffixPtr, suffixEnd);
+            env.getCharsetConverter().append(cp, res);
         }
-        //        string res(orth);
-        //        res.erase(
-        //                res.end() - lemma.suffixToCut,
-        //                res.end());
-        //        res.append(lemma.suffixToAdd);
         return res;
     }
  
-    const Tagset& tagset;
-    const CharsetConverter& charsetConverter;
-    const UTF8CharsetConverter utf8CharsetConverter;
-    const CaseConverter& caseConverter;
+    const Environment& env;
 };
  
 #endif	/* INTERPSGROUPDECODER_HPP */
@@ -12,6 +12,7 @@
 #include "data/default_fsa.hpp"
 #include "Morfeusz.hpp"
 #include "MorphDeserializer.hpp"
+#include "GeneratorDeserializer.hpp"
 #include "InterpretedChunksDecoder.hpp"
 #include "charset/CharsetConverter.hpp"
 #include "charset/charset_utils.hpp"
@@ -22,56 +23,61 @@
  
 using namespace std;
  
-static Deserializer<vector<InterpsGroup> >* initializeDeserializer() {
+static Deserializer<vector<InterpsGroup> >* initializeAnalyzerDeserializer() {
     static Deserializer < vector < InterpsGroup > > *deserializer
             = new MorphDeserializer();
     return deserializer;
 }
  
-static FSA<vector<InterpsGroup > > *initializeFSA(const string& filename) {
+static FSA<vector<InterpsGroup > > *initializeAnalyzerFSA(const string& filename) {
     cerr << "initialize FSA" << endl;
-    return FSA < vector < InterpsGroup > > ::getFSA(filename, *initializeDeserializer());
-}
-
-static CharsetConverter* getCharsetConverter(MorfeuszCharset charset) {
-    cerr << "initialize charset converter for " << charset << endl;
-    static CharsetConverter* utf8Converter = new UTF8CharsetConverter();
-//    static CharsetConverter* utf16LEConverter = new UTF16CharsetConverter(UTF16CharsetConverter::UTF16CharsetConverter::LE);
-//    static CharsetConverter* utf16BEConverter = new UTF16CharsetConverter(UTF16CharsetConverter::Endianness::BE);
-    static CharsetConverter* iso8859_2Converter = new ISO8859_2_CharsetConverter();
-    static CharsetConverter* windows1250Converter = new Windows_1250_CharsetConverter();
-    static CharsetConverter* cp852Converter = new CP852_CharsetConverter();
-    switch (charset) {
-        case UTF8:
-            return utf8Converter;
-        case ISO8859_2:
-            return iso8859_2Converter;
-        case CP1250:
-            return windows1250Converter;
-        case CP852:
-            return cp852Converter;
-        default:
-            throw MorfeuszException("invalid charset");
-    }
-}
-
-static Tagset* initializeTagset(const string& filename) {
-    cerr << "initialize tagset" << endl;
-    static Tagset* tagset = new Tagset(readFile<unsigned char>(filename.c_str()));
-    return tagset;
-}
-
-static Tagset* initializeTagset(const unsigned char* data) {
-    cerr << "initialize tagset" << endl;
-    static Tagset* tagset = new Tagset(data);
-    return tagset;
-}
-
-static CaseConverter* initializeCaseConverter() {
-    cerr << "initialize case converter" << endl;
-    static CaseConverter* cc = new CaseConverter();
-    return cc;
-}
+    return FSA < vector < InterpsGroup > > ::getFSA(filename, *initializeAnalyzerDeserializer());
+}
+
+//static FSA<vector<MorphInterpretation > > *initializeSynthFSA(const string& filename, const SynthDeserializer& deserializer) {
+//    cerr << "initialize synth FSA" << endl;
+//    return FSA < vector < EncodedGeneratorInterpretation > > ::getFSA(filename, deserializer);
+//}
+//
+//static CharsetConverter* getCharsetConverter(MorfeuszCharset charset) {
+//    cerr << "initialize charset converter for " << charset << endl;
+//    static CharsetConverter* utf8Converter = new UTF8CharsetConverter();
+////    static CharsetConverter* utf16LEConverter = new UTF16CharsetConverter(UTF16CharsetConverter::UTF16CharsetConverter::LE);
+////    static CharsetConverter* utf16BEConverter = new UTF16CharsetConverter(UTF16CharsetConverter::Endianness::BE);
+//    static CharsetConverter* iso8859_2Converter = new ISO8859_2_CharsetConverter();
+//    static CharsetConverter* windows1250Converter = new Windows_1250_CharsetConverter();
+//    static CharsetConverter* cp852Converter = new CP852_CharsetConverter();
+//    switch (charset) {
+//        case UTF8:
+//            return utf8Converter;
+//        case ISO8859_2:
+//            return iso8859_2Converter;
+//        case CP1250:
+//            return windows1250Converter;
+//        case CP852:
+//            return cp852Converter;
+//        default:
+//            throw MorfeuszException("invalid charset");
+//    }
+//}
+//
+//static Tagset* initializeTagset(const string& filename) {
+//    cerr << "initialize tagset" << endl;
+//    static Tagset* tagset = new Tagset(readFile<unsigned char>(filename.c_str()));
+//    return tagset;
+//}
+//
+//static Tagset* initializeTagset(const unsigned char* data) {
+//    cerr << "initialize tagset" << endl;
+//    static Tagset* tagset = new Tagset(data);
+//    return tagset;
+//}
+//
+//static CaseConverter* initializeCaseConverter() {
+//    cerr << "initialize case converter" << endl;
+//    static CaseConverter* cc = new CaseConverter();
+//    return cc;
+//}
  
 static MorfeuszOptions createDefaultOptions() {
     MorfeuszOptions res;
@@ -81,44 +87,44 @@ static MorfeuszOptions createDefaultOptions() {
 }
  
 Morfeusz::Morfeusz()
-: fsa(FSAType::getFSA(DEFAULT_FSA, *initializeDeserializer())),
-charsetConverter(getCharsetConverter(DEFAULT_MORFEUSZ_CHARSET)),
-tagset(initializeTagset(DEFAULT_FSA)),
-caseConverter(initializeCaseConverter()),
+: env(Tagset(DEFAULT_FSA), Tagset(DEFAULT_SYNTH_FSA), DEFAULT_MORFEUSZ_CHARSET),
+analyzerFSA(FSAType::getFSA(DEFAULT_FSA, *initializeAnalyzerDeserializer())),
+isAnalyzerFSAFromFile(false),
+generator(DEFAULT_SYNTH_FSA, env),
 options(createDefaultOptions()) {
  
 }
  
-Morfeusz::Morfeusz(const string& filename)
-: fsa(initializeFSA(filename)),
-charsetConverter(getCharsetConverter(DEFAULT_MORFEUSZ_CHARSET)),
-tagset(initializeTagset(filename)),
-caseConverter(initializeCaseConverter()),
-options(createDefaultOptions()) {
-
+/**
+ * Replaces the analyzer automaton with one loaded from the given file.
+ * An automaton previously loaded from a file is deleted; the one set up by
+ * the constructor is not.
+ */
+void Morfeusz::setAnalyzerFile(const string& filename) {
+    // Load the replacement first: if loading fails with an exception, the current
+    // automaton and the ownership flag stay as they were, instead of leaving a
+    // deleted pointer behind for the destructor to delete again.
+    FSA<vector<InterpsGroup > > *loaded = initializeAnalyzerFSA(filename);
+    if (this->isAnalyzerFSAFromFile) {
+        delete this->analyzerFSA;
+    }
+    this->analyzerFSA = loaded;
+    this->isAnalyzerFSAFromFile = true;
 }
  
 Morfeusz::~Morfeusz() {
-    //    delete &this->fsa;
-    //    delete &this->charsetConverter;
+    if (this->isAnalyzerFSAFromFile) {
+        delete this->analyzerFSA;
+    }
 }
  
-void Morfeusz::processOneWord(
+void Morfeusz::analyzeOneWord(
         const char*& inputData,
         const char* inputEnd,
         int startNodeNum,
         std::vector<MorphInterpretation>& results) const {
     while (inputData != inputEnd
-            && isEndOfWord(this->charsetConverter->peek(inputData, inputEnd))) {
-        this->charsetConverter->next(inputData, inputEnd);
+            && isEndOfWord(this->env.getCharsetConverter().peek(inputData, inputEnd))) {
+        this->env.getCharsetConverter().next(inputData, inputEnd);
     }
     const char* wordStart = inputData;
     vector<InterpretedChunk> accum;
     FlexionGraph graph;
     const char* currInput = inputData;
-    doProcessOneWord(currInput, inputEnd, accum, graph);
+    doAnalyzeOneWord(currInput, inputEnd, accum, graph);
     if (!graph.empty()) {
-        InterpretedChunksDecoder interpretedChunksDecoder(*tagset, *charsetConverter, *caseConverter);
+        InterpretedChunksDecoder interpretedChunksDecoder(env);
         int srcNode = startNodeNum;
         for (unsigned int i = 0; i < graph.getTheGraph().size(); i++) {
             vector<FlexionGraph::Edge>& edges = graph.getTheGraph()[i];
@@ -136,25 +142,25 @@ void Morfeusz::processOneWord(
     inputData = currInput;
 }
  
-void Morfeusz::doProcessOneWord(
+void Morfeusz::doAnalyzeOneWord(
         const char*& inputData,
         const char* inputEnd,
         vector<InterpretedChunk>& accum,
         FlexionGraph& graph) const {
     bool endOfWord = inputData == inputEnd;
     const char* currInput = inputData;
-    uint32_t codepoint = endOfWord ? 0 : this->charsetConverter->next(currInput, inputEnd);
+    uint32_t codepoint = endOfWord ? 0 : this->env.getCharsetConverter().next(currInput, inputEnd);
     //    UnicodeChunk uchunk(*(this->charsetConverter), *(this->caseConverter));
     vector<uint32_t> originalCodepoints;
     vector<uint32_t> lowercaseCodepoints;
  
-    StateType state = this->fsa->getInitialState();
+    StateType state = this->analyzerFSA->getInitialState();
  
     while (!isEndOfWord(codepoint)) {
-        uint32_t lowerCP = this->caseConverter->toLower(codepoint);
+        uint32_t lowerCP = this->env.getCaseConverter().toLower(codepoint);
         originalCodepoints.push_back(codepoint);
         lowercaseCodepoints.push_back(lowerCP);
-        this->feedState(state, lowerCP);
+        feedState(state, lowerCP, UTF8CharsetConverter());
         if (state.isAccepting()) {
             vector< InterpsGroup > val(state.getValue());
             for (unsigned int i = 0; i < val.size(); i++) {
@@ -162,13 +168,13 @@ void Morfeusz::doProcessOneWord(
                 InterpretedChunk ic = {inputData, originalCodepoints, lowercaseCodepoints, ig};
                 accum.push_back(ic);
                 const char* newCurrInput = currInput;
-                doProcessOneWord(newCurrInput, inputEnd, accum, graph);
+                doAnalyzeOneWord(newCurrInput, inputEnd, accum, graph);
                 accum.pop_back();
             }
         }
-        codepoint = currInput == inputEnd ? 0 : this->charsetConverter->peek(currInput, inputEnd);
+        codepoint = currInput == inputEnd ? 0 : this->env.getCharsetConverter().peek(currInput, inputEnd);
         if (!isEndOfWord(codepoint)) {
-            this->charsetConverter->next(currInput, inputEnd);
+            this->env.getCharsetConverter().next(currInput, inputEnd);
         }
     }
     if (state.isAccepting()) {
@@ -184,28 +190,20 @@ void Morfeusz::doProcessOneWord(
     inputData = currInput;
 }
  
-void Morfeusz::feedState(
-        StateType& state,
-        int codepoint) const {
-    string chars;
-    this->utf8CharsetConverter.append(codepoint, chars);
-    for (unsigned int i = 0; i < chars.length(); i++) {
-        state.proceedToNext(chars[i]);
-    }
-}
-
 void Morfeusz::appendIgnotiumToResults(
         const string& word,
         int startNodeNum,
         std::vector<MorphInterpretation>& results) const {
-    MorphInterpretation interp = MorphInterpretation::createIgn(startNodeNum, word, *this->tagset, *this->charsetConverter);
+    MorphInterpretation interp = MorphInterpretation::createIgn(startNodeNum, word, env.getAnalyzerTagset(), env.getCharsetConverter());
     results.push_back(interp);
 }
  
 ResultsIterator Morfeusz::analyze(const string& text) const {
     //    const char* textStart = text.c_str();
     //    const char* textEnd = text.c_str() + text.length();
-    return ResultsIterator(text, *this);
+    vector<MorphInterpretation> res;
+    this->analyze(text, res);
+    return ResultsIterator(res);
 }
  
 void Morfeusz::analyze(const string& text, vector<MorphInterpretation>& results) const {
@@ -213,21 +211,28 @@ void Morfeusz::analyze(const string&amp; text, vector&lt;MorphInterpretation&gt;&amp; results)
     const char* inputEnd = input + text.length();
     while (input != inputEnd) {
         int startNode = results.empty() ? 0 : results.back().getEndNode();
-        DEBUG("process " + string(input, inputEnd));
-        this->processOneWord(input, inputEnd, startNode, results);
+        this->analyzeOneWord(input, inputEnd, startNode, results);
     }
 }
  
-void Morfeusz::setEncoding(MorfeuszCharset encoding) {
-    this->options.encoding = encoding;
-    this->charsetConverter = getCharsetConverter(encoding);
+ResultsIterator Morfeusz::generate(const string& text) const {
+    // Convenience overload: collects the results of the vector-filling
+    // generate(text, res) overload and wraps them in a ResultsIterator.
+    vector<MorphInterpretation> res;
+    this->generate(text, res);
+    return ResultsIterator(res);
+}
+
+void Morfeusz::generate(const string& text, vector<MorphInterpretation>& results) const {
+    this->generator.generate(text, results);
 }
  
-ResultsIterator::ResultsIterator(const string& text, const Morfeusz& morfeusz)
-: rawInput(text.c_str()),
-morfeusz(morfeusz) {
-    vector<MorphInterpretation> res;
-    morfeusz.analyze(text, res);
+void Morfeusz::setCharset(MorfeuszCharset charset) {
+    this->options.encoding = charset;
+    this->env.setCharset(charset);
+}
+
+// Copies the given interpretations into the iterator's own buffer.
+// rawInput and startNode are no longer supplied by the caller, so they are
+// given defined initial values instead of being left uninitialized.
+ResultsIterator::ResultsIterator(vector<MorphInterpretation>& res)
+: rawInput(0),
+startNode(0) {
     resultsBuffer.insert(resultsBuffer.begin(), res.begin(), res.end());
 }
  
@@ -21,71 +21,61 @@
 #include "FlexionGraph.hpp"
 #include "MorfeuszOptions.hpp"
 #include "const.hpp"
+#include "exceptions.hpp"
+#include "Generator.hpp"
+#include "Environment.hpp"
  
 class Morfeusz;
 class ResultsIterator;
  
-typedef FSA<std::vector<InterpsGroup > > FSAType;
-typedef State<std::vector<InterpsGroup > > StateType;
-
-class MorfeuszException : public std::exception {
-public:
-
-    MorfeuszException(const std::string& what) : msg(what.c_str()) {
-    }
-
-    virtual ~MorfeuszException() throw () {
-    }
-
-    virtual const char* what() const throw () {
-        return this->msg.c_str();
-    }
-private:
-    const std::string msg;
-};
+typedef FSA< std::vector<InterpsGroup > > FSAType;
+typedef State< std::vector<InterpsGroup > > StateType;
  
 class Morfeusz {
 public:
     Morfeusz();
-    explicit Morfeusz(const std::string& filename);
+    //    explicit Morfeusz(const std::string& filename);
+    void setAnalyzerFile(const std::string& filename);
+    void setSynthesizerFile(const std::string& filename);
     virtual ~Morfeusz();
     //    Morfeusz(const Morfeusz& orig);
     ResultsIterator analyze(const std::string& text) const;
     void analyze(const std::string& text, std::vector<MorphInterpretation>& result) const;
  
-    void setEncoding(MorfeuszCharset encoding);
+    void generate(const std::string& lemma, std::vector<MorphInterpretation>& result) const;
+    ResultsIterator generate(const std::string& lemma) const;
+
+    void setCharset(MorfeuszCharset encoding);
  
     //    Morfeusz();
     friend class ResultsIterator;
 private:
  
-    void processOneWord(
+    void analyzeOneWord(
             const char*& inputData,
             const char* inputEnd,
             int startNodeNum,
             std::vector<MorphInterpretation>& result) const;
  
-    void doProcessOneWord(
+    void doAnalyzeOneWord(
             const char*& inputData,
             const char* inputEnd,
             std::vector<InterpretedChunk>& accum,
             FlexionGraph& graph) const;
  
-    void feedState(
-            StateType& state,
-            int codepoint) const;
-
     void appendIgnotiumToResults(
             const std::string& word,
             int startNodeNum,
             std::vector<MorphInterpretation>& results) const;
-
-    FSAType* fsa;
-    CharsetConverter* charsetConverter;
-    Tagset* tagset;
-    CaseConverter* caseConverter;
-
-    UTF8CharsetConverter utf8CharsetConverter;
+    Environment env;
+    FSAType* analyzerFSA;
+    bool isAnalyzerFSAFromFile;
+    Generator generator;
+//    const CharsetConverter* charsetConverter;
+//    const Tagset* tagset;
+//    const CaseConverter* caseConverter;
+//
+//    UTF8CharsetConverter utf8CharsetConverter;
  
     MorfeuszOptions options;
 };
@@ -96,9 +86,8 @@ public:
     bool hasNext();
     friend class Morfeusz;
 private:
-    ResultsIterator(const std::string& text, const Morfeusz& morfeusz);
+    ResultsIterator(vector<MorphInterpretation>& res);
     const char* rawInput;
-    const Morfeusz& morfeusz;
     std::list<MorphInterpretation> resultsBuffer;
     int startNode;
 };
@@ -36,6 +36,11 @@ Tagset::Tagset(const unsigned char* fsaData) {
     readTags(currPtr, this->names);
 }
  
+//Tagset::Tagset(const Tagset& tagset)
+//: tags(tagset.tags), names(tagset.names) {
+//    
+//}
+
 const string Tagset::getTag(const int tagNum, const CharsetConverter& charsetConverter) const {
     return charsetConverter.fromUTF8(this->tags.at(tagNum));
 }
@@ -15,6 +15,7 @@
 class Tagset {
 public:
     explicit Tagset(const unsigned char* fsaData);
+//    Tagset(const Tagset& tagset);
     const std::string getTag(const int tagNum, const CharsetConverter& charsetConverter) const;
     const std::string getName(const int nameNum, const CharsetConverter& charsetConverter) const;
 private:
@@ -8,7 +8,9 @@
 #ifndef CHARSET_UTILS_HPP
 #define	CHARSET_UTILS_HPP
  
+#include <string>
 #include <set>
+#include "CharsetConverter.hpp"
  
 static inline std::set<int> initializeWhitespaces() {
     std::set<int> res;
@@ -18,10 +20,22 @@ static inline std::set&lt;int&gt; initializeWhitespaces() {
     return res;
 }
  
-bool isEndOfWord(int codepoint) {
+inline bool isEndOfWord(int codepoint) {
     static std::set<int> whitespaces(initializeWhitespaces());
     return whitespaces.count(codepoint);
 }
  
+template <class StateClass>
+void feedState(
+        StateClass& state,
+        int codepoint,
+        const CharsetConverter& charsetConverter) {
+    std::string chars; // filled by charsetConverter.append() for this code point
+    charsetConverter.append(codepoint, chars);
+    for (unsigned int i = 0; i < chars.length(); i++) {
+        state.proceedToNext(chars[i]); // advance the state one appended char at a time
+    }
+}
+
 #endif	/* CHARSET_UTILS_HPP */
  
@@ -9,6 +9,7 @@
 #define	DEFAULT_FSA_HPP
  
 extern const unsigned char DEFAULT_FSA[];
+extern const unsigned char DEFAULT_SYNTH_FSA[];
  
 #endif	/* DEFAULT_FSA_HPP */
  
+/* 
+ * File:   exceptions.hpp
+ * Author: mlenart
+ *
+ * Created on 22 styczeń 2014, 13:16
+ */
+
+#ifndef EXCEPTIONS_HPP
+#define	EXCEPTIONS_HPP
+
+// Included here so the header compiles on its own, regardless of what the
+// including translation unit pulled in beforehand.
+#include <exception>
+#include <string>
+
+/**
+ * Exception type carrying a textual message.
+ * The message is copied at construction and returned by what().
+ */
+class MorfeuszException : public std::exception {
+public:
+
+    MorfeuszException(const std::string& what) : msg(what.c_str()) {
+    }
+
+    virtual ~MorfeuszException() throw () {
+    }
+
+    // Returns the stored message; the pointer is valid while this object lives.
+    virtual const char* what() const throw () {
+        return this->msg.c_str();
+    }
+private:
+    const std::string msg;
+};
+
+#endif	/* EXCEPTIONS_HPP */
+
@@ -118,10 +118,10 @@ void CompressedFSA1&lt;T&gt;::doProceedToNextByList(
                 currPtr += *currPtr + 1;
                 break;
             case 2:
-                currPtr += ntohs(*((uint16_t*) currPtr)) + 2;
+                currPtr += ntohs(*((const uint16_t*) currPtr)) + 2;
                 break;
             case 3:
-                currPtr += (((unsigned int) ntohs(*((uint16_t*) currPtr))) << 8) + currPtr[2] + 3;
+                currPtr += (((const unsigned int) ntohs(*((const uint16_t*) currPtr))) << 8) + currPtr[2] + 3;
                 break;
         }
 //                                cerr << "FOUND " << c << " " << currPtr - this->startPtr << endl;
@@ -64,7 +64,7 @@ FSA&lt;T&gt;* FSA&lt;T&gt;::getFSA(const std::string&amp; filename, const Deserializer&lt;T&gt;&amp; deser
 template <class T>
 FSA<T>* FSA<T>::getFSA(const unsigned char* ptr, const Deserializer<T>& deserializer) {
  
-    uint32_t magicNumber = ntohl(*((uint32_t*) ptr));
+    uint32_t magicNumber = ntohl(*((const uint32_t*) ptr));
     if (magicNumber != MAGIC_NUMBER) {
         throw FSAException("Invalid magic number");
     }
@@ -10,6 +10,8 @@
  
 #include <iostream>
  
+#include "fsa.hpp"
+
 //#pragma pack(push, 1)  /* push current alignment to stack */
  
 struct StateData {
@@ -12,6 +12,7 @@
 %{
 #include "Morfeusz.hpp"
 #include "MorphInterpretation.hpp"
+#include "exceptions.hpp"
 #include "const.hpp"
 %}
  
@@ -49,6 +50,7 @@
 %include "Morfeusz.hpp"
 %include "MorphInterpretation.hpp"
 %include "const.hpp"
+%include "exceptions.hpp"
  
 // instantiate vector of interpretations
 namespace std {
@@ -18,10 +18,10 @@ using namespace std;
 int main(int argc, char** argv) {
     Morfeusz morfeusz;
 #ifdef _WIN32
-    morfeusz.setEncoding(CP852);
+    morfeusz.setCharset(CP852);
 #endif
 #ifdef _WIN64
-    morfeusz.setEncoding(CP852);
+    morfeusz.setCharset(CP852);
 #endif
     string line;
     while (getline(cin, line)) {
@@ -51,5 +51,3 @@ int main(int argc, char** argv) {
     printf("\n");
     return 0;
 }
-
-
+/* 
+ * File:   morfeusz_generator.cpp
+ * Author: mlenart
+ *
+ * Created on 21 styczeń 2014, 12:02
+ */
+
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include "fsa/fsa.hpp"
+#include "Tagset.hpp"
+#include "Morfeusz.hpp"
+#include "const.hpp"
+
+using namespace std;
+
+/*
+ * Reads lines from standard input, passes each one to Morfeusz::generate and
+ * prints the resulting interpretations as
+ * "[orth,lemma,tag,name; orth,lemma,tag,name; ...]", one bracketed list per
+ * input line.
+ */
+int main(int argc, char** argv) {
+    Morfeusz morfeusz;
+#ifdef _WIN32
+    morfeusz.setCharset(CP852);
+#endif
+#ifdef _WIN64
+    // Previously a mangled call that failed to compile whenever _WIN64 was defined.
+    morfeusz.setCharset(CP852);
+#endif
+    string line;
+    while (getline(cin, line)) {
+        vector<MorphInterpretation> res;
+        morfeusz.generate(line, res);
+        printf("[");
+        for (unsigned int i = 0; i < res.size(); i++) {
+            if (i > 0) {
+                // Separator between consecutive interpretations.
+                printf("; ");
+            }
+            MorphInterpretation& mi = res[i];
+            printf("%s,%s,%s,%s",
+                    mi.getOrth().c_str(), mi.getLemma().c_str(),
+                    mi.getTag().c_str(), mi.getName().c_str());
+        }
+        printf("]\n");
+    }
+    printf("\n");
+    return 0;
+}
@@ -16,53 +16,12 @@
  
 using namespace std;
  
-//void doTest(
-//        const FSA<vector<InterpsGroup >> &fsa,
-//        const Tagset& tagset,
-//        //        const InterpretationsDecoder<TaggedInterpretation>& interpsConverter, 
-//        const char* fname) {
-//    ifstream ifs;
-//    //    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
-//    ifs.open(fname, ios::binary);
-//    string line;
-//    while (getline(ifs, line)) {
-//        vector<string> splitVector(split(line, '\t'));
-//        string orth = splitVector[0];
-//        string lemma = splitVector[1];
-//        string tag = splitVector[2];
-//        string name = splitVector[3];
-//        vector<InterpsGroup> value2;
-//        fsa.tryToRecognize(orth.c_str(), value2);
-//        DEBUG("recognized " + to_string(value2.size()));
-//        //        vector<TaggedInterpretation> parsedValues;
-//        bool found = false;
-//
-//        for (InterpsGroup ig : value2)
-//            for (MorphInterpretation interp : ig.getRealInterps(orth, 0, 0, tagset)) {
-//                //            TaggedInterpretation parsedValue = interpsConverter.getInterpretation(key, interp);
-//                //            (0, 0, orth, encodedInterp, tagset);
-//                //            parsedValues.push_back(parsedValue);
-//                //            debug(orth, parsedValue);
-//                if (lemma == interp.getLemma() && tag == interp.getTag() && name == interp.getName()) {
-//                    DEBUG("RECOGNIZED");
-//                    found = true;
-//                }
-//                else {
-//                    DEBUG("not matching " + interp.getLemma() + " " + interp.getTag() + " " + interp.getName());
-//                }
-//            }
-//        validate(found, "Failed to recognize " + orth + " " + lemma + ":" + tag + ":" + name);
-//        //        debug(key, value2);
-//        //        validate(fsa.tryToRecognize(key.c_str(), value2), "Failed to recognize " + key);
-//    }
-//    validate(ifs.eof(), "Failed to read the input file to the end");
-//}
-
 int main(int argc, char** argv) {
     validate(argc == 3, "Must provide exactly 2 arguments - input FSA filename and dictionary filename.");
     string fsaFilename = argv[1];
     string dictFilename = argv[2];
-    Morfeusz morfeusz(fsaFilename);
+    Morfeusz morfeusz;
+    morfeusz.setAnalyzerFile(fsaFilename);
     ifstream in;
     in.open(dictFilename.c_str());
     string line;
@@ -48,7 +48,7 @@ int main(int argc, char** argv) {
     Morfeusz morfeusz;
     if (argc == 4) {
         MorfeuszCharset encoding = getEncoding(argv[3]);
-        morfeusz.setEncoding(encoding);
+        morfeusz.setCharset(encoding);
     }
     string line;
     while (getline(in, line)) {
+/* 
+ * File:   test_synth_dict.cpp
+ * Author: mlenart
+ *
+ * Created on 21 styczeń 2014, 12:00
+ */
+
+#include <cstdlib>
+
+using namespace std;
+
+/*
+ * Placeholder entry point: runs no checks yet and always exits with status 0.
+ */
+int main(int argc, char** argv) {
+
+    return 0;
+}
+
@@ -81,7 +81,8 @@ void appendMorfeuszResults(const std::vector&lt;MorphInterpretation&gt;&amp; res, OutputSt
         if (prevStart != -1
                 && (prevStart != mi.getStartNode() || prevEnd != mi.getEndNode())) {
             out << "]\n[";
-        } else if (prevStart != -1) {
+        }
+        else if (prevStart != -1) {
             out << "; ";
         }
         out << mi.getStartNode() << ","
 <?xml version="1.0" encoding="UTF-8"?>
 <configurationDescriptor version="90">
   <logicalFolder name="root" displayName="root" projectFiles="true" kind="ROOT">
+    <logicalFolder name="build"
+                   displayName="build"
+                   projectFiles="true"
+                   root="build">
+      <itemPath>build/default_fsa.cpp</itemPath>
+      <itemPath>build/default_synth_fsa.cpp</itemPath>
+    </logicalFolder>
     <logicalFolder name="f1" displayName="input" projectFiles="true">
     </logicalFolder>
     <df root="morfeusz" name="0">
@@ -19,21 +26,21 @@
         <in>test_recognize.cpp</in>
         <in>test_speed.cpp</in>
       </df>
-      <df name="generator">
-        <in>EncodedGeneratorInterpretation.hpp</in>
-        <in>GeneratorDeserializer.cpp</in>
-        <in>GeneratorDeserializer.hpp</in>
-      </df>
+      <in>Environment.cpp</in>
       <in>FlexionGraph.cpp</in>
+      <in>Generator.cpp</in>
+      <in>GeneratorDeserializer.cpp</in>
       <in>Morfeusz.cpp</in>
       <in>MorphDeserializer.cpp</in>
       <in>MorphInterpretation.cpp</in>
       <in>Tagset.cpp</in>
-      <in>Toolchain-Linux-amd64.cmake</in>
       <in>const.cpp</in>
-      <in>main.cpp</in>
+      <in>exceptions.hpp</in>
+      <in>morfeusz_analyzer.cpp</in>
+      <in>morfeusz_generator.cpp</in>
       <in>test_recognize_dict.cpp</in>
       <in>test_result_equals.cpp</in>
+      <in>test_synth_dict.cpp</in>
     </df>
     <logicalFolder name="morfeusz"
                    displayName="morfeusz"
@@ -76,9 +83,17 @@
           <buildCommandWorkingDir>build</buildCommandWorkingDir>
           <buildCommand>${MAKE} -f Makefile</buildCommand>
           <cleanCommand>${MAKE} -f Makefile clean</cleanCommand>
-          <executablePath>build/morfeusz/test_result_equals</executablePath>
+          <executablePath>build/morfeusz/morfeusz_generator</executablePath>
         </makeTool>
       </makefileType>
+      <item path="build/default_fsa.cpp" ex="false" tool="1" flavor2="4">
+        <ccTool>
+        </ccTool>
+      </item>
+      <item path="build/default_synth_fsa.cpp" ex="false" tool="1" flavor2="4">
+        <ccTool>
+        </ccTool>
+      </item>
       <item path="build/morfeusz/java/swigJAVA.cpp" ex="false" tool="1" flavor2="4">
         <ccTool>
         </ccTool>
@@ -94,8 +109,13 @@
             <pElem>build/morfeusz/java</pElem>
           </incDir>
           <preprocessorList>
+            <Elem>NDEBUG</Elem>
+            <Elem>_OPTIMIZE__=1</Elem>
             <Elem>jmorfeusz_EXPORTS</Elem>
           </preprocessorList>
+          <undefinedList>
+            <Elem>__NO_INLINE__</Elem>
+          </undefinedList>
         </ccTool>
       </item>
       <item path="build/morfeusz/morfeuszPYTHON_wrap.cxx"
@@ -109,8 +129,13 @@
             <pElem>build/morfeusz/python</pElem>
           </incDir>
           <preprocessorList>
+            <Elem>NDEBUG</Elem>
+            <Elem>_OPTIMIZE__=1</Elem>
             <Elem>_morfeusz_EXPORTS</Elem>
           </preprocessorList>
+          <undefinedList>
+            <Elem>__NO_INLINE__</Elem>
+          </undefinedList>
         </ccTool>
       </item>
       <item path="build/morfeusz/python/swigPYTHON.cpp"
@@ -124,18 +149,16 @@
         <ccTool>
           <incDir>
             <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
             <pElem>build/morfeusz</pElem>
           </incDir>
           <preprocessorList>
-            <Elem>NDEBUG</Elem>
-            <Elem>_OPTIMIZE__=1</Elem>
             <Elem>__PIC__=2</Elem>
             <Elem>__pic__=2</Elem>
             <Elem>libmorfeusz_EXPORTS</Elem>
           </preprocessorList>
           <undefinedList>
             <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
-            <Elem>__NO_INLINE__</Elem>
           </undefinedList>
         </ccTool>
       </folder>
@@ -158,28 +181,42 @@
           </undefinedList>
         </ccTool>
       </folder>
+      <folder path="build">
+        <ccTool>
+          <incDir>
+            <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
+            <pElem>build/morfeusz</pElem>
+          </incDir>
+          <preprocessorList>
+            <Elem>__PIC__=2</Elem>
+            <Elem>__pic__=2</Elem>
+            <Elem>libmorfeusz_EXPORTS</Elem>
+          </preprocessorList>
+          <undefinedList>
+            <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
+          </undefinedList>
+        </ccTool>
+      </folder>
       <folder path="morfeusz">
         <ccTool>
           <incDir>
             <pElem>build</pElem>
           </incDir>
           <preprocessorList>
-            <Elem>NDEBUG</Elem>
-            <Elem>_OPTIMIZE__=1</Elem>
             <Elem>__PIC__=2</Elem>
             <Elem>__pic__=2</Elem>
           </preprocessorList>
           <undefinedList>
             <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
-            <Elem>__NO_INLINE__</Elem>
           </undefinedList>
         </ccTool>
       </folder>
       <folder path="morfeusz/java">
         <ccTool>
           <incDir>
-            <pElem>/usr/lib/jvm/default-java/include</pElem>
             <pElem>morfeusz</pElem>
+            <pElem>/usr/lib/jvm/default-java/include</pElem>
           </incDir>
           <preprocessorList>
             <Elem>jmorfeusz_EXPORTS</Elem>
@@ -193,26 +230,80 @@
             <pElem>morfeusz</pElem>
           </incDir>
           <preprocessorList>
+            <Elem>NDEBUG</Elem>
+            <Elem>_OPTIMIZE__=1</Elem>
             <Elem>pymorfeusz_EXPORTS</Elem>
           </preprocessorList>
+          <undefinedList>
+            <Elem>__NO_INLINE__</Elem>
+          </undefinedList>
         </ccTool>
       </folder>
+      <item path="morfeusz/Environment.cpp" ex="false" tool="1" flavor2="4">
+        <ccTool>
+          <incDir>
+            <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
+            <pElem>build/morfeusz</pElem>
+          </incDir>
+          <preprocessorList>
+            <Elem>__PIC__=2</Elem>
+            <Elem>__pic__=2</Elem>
+            <Elem>libmorfeusz_EXPORTS</Elem>
+          </preprocessorList>
+          <undefinedList>
+            <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
+          </undefinedList>
+        </ccTool>
+      </item>
       <item path="morfeusz/FlexionGraph.cpp" ex="false" tool="1" flavor2="4">
         <ccTool>
           <incDir>
             <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
+            <pElem>build/morfeusz</pElem>
+          </incDir>
+          <preprocessorList>
+            <Elem>__PIC__=2</Elem>
+            <Elem>__pic__=2</Elem>
+            <Elem>libmorfeusz_EXPORTS</Elem>
+          </preprocessorList>
+          <undefinedList>
+            <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
+          </undefinedList>
+        </ccTool>
+      </item>
+      <item path="morfeusz/Generator.cpp" ex="false" tool="1" flavor2="4">
+        <ccTool>
+          <incDir>
+            <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
+            <pElem>build/morfeusz</pElem>
+          </incDir>
+          <preprocessorList>
+            <Elem>__PIC__=2</Elem>
+            <Elem>__pic__=2</Elem>
+            <Elem>libmorfeusz_EXPORTS</Elem>
+          </preprocessorList>
+          <undefinedList>
+            <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
+          </undefinedList>
+        </ccTool>
+      </item>
+      <item path="morfeusz/GeneratorDeserializer.cpp" ex="false" tool="1" flavor2="4">
+        <ccTool>
+          <incDir>
+            <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
             <pElem>build/morfeusz</pElem>
           </incDir>
           <preprocessorList>
-            <Elem>NDEBUG</Elem>
-            <Elem>_OPTIMIZE__=1</Elem>
             <Elem>__PIC__=2</Elem>
             <Elem>__pic__=2</Elem>
             <Elem>libmorfeusz_EXPORTS</Elem>
           </preprocessorList>
           <undefinedList>
             <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
-            <Elem>__NO_INLINE__</Elem>
           </undefinedList>
         </ccTool>
       </item>
@@ -220,18 +311,16 @@
         <ccTool>
           <incDir>
             <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
             <pElem>build/morfeusz</pElem>
           </incDir>
           <preprocessorList>
-            <Elem>NDEBUG</Elem>
-            <Elem>_OPTIMIZE__=1</Elem>
             <Elem>__PIC__=2</Elem>
             <Elem>__pic__=2</Elem>
             <Elem>libmorfeusz_EXPORTS</Elem>
           </preprocessorList>
           <undefinedList>
             <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
-            <Elem>__NO_INLINE__</Elem>
           </undefinedList>
         </ccTool>
       </item>
@@ -239,18 +328,16 @@
         <ccTool>
           <incDir>
             <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
             <pElem>build/morfeusz</pElem>
           </incDir>
           <preprocessorList>
-            <Elem>NDEBUG</Elem>
-            <Elem>_OPTIMIZE__=1</Elem>
             <Elem>__PIC__=2</Elem>
             <Elem>__pic__=2</Elem>
             <Elem>libmorfeusz_EXPORTS</Elem>
           </preprocessorList>
           <undefinedList>
             <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
-            <Elem>__NO_INLINE__</Elem>
           </undefinedList>
         </ccTool>
       </item>
@@ -258,18 +345,16 @@
         <ccTool>
           <incDir>
             <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
             <pElem>build/morfeusz</pElem>
           </incDir>
           <preprocessorList>
-            <Elem>NDEBUG</Elem>
-            <Elem>_OPTIMIZE__=1</Elem>
             <Elem>__PIC__=2</Elem>
             <Elem>__pic__=2</Elem>
             <Elem>libmorfeusz_EXPORTS</Elem>
           </preprocessorList>
           <undefinedList>
             <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
-            <Elem>__NO_INLINE__</Elem>
           </undefinedList>
         </ccTool>
       </item>
@@ -277,26 +362,19 @@
         <ccTool>
           <incDir>
             <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
             <pElem>build/morfeusz</pElem>
           </incDir>
           <preprocessorList>
-            <Elem>NDEBUG</Elem>
-            <Elem>_OPTIMIZE__=1</Elem>
             <Elem>__PIC__=2</Elem>
             <Elem>__pic__=2</Elem>
             <Elem>libmorfeusz_EXPORTS</Elem>
           </preprocessorList>
           <undefinedList>
             <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
-            <Elem>__NO_INLINE__</Elem>
           </undefinedList>
         </ccTool>
       </item>
-      <item path="morfeusz/Toolchain-Linux-amd64.cmake"
-            ex="false"
-            tool="3"
-            flavor2="0">
-      </item>
       <item path="morfeusz/charset/CaseConverter.cpp" ex="false" tool="1" flavor2="4">
         <ccTool>
         </ccTool>
@@ -323,18 +401,16 @@
         <ccTool>
           <incDir>
             <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
             <pElem>build/morfeusz</pElem>
           </incDir>
           <preprocessorList>
-            <Elem>NDEBUG</Elem>
-            <Elem>_OPTIMIZE__=1</Elem>
             <Elem>__PIC__=2</Elem>
             <Elem>__pic__=2</Elem>
             <Elem>libmorfeusz_EXPORTS</Elem>
           </preprocessorList>
           <undefinedList>
             <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
-            <Elem>__NO_INLINE__</Elem>
           </undefinedList>
         </ccTool>
       </item>
@@ -342,22 +418,22 @@
         <ccTool>
         </ccTool>
       </item>
+      <item path="morfeusz/exceptions.hpp" ex="false" tool="3" flavor2="0">
+      </item>
       <item path="morfeusz/fsa/const.cpp" ex="false" tool="1" flavor2="4">
         <ccTool>
           <incDir>
             <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
             <pElem>build/morfeusz</pElem>
           </incDir>
           <preprocessorList>
-            <Elem>NDEBUG</Elem>
-            <Elem>_OPTIMIZE__=1</Elem>
             <Elem>__PIC__=2</Elem>
             <Elem>__pic__=2</Elem>
             <Elem>libmorfeusz_EXPORTS</Elem>
           </preprocessorList>
           <undefinedList>
             <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem>
-            <Elem>__NO_INLINE__</Elem>
           </undefinedList>
         </ccTool>
       </item>
@@ -385,64 +461,44 @@
           </incDir>
         </ccTool>
       </item>
-      <item path="morfeusz/generator/EncodedGeneratorInterpretation.hpp"
-            ex="false"
-            tool="3"
-            flavor2="0">
-      </item>
-      <item path="morfeusz/generator/GeneratorDeserializer.cpp"
-            ex="false"
-            tool="1"
-            flavor2="0">
-      </item>
-      <item path="morfeusz/generator/GeneratorDeserializer.hpp"
-            ex="false"
-            tool="3"
-            flavor2="0">
+      <item path="morfeusz/morfeusz_analyzer.cpp" ex="false" tool="1" flavor2="4">
+        <ccTool>
+          <incDir>
+            <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
+            <pElem>build/morfeusz</pElem>
+          </incDir>
+        </ccTool>
       </item>
-      <item path="morfeusz/main.cpp" ex="false" tool="1" flavor2="4">
+      <item path="morfeusz/morfeusz_generator.cpp" ex="false" tool="1" flavor2="4">
         <ccTool>
           <incDir>
             <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
             <pElem>build/morfeusz</pElem>
           </incDir>
-          <preprocessorList>
-            <Elem>NDEBUG</Elem>
-            <Elem>_OPTIMIZE__=1</Elem>
-          </preprocessorList>
-          <undefinedList>
-            <Elem>__NO_INLINE__</Elem>
-          </undefinedList>
         </ccTool>
       </item>
       <item path="morfeusz/test_recognize_dict.cpp" ex="false" tool="1" flavor2="4">
         <ccTool>
           <incDir>
             <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
             <pElem>build/morfeusz</pElem>
           </incDir>
-          <preprocessorList>
-            <Elem>NDEBUG</Elem>
-            <Elem>_OPTIMIZE__=1</Elem>
-          </preprocessorList>
-          <undefinedList>
-            <Elem>__NO_INLINE__</Elem>
-          </undefinedList>
         </ccTool>
       </item>
       <item path="morfeusz/test_result_equals.cpp" ex="false" tool="1" flavor2="4">
         <ccTool>
           <incDir>
             <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
             <pElem>build/morfeusz</pElem>
           </incDir>
-          <preprocessorList>
-            <Elem>NDEBUG</Elem>
-            <Elem>_OPTIMIZE__=1</Elem>
-          </preprocessorList>
-          <undefinedList>
-            <Elem>__NO_INLINE__</Elem>
-          </undefinedList>
+        </ccTool>
+      </item>
+      <item path="morfeusz/test_synth_dict.cpp" ex="false" tool="1" flavor2="4">
+        <ccTool>
         </ccTool>
       </item>
     </conf>