From 4bf99e6b94697f082572eb35c0dcdd60c3711761 Mon Sep 17 00:00:00 2001
From: Michał Lenart <michall@ipipan.waw.pl>
Date: Tue, 12 Nov 2013 17:53:25 +0000
Subject: [PATCH] - prawie działa rozpoznawanie informacji morfologicznej

---
 fsa/CMakeLists.txt             |  11 +++++------
 fsa/cfsa1_impl.hpp             |   9 +++++----
 fsa/cfsa2_impl.hpp             |   2 +-
 fsa/const.cpp                  |  10 ++++++++++
 fsa/const.hpp                  |  22 ++++++++++++++++++++++
 fsa/fsa.hpp                    |  27 ++++++++-------------------
 fsa/fsa_impl.hpp               |  41 +++++------------------------------------
 fsa/interpretation.hpp         |  29 -----------------------------
 fsa/simplefsa_impl.hpp         |   4 ++--
 fsa/state_impl.hpp             |   4 ++--
 fsa/test_morph.cpp             |  53 -----------------------------------------------------
 fsa/utils.hpp                  |  29 +++++++++++++++--------------
 morfeusz/CMakeLists.txt        |   3 +++
 morfeusz/MorphDeserializer.cpp |  48 ++++++++++++++++++++++++++++++++++++++++++++++++
 morfeusz/MorphDeserializer.hpp |  28 ++++++++++++++++++++++++++++
 morfeusz/Tagset.cpp            |  56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 morfeusz/Tagset.hpp            |  25 +++++++++++++++++++++++++
 morfeusz/interpretations.cpp   |  56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 morfeusz/interpretations.hpp   |  58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 morfeusz/main.cpp              |   1 +
 morfeusz/test_morph.cpp        |  82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 nbproject/configurations.xml   | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
 nbproject/project.xml          |   4 ++--
 23 files changed, 545 insertions(+), 187 deletions(-)
 create mode 100644 fsa/const.cpp
 create mode 100644 fsa/const.hpp
 delete mode 100644 fsa/interpretation.hpp
 delete mode 100644 fsa/test_morph.cpp
 create mode 100644 morfeusz/MorphDeserializer.cpp
 create mode 100644 morfeusz/MorphDeserializer.hpp
 create mode 100644 morfeusz/Tagset.cpp
 create mode 100644 morfeusz/Tagset.hpp
 create mode 100644 morfeusz/interpretations.cpp
 create mode 100644 morfeusz/interpretations.hpp
 create mode 100644 morfeusz/test_morph.cpp

diff --git a/fsa/CMakeLists.txt b/fsa/CMakeLists.txt
index a013849..ace2488 100644
--- a/fsa/CMakeLists.txt
+++ b/fsa/CMakeLists.txt
@@ -1,11 +1,10 @@
 
-add_executable (test_speed test_speed.cpp)
-add_executable (test_speed_profile test_speed.cpp)
-add_executable (test_recognize test_recognize.cpp)
-add_executable (test_not_recognize test_not_recognize.cpp)
-add_executable (test_morph test_morph.cpp)
+add_executable (test_speed test_speed.cpp const.cpp)
+add_executable (test_speed_profile test_speed.cpp const.cpp)
+add_executable (test_recognize test_recognize.cpp const.cpp)
+add_executable (test_not_recognize test_not_recognize.cpp const.cpp)
+
 set_target_properties ( test_speed PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2 -pedantic -Wcast-align -Wextra -Wmissing-noreturn -Wconversion -Wcast-qual -Wcast-align" )
 set_target_properties ( test_speed_profile PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2 -g" )
 set_target_properties ( test_recognize PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" )
 set_target_properties ( test_not_recognize PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" )
-set_target_properties ( test_morph PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" )
diff --git a/fsa/cfsa1_impl.hpp b/fsa/cfsa1_impl.hpp
index e56f56f..3d41ae1 100644
--- a/fsa/cfsa1_impl.hpp
+++ b/fsa/cfsa1_impl.hpp
@@ -79,7 +79,7 @@ void CompressedFSA1<T>::doProceedToNextByList(
     TransitionData2 td;
     for (unsigned int i = 0; i < transitionsNum; i++) {
         //        const_cast<Counter*>(&counter)->increment(1);
-        td = *((TransitionData2*) currPtr);
+        td = *(reinterpret_cast<const TransitionData2*>(currPtr));
         if (td.shortLabel == shortLabel) {
             if (shortLabel == 0) {
                 currPtr++;
@@ -107,7 +107,8 @@ void CompressedFSA1<T>::doProceedToNextByList(
     if (!found) {
 //                                cerr << "SINK for " << c << endl;
         state.setNextAsSink();
-    } else {
+    } 
+    else {
         currPtr++;
 //                                        cerr << "offset size " << td.offsetSize << endl;
 //                            cerr << "offset " << offset << endl;
@@ -152,12 +153,12 @@ void CompressedFSA1<T>::proceedToNext(const char c, State<T>& state) const {
 //                        cerr << "NEXT " << (short) c << " from " << state.getOffset() << endl;
     const unsigned char* fromPointer = this->initialStatePtr + state.getOffset();
     unsigned char shortLabel = this->label2ShortLabel[(const unsigned char) c];
-    unsigned int transitionsTableOffset = 1;
+    unsigned long transitionsTableOffset = 1;
     if (state.isAccepting()) {
         transitionsTableOffset += state.getValueSize();
 //                                cerr << "transitionsTableOffset " << transitionsTableOffset + state.getOffset() << " because value is " << state.getValue() << endl;
     }
-    StateData2* sd = (StateData2*) (fromPointer);
+    const StateData2* sd = reinterpret_cast<const StateData2*>(fromPointer);
 //                cerr << "transitions num=" << sd->transitionsNum << endl;
     if (sd->array) {
         if (shortLabel > 0) {
diff --git a/fsa/cfsa2_impl.hpp b/fsa/cfsa2_impl.hpp
index 45767ac..fe59618 100644
--- a/fsa/cfsa2_impl.hpp
+++ b/fsa/cfsa2_impl.hpp
@@ -135,7 +135,7 @@ void CompressedFSA2<T>::proceedToNext(const char c, State<T>& state) const {
         cerr << "NEXT " << (short) c << " from " << state.getOffset() << endl;
 #endif
     const unsigned char* fromPointer = this->initialStatePtr + state.getOffset();
-    unsigned int transitionsTableOffset = 0;
+    unsigned long transitionsTableOffset = 0;
     if (state.isAccepting()) {
         transitionsTableOffset += state.getValueSize();
     }
diff --git a/fsa/const.cpp b/fsa/const.cpp
new file mode 100644
index 0000000..0bc9c6d
--- /dev/null
+++ b/fsa/const.cpp
@@ -0,0 +1,10 @@
+// Definitions of the constants declared in const.hpp.
+#include "const.hpp"
+
+extern const uint32_t MAGIC_NUMBER = 0x8fc2bc1b;
+extern const uint8_t VERSION_NUM = 9;
+// Byte offsets measured from the start of the serialized data buffer (see FSA<T>::getFSA).
+extern const unsigned int VERSION_NUM_OFFSET = 4;
+extern const unsigned int IMPLEMENTATION_NUM_OFFSET = 5;
+extern const unsigned int ADDITIONAL_DATA_SIZE_OFFSET = 6;
+extern const unsigned int ADDITIONAL_DATA_OFFSET = 10;
diff --git a/fsa/const.hpp b/fsa/const.hpp
new file mode 100644
index 0000000..c37e921
--- /dev/null
+++ b/fsa/const.hpp
@@ -0,0 +1,22 @@
+/* 
+ * File:   const.hpp
+ * Author: mlenart
+ * Constants used when reading serialized FSA data; the values are defined in const.cpp.
+ * Created on 12 November 2013, 14:11
+ */
+
+#ifndef CONST_HPP
+#define	CONST_HPP
+
+#include <stdint.h>  // uint32_t / uint8_t: the only names this header needs
+
+extern const uint32_t MAGIC_NUMBER;
+extern const uint8_t VERSION_NUM;
+
+extern const unsigned int VERSION_NUM_OFFSET;
+extern const unsigned int IMPLEMENTATION_NUM_OFFSET;
+extern const unsigned int ADDITIONAL_DATA_SIZE_OFFSET;
+extern const unsigned int ADDITIONAL_DATA_OFFSET;
+
+#endif	/* CONST_HPP */
+
diff --git a/fsa/fsa.hpp b/fsa/fsa.hpp
index 4296d1e..052f46a 100644
--- a/fsa/fsa.hpp
+++ b/fsa/fsa.hpp
@@ -9,13 +9,12 @@
 #define FSA_HPP
 
 //#include <iostream>
-//#include <cstring>
-#include <typeinfo>
+#include <cstring>
 #include <cassert>
+#include <typeinfo>
 #include <exception>
 #include <string>
 #include <vector>
-#include "interpretation.hpp"
 
 template <class T> class State;
 template <class T> class FSA;
@@ -44,16 +43,12 @@ public:
      * Returns number of bytes read or -1 on error.
      */
     long deserialize(const unsigned char* ptr, char*& text) const {
-        //        text = const_cast<char*> (reinterpret_cast<const char*> (ptr));
-        //        return strlen(text) + 1;
-        return 1;
+        text = const_cast<char*> (reinterpret_cast<const char*> (ptr));
+        return strlen(text) + 1;
+//        return 1;
     }
 };
 
-class MorphDeserializer: public Deserializer<std::vector<Interpretation>> {
-    long deserialize(const unsigned char* ptr, std::vector<Interpretation>& interp) const;
-};
-
 class Counter {
 public:
 
@@ -88,8 +83,6 @@ public:
      */
     static FSA<T>* getFSA(const unsigned char* ptr, const Deserializer<T>& deserializer);
     
-    static const uint32_t MAGIC_NUMBER = 0x8fc2bc1b;
-    static const uint8_t VERSION_NUM = 8;
 protected:
 
     /**
@@ -105,10 +98,6 @@ protected:
     const Deserializer<T>& deserializer;
     friend class State<T>;
 private:
-    static int getMagicNumberOffset();
-    static int getVersionNumOffset();
-    static int getPopularCharsOffset();
-    static int getInitialStateOffset();
     //    FSA();
 };
 
@@ -220,12 +209,12 @@ public:
      * Makes sense only for accepting states.
      * For non-accepting states is throws an exception.
      */
-    unsigned int getValueSize() const;
+    unsigned long getValueSize() const;
 
     unsigned long getOffset() const;
 
     void setNext(const unsigned long offset);
-    void setNext(const unsigned long offset, const T& value, const unsigned int valueSize);
+    void setNext(const unsigned long offset, const T& value, const unsigned long valueSize);
     void setNextAsSink();
 
     explicit State(const FSA<T>& fsa);
@@ -237,7 +226,7 @@ private:
     bool accepting;
     bool sink;
     T value;
-    int valueSize;
+    long valueSize;
 };
 
 class FSAException : public std::exception {
diff --git a/fsa/fsa_impl.hpp b/fsa/fsa_impl.hpp
index 05f119e..6bc66ca 100644
--- a/fsa/fsa_impl.hpp
+++ b/fsa/fsa_impl.hpp
@@ -14,14 +14,11 @@
 #include <iostream>
 #include <vector>
 #include <netinet/in.h>
-#include "fsa.hpp"
 #include "utils.hpp"
+#include "const.hpp"
 
 using namespace std;
-
-static const unsigned int VERSION_NUM_OFFSET = 4;
-static const unsigned int IMPLEMENTATION_NUM_OFFSET = 5;
-static const unsigned int FSA_OFFSET = 6;
+//static const unsigned int FSA_OFFSET = 6;
 
 template <class T>
 bool FSA<T>::tryToRecognize(const char* input, T& value) const {
@@ -73,7 +70,9 @@ FSA<T>* FSA<T>::getFSA(const unsigned char* ptr, const Deserializer<T>& deserial
     }
     
     uint8_t implementationNum = *(ptr + IMPLEMENTATION_NUM_OFFSET);
-    const unsigned char* startPtr = ptr + FSA_OFFSET;
+    
+    uint32_t additionalDataSize = ntohl(*(reinterpret_cast<const uint32_t*>(ptr + ADDITIONAL_DATA_SIZE_OFFSET)));
+    const unsigned char* startPtr = ptr + ADDITIONAL_DATA_OFFSET + additionalDataSize;
     switch (implementationNum) {
         case 0:
             return new SimpleFSA<T>(startPtr, deserializer);
@@ -86,34 +85,4 @@ FSA<T>* FSA<T>::getFSA(const unsigned char* ptr, const Deserializer<T>& deserial
     }
 }
 
-static void deserializeLemma(const unsigned char*& ptr, Lemma& lemma) {
-    // XXX uważać na poprawność danych
-    lemma.suffixToCut = *ptr;
-    ptr++;
-    lemma.suffixToAdd = (const char*) ptr;
-    ptr += strlen((const char*) ptr) + 1;
-}
-
-static void deserializeInterp(const unsigned char*& ptr, Interpretation& interp) {
-    deserializeLemma(ptr, interp.lemma);
-    interp.tag = ntohs(*(reinterpret_cast<const uint16_t*>(ptr)));
-    ptr += 2;
-    interp.nameClassifier = *ptr;
-    ptr++;
-}
-
-long MorphDeserializer::deserialize(const unsigned char* ptr, vector<Interpretation>& interps) const {
-    const unsigned char* currPtr = ptr;
-    uint8_t interpsNum = *ptr;
-    interps.clear();
-    interps.reserve(interpsNum);
-    currPtr++;
-    for (unsigned int i = 0; i < interpsNum; i++) {
-        Interpretation interp;
-        deserializeInterp(currPtr, interp);
-        interps.push_back(interp);
-    }
-    return currPtr - ptr;
-}
-
 #endif	/* _SIMPLE_FSA_IMPL_HPP */
diff --git a/fsa/interpretation.hpp b/fsa/interpretation.hpp
deleted file mode 100644
index 6c83055..0000000
--- a/fsa/interpretation.hpp
+++ /dev/null
@@ -1,29 +0,0 @@
-/* 
- * File:   interpretation.hpp
- * Author: mlenart
- *
- * Created on November 4, 2013, 3:11 PM
- */
-
-#ifndef INTERPRETATION_HPP
-#define	INTERPRETATION_HPP
-
-#include <string>
-#include <list>
-
-using namespace std;
-
-struct Lemma {
-    unsigned short suffixToCut;
-    const char* suffixToAdd;
-};
-
-struct Interpretation {
-    Lemma lemma;
-    unsigned int tag;      // np. subst:sg:nom:m1
-    unsigned short nameClassifier; // np. "pospolita"
-    unsigned short qualifier;      // np. "dawne" lub "potoczne"
-};
-
-#endif	/* INTERPRETATION_HPP */
-
diff --git a/fsa/simplefsa_impl.hpp b/fsa/simplefsa_impl.hpp
index fecef99..9445404 100644
--- a/fsa/simplefsa_impl.hpp
+++ b/fsa/simplefsa_impl.hpp
@@ -37,7 +37,7 @@ static unsigned int decodeOffset(const unsigned char* ptr) {
 template <class T>
 void SimpleFSA<T>::proceedToNext(const char c, State<T>& state) const {
     const unsigned char* fromPointer = this->initialStatePtr + state.getOffset();
-    int transitionsTableOffset = sizeof (StateData);
+    long transitionsTableOffset = sizeof (StateData);
     if (state.isAccepting()) {
         transitionsTableOffset += state.getValueSize();
     }
@@ -60,7 +60,7 @@ void SimpleFSA<T>::proceedToNext(const char c, State<T>& state) const {
         const StateData* nextStateData = reinterpret_cast<const StateData*>(nextStatePointer);
         if (nextStateData->accepting) {
             T object;
-            int size = this->deserializer.deserialize(nextStatePointer + sizeof (StateData), object);
+            long size = this->deserializer.deserialize(nextStatePointer + sizeof (StateData), object);
             state.setNext(offset, object, size);
         } else {
             state.setNext(offset);
diff --git a/fsa/state_impl.hpp b/fsa/state_impl.hpp
index 0fd40a9..200722e 100644
--- a/fsa/state_impl.hpp
+++ b/fsa/state_impl.hpp
@@ -51,7 +51,7 @@ T State<T>::getValue() const {
 }
 
 template <class T>
-unsigned int State<T>::getValueSize() const {
+unsigned long State<T>::getValueSize() const {
     assert(this->isAccepting());
     return this->valueSize;
 }
@@ -69,7 +69,7 @@ void State<T>::setNext(const unsigned long offset) {
 }
 
 template <class T>
-void State<T>::setNext(const unsigned long offset, const T& value, const unsigned int valueSize) {
+void State<T>::setNext(const unsigned long offset, const T& value, const unsigned long valueSize) {
 //    assert(!this->isSink());
     this->offset = offset;
     this->accepting = true;
diff --git a/fsa/test_morph.cpp b/fsa/test_morph.cpp
deleted file mode 100644
index 100d558..0000000
--- a/fsa/test_morph.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-/* 
- * File:   test_morph.cpp
- * Author: mlenart
- *
- * Created on November 8, 2013, 4:12 PM
- */
-
-#include <cstdlib>
-#include <sstream>
-#include <iostream>
-#include "fsa.hpp"
-#include "utils.hpp"
-
-using namespace std;
-
-void debug(const string& key, const vector<Interpretation> value) {
-    cerr << key << endl;
-    for (Interpretation i: value) {
-        cerr << "suffix to cut: " << i.lemma.suffixToCut << endl;
-        cerr << "suffix to add: " << i.lemma.suffixToAdd << endl;
-        cerr << "tag: " << i.tag << endl;
-        cerr << "name: " << i.nameClassifier << endl;
-    }
-    cerr << "==================" << endl;
-}
-
-void doTest(const FSA<vector<Interpretation>>& fsa, const char* fname) {
-    ifstream ifs;
-    //    ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
-    ifs.open(fname, ios::binary);
-    string line;
-    while (getline(ifs, line)) {
-        vector<string> splitVector(split(line, '\t'));
-        string key = splitVector[0];
-        vector<Interpretation> value2;
-        fsa.tryToRecognize(key.c_str(), value2);
-        debug(key, value2);
-//        validate(fsa.tryToRecognize(key.c_str(), value2), "Failed to recognize " + key);
-    }
-    validate(ifs.eof(), "Failed to read the input file to the end");
-}
-
-int main(int argc, char** argv) {
-    validate(argc == 3, "Must provide exactly two arguments - FSA filename and dictionary filename.");
-    const unsigned char* fsaData = readFile(argv[1]);
-    MorphDeserializer deserializer;
-    FSA<vector<Interpretation>>* fsa = FSA<vector<Interpretation>>::getFSA(fsaData, deserializer);
-    doTest(*fsa, argv[2]);
-    //    cout << argc << endl;
-    delete fsa;
-    return 0;
-}
-
diff --git a/fsa/utils.hpp b/fsa/utils.hpp
index 5475a0c..35e69c5 100644
--- a/fsa/utils.hpp
+++ b/fsa/utils.hpp
@@ -9,14 +9,15 @@
 #define	UTILS_HPP
 
 #include <iostream>
+#include <fstream>
 #include <sstream>
 #include <string>
 #include <fstream>
 #include <vector>
 
-using namespace std;
+//using namespace std;
 
-//#define DEBUG_BUILD
+#define DEBUG_BUILD
 
 #ifdef DEBUG_BUILD
 #  define DEBUG(x) do { std::cerr << x << std::endl; } while (0)
@@ -24,14 +25,14 @@ using namespace std;
 #  define DEBUG(x)
 #endif
 
-void validate(const bool cond, const std::string& msg) {
+inline void validate(const bool cond, const std::string& msg) {
     if (!cond) {
         std::cerr << msg << std::endl;
         exit(1);
     }
 }
 
-std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
+inline std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
     std::stringstream ss(s);
     std::string item;
     while (std::getline(ss, item, delim)) {
@@ -41,25 +42,25 @@ std::vector<std::string> &split(const std::string &s, char delim, std::vector<st
 }
 
 
-std::vector<std::string> split(const std::string &s, char delim) {
+inline std::vector<std::string> split(const std::string &s, char delim) {
     std::vector<std::string> elems;
     split(s, delim, elems);
     return elems;
 }
 
-string &rtrim(string &s) {
-        s.erase(find_if(s.rbegin(), s.rend(), not1(ptr_fun<int, int>(isspace))).base(), s.end());
-        return s;
-}
+//string &rtrim(string &s) {
+//        s.erase(find_if(s.rbegin(), s.rend(), not1(ptr_fun<int, int>(isspace))).base(), s.end());
+//        return s;
+//}
 
-unsigned char* readFile(const char* fname) {
-    ifstream ifs;
+inline unsigned char* readFile(const char* fname) {
+    std::ifstream ifs;
     ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
-    ifs.open(fname, ios::in | ios::binary | ios::ate);
+    ifs.open(fname, std::ios::in | std::ios::binary | std::ios::ate);
     //    if (ifs.is_open()) {
-    int size = ifs.tellg();
+    long size = ifs.tellg();
     unsigned char* memblock = new unsigned char [size];
-    ifs.seekg(0, ios::beg);
+    ifs.seekg(0, std::ios::beg);
     ifs.read(reinterpret_cast<char*> (memblock), size);
     ifs.close();
     return memblock;
diff --git a/morfeusz/CMakeLists.txt b/morfeusz/CMakeLists.txt
index 641748b..ea4b19f 100644
--- a/morfeusz/CMakeLists.txt
+++ b/morfeusz/CMakeLists.txt
@@ -6,7 +6,10 @@
 include_directories (${Morfeusz_SOURCE_DIR}/fsa) 
 add_library (morfeusz2 morfeusz.hpp morfeusz.cpp)
 add_executable (morfeusz2_analyze main.cpp)
+add_executable (test_morph test_morph.cpp interpretations.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp)
 
 # Link the executable to the Hello library. 
 target_link_libraries (morfeusz2_analyze morfeusz2)
 set_target_properties ( morfeusz2_analyze PROPERTIES COMPILE_FLAGS "-std=gnu++0x" )
+
+set_target_properties ( test_morph PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" )
diff --git a/morfeusz/MorphDeserializer.cpp b/morfeusz/MorphDeserializer.cpp
new file mode 100644
index 0000000..6d3605b
--- /dev/null
+++ b/morfeusz/MorphDeserializer.cpp
@@ -0,0 +1,48 @@
+/* 
+ * File:   MorphDeserializer.cpp
+ * Author: mlenart
+ * 
+ * Created on 12 listopad 2013, 15:31
+ */
+
+#include "MorphDeserializer.hpp"
+// Default constructor; the class declares no data members, so there is nothing to set up.
+MorphDeserializer::MorphDeserializer() {
+}
+// Copy constructor; `orig` is not read.
+MorphDeserializer::MorphDeserializer(const MorphDeserializer& orig) {
+}
+// Empty destructor (declared virtual in MorphDeserializer.hpp).
+MorphDeserializer::~MorphDeserializer() {
+}
+// Decodes one lemma record (a count byte, then a NUL-terminated suffix) and advances ptr past it.
+static void deserializeLemma(const unsigned char*& ptr, Lemma& lemma) {
+    // XXX: nothing here validates the input data
+    lemma.suffixToCut = *ptr++;
+    const char* const suffix = reinterpret_cast<const char*>(ptr);
+    lemma.suffixToAdd = suffix;
+    ptr += strlen(suffix) + 1;
+}
+// Decodes one interpretation: a lemma, a 16-bit big-endian tag number and a one-byte name classifier.
+static void deserializeInterp(const unsigned char*& ptr, Interpretation& interp) {
+    deserializeLemma(ptr, interp.lemma);
+    // Assemble the tag from single bytes: it follows a variable-length string, so ptr may be misaligned for uint16_t.
+    interp.tag = (ptr[0] << 8) | ptr[1];
+    ptr += 2;
+    interp.nameClassifier = *ptr++;
+}
+// Reads a one-byte count followed by that many serialized interpretations.
+// Replaces the contents of `interps` with the decoded records and returns
+// the number of bytes consumed, counting the leading count byte.
+long MorphDeserializer::deserialize(const unsigned char* ptr, vector<Interpretation>& interps) const {
+    const uint8_t count = *ptr;
+    const unsigned char* cursor = ptr + 1;  // first record starts right after the count byte
+    interps.clear();
+    interps.reserve(count);
+    for (unsigned int remaining = count; remaining > 0; --remaining) {
+        interps.push_back(Interpretation());
+        deserializeInterp(cursor, interps.back());
+    }
+    return cursor - ptr;
+}
+
diff --git a/morfeusz/MorphDeserializer.hpp b/morfeusz/MorphDeserializer.hpp
new file mode 100644
index 0000000..ad4b73d
--- /dev/null
+++ b/morfeusz/MorphDeserializer.hpp
@@ -0,0 +1,28 @@
+/* 
+ * File:   MorphDeserializer.hpp
+ * Author: mlenart
+ *
+ * Created on 12 listopad 2013, 15:31
+ */
+
+#ifndef MORPHDESERIALIZER_HPP
+#define	MORPHDESERIALIZER_HPP
+
+#include <vector>
+#include "fsa.hpp"
+#include "interpretations.hpp"
+// Deserializer for std::vector<Interpretation>; deserialize() fills interps and returns the number of bytes read.
+class MorphDeserializer: public Deserializer<std::vector<Interpretation>> {
+public:
+    MorphDeserializer();
+    MorphDeserializer(const MorphDeserializer& orig);
+    virtual ~MorphDeserializer();
+    long deserialize(
+        const unsigned char* ptr, 
+        std::vector<Interpretation>& interps) const;
+private:
+
+};
+
+#endif	/* MORPHDESERIALIZER_HPP */
+
diff --git a/morfeusz/Tagset.cpp b/morfeusz/Tagset.cpp
new file mode 100644
index 0000000..b5ed12d
--- /dev/null
+++ b/morfeusz/Tagset.cpp
@@ -0,0 +1,56 @@
+
+#include <string>
+#include <netinet/in.h>
+#include "Tagset.hpp"
+#include "const.hpp"
+#include "utils.hpp"
+
+using namespace std;
+// Reads a 16-bit big-endian (network order) integer at currPtr and advances currPtr by two bytes.
+static uint16_t readInt16(const unsigned char*& currPtr) {
+    DEBUG("readInt16");
+    uint16_t res = static_cast<uint16_t>((currPtr[0] << 8) | currPtr[1]);  // byte-wise: currPtr may be misaligned
+    DEBUG("still alive " + to_string(res));
+    currPtr += 2;
+    DEBUG("still alive after ptr add");
+    return res;
+}
+// Reads a NUL-terminated string at currPtr and moves currPtr past its terminator.
+static string readString(const unsigned char*& currPtr) {
+    DEBUG("readString");
+    const char* const text = reinterpret_cast<const char*>(currPtr);
+    string res(text);
+    currPtr += res.length() + 1;  // +1 skips the terminating NUL
+    return res;
+}
+// Loads a table stored as a 16-bit entry count followed by (16-bit number, NUL-terminated text) pairs.
+// Each text is stored in `tags` at the index given by its number.
+static void readTags(const unsigned char*& currPtr, vector<string>& tags) {
+    tags.assign(65536, string());  // one slot for every value a 16-bit number can take
+    const uint16_t tagsNum = readInt16(currPtr);
+    DEBUG("hi there");
+    DEBUG("tagsNum="+to_string((int) tagsNum));
+    for (unsigned int left = tagsNum; left > 0; --left) {
+        const unsigned int tagNum = readInt16(currPtr);
+        tags[tagNum] = readString(currPtr);
+    }
+}
+// Reads two consecutive tables (tags, then names) starting ADDITIONAL_DATA_OFFSET bytes into fsaData.
+Tagset::Tagset(const unsigned char* fsaData) {
+    const unsigned char* currPtr = fsaData + ADDITIONAL_DATA_OFFSET;
+    // Both calls below share currPtr: readTags() advances it, so the names table
+    // is read from wherever the tags table ended.
+    // Entry counts are 16-bit and are read inside readTags().
+    DEBUG("will read tags");
+    readTags(currPtr, this->tags);
+    DEBUG("will read names");
+    readTags(currPtr, this->names);
+}
+// Returns the text of tag number tagNum; vector::at() throws std::out_of_range for an invalid index.
+const string& Tagset::getTag(const int tagNum) const {
+    return this->tags.at(tagNum);
+}
+// Returns the text of name number nameNum; vector::at() throws std::out_of_range for an invalid index.
+const string& Tagset::getName(const int nameNum) const {
+    return this->names.at(nameNum);
+}
diff --git a/morfeusz/Tagset.hpp b/morfeusz/Tagset.hpp
new file mode 100644
index 0000000..83529f6
--- /dev/null
+++ b/morfeusz/Tagset.hpp
@@ -0,0 +1,25 @@
+/* 
+ * File:   Tagset.hpp
+ * Author: mlenart
+ *
+ * Created on 12 listopad 2013, 14:09
+ */
+
+#ifndef TAGSET_HPP
+#define	TAGSET_HPP
+
+#include <string>
+#include <vector>
+// Lookup tables mapping tag and name numbers to their text, read from serialized FSA data.
+class Tagset {
+public:
+    explicit Tagset(const unsigned char* fsaData);
+    const std::string& getTag(const int tagNum) const;
+    const std::string& getName(const int nameNum) const;
+private:
+    std::vector<std::string> tags;   // indexed by tag number
+    std::vector<std::string> names;  // indexed by name number
+};
+
+#endif	/* TAGSET_HPP */
+
diff --git a/morfeusz/interpretations.cpp b/morfeusz/interpretations.cpp
new file mode 100644
index 0000000..fa29d1c
--- /dev/null
+++ b/morfeusz/interpretations.cpp
@@ -0,0 +1,56 @@
+
+#include "interpretations.hpp"
+#include "Tagset.hpp"
+
+using namespace std;
+// Value-initialises all members; the int members tag and nameClassifier start at 0.
+Interpretation::Interpretation()
+: lemma(), tag(), nameClassifier() {
+
+}
+// Member-wise constructor; `name` is stored in nameClassifier.
+Interpretation::Interpretation(const Lemma& lemma, const int tag, const int name)
+: lemma(lemma), tag(tag), nameClassifier(name) {
+
+}
+// Initialises the three members from the corresponding arguments.
+StringInterpretation::StringInterpretation(
+        const string& lemma,
+        const string& tag,
+        const string& name)
+: lemma(lemma), tag(tag), name(name) {
+
+}
+// Formats the interpretation as "lemma:tag:name".
+string StringInterpretation::toString() const {
+    string text = lemma + ":" + tag;
+    text += ":" + name;
+    return text;
+}
+// Builds the lemma: drops the last lemma.suffixToCut chars of orth, then appends lemma.suffixToAdd.
+string LemmaConverter::convertLemma(
+        const string& orth,
+        const Lemma& lemma) const {
+    // suffixToCut comes from serialized data; clamp it so we never erase before the start of the string.
+    string::size_type cut = lemma.suffixToCut > 0 ? lemma.suffixToCut : 0;
+    if (cut > orth.size()) cut = orth.size();
+    string res(orth, 0, orth.size() - cut);
+    res.append(lemma.suffixToAdd);
+    return res;
+}
+// Builds the tagset member directly from the serialized data.
+InterpretationsConverter::InterpretationsConverter(const unsigned char* data)
+: tagset(data) {
+
+}
+// Converts a numeric interpretation of `orth` to its textual form using the tagset.
+StringInterpretation InterpretationsConverter::convertInterpretation(
+        const string& orth,
+        const Interpretation& interp) const {
+    return StringInterpretation(
+            lemmaConverter.convertLemma(orth, interp.lemma),
+            tagset.getTag(interp.tag),
+            tagset.getName(interp.nameClassifier));
+}
+
+
diff --git a/morfeusz/interpretations.hpp b/morfeusz/interpretations.hpp
new file mode 100644
index 0000000..4b3ac49
--- /dev/null
+++ b/morfeusz/interpretations.hpp
@@ -0,0 +1,58 @@
+/* 
+ * File:   interpretations.hpp
+ * Author: mlenart
+ *
+ * Created on November 4, 2013, 3:11 PM
+ */
+
+#ifndef INTERPRETATION_HPP
+#define	INTERPRETATION_HPP
+
+#include <string>
+#include <sstream>
+#include "Tagset.hpp"
+
+using namespace std;
+// Recipe for turning a word form into its lemma (applied by LemmaConverter::convertLemma).
+struct Lemma {
+    int suffixToCut;     // number of trailing chars to remove from the form
+    string suffixToAdd;  // text appended after the cut
+};
+// One interpretation in numeric form; InterpretationsConverter turns it into strings.
+struct Interpretation {
+    Interpretation();
+    Interpretation(const Lemma& lemma, const int tag, const int name);
+    Lemma lemma;
+    int tag;      // tag number; its text is e.g. subst:sg:nom:m1
+    int nameClassifier; // name number; its text is e.g. "pospolita"
+//    int qualifier;      // e.g. "dawne" or "potoczne"
+};
+// Textual interpretation. All members are held by value so an instance never refers to strings owned elsewhere.
+struct StringInterpretation {
+    StringInterpretation(const std::string& lemma, const std::string& tag, const std::string& name);
+    const std::string lemma;
+    const std::string tag;       // e.g. subst:sg:nom:m1
+    const std::string name;      // e.g. "pospolita"
+//    std::string qualifier;      // e.g. "dawne" or "potoczne"
+    std::string toString() const;
+};
+// Builds a lemma string from a word form (orth) and a Lemma recipe.
+class LemmaConverter {
+public:
+    std::string convertLemma(const std::string& orth, const Lemma& interp) const;
+};
+
+// Turns numeric Interpretation records into StringInterpretation using a Tagset built from `data`.
+class InterpretationsConverter {
+public:
+    explicit InterpretationsConverter(const unsigned char* data);
+    StringInterpretation convertInterpretation(
+            const std::string& orth, 
+            const Interpretation& interp) const;
+private:
+    LemmaConverter lemmaConverter;
+    Tagset tagset;
+};
+
+#endif	/* INTERPRETATION_HPP */
+
diff --git a/morfeusz/main.cpp b/morfeusz/main.cpp
index 9713b42..d0b4b21 100644
--- a/morfeusz/main.cpp
+++ b/morfeusz/main.cpp
@@ -9,6 +9,7 @@
 #include <iostream>
 #include "fsa.hpp"
 #include "default_fsa.hpp"
+#include "Tagset.hpp"
 
 using namespace std;
 
diff --git a/morfeusz/test_morph.cpp b/morfeusz/test_morph.cpp
new file mode 100644
index 0000000..278afed
--- /dev/null
+++ b/morfeusz/test_morph.cpp
@@ -0,0 +1,82 @@
+/* 
+ * File:   test_morph.cpp
+ * Author: mlenart
+ *
+ * Created on November 8, 2013, 4:12 PM
+ */
+
+//#include <cstdlib>
+#include <sstream>
+#include <iostream>
+#include "fsa.hpp"
+#include "interpretations.hpp"
+#include "utils.hpp"
+#include "MorphDeserializer.hpp"
+
+using namespace std;
+// Prints key and the numeric fields of each interpretation in value to stderr.
+void debug(const string& key, const vector<Interpretation> value) {
+    cerr << key << endl;
+    for (const Interpretation& entry : value) {
+        cerr << "suffix to cut: " << entry.lemma.suffixToCut << endl
+             << "suffix to add: " << entry.lemma.suffixToAdd << endl
+             << "tag: " << entry.tag << endl
+             << "name: " << entry.nameClassifier << endl;
+    }
+    cerr << "==================" << endl;
+}
+// Prints key, a tab and value.toString() to stderr.
+void debug(const string& key, const StringInterpretation& value) {
+    cerr << key << '\t' << value.toString() << endl;
+}
+// Runs the dictionary check: each line of fname holds tab-separated form, lemma, tag and name,
+// and that triple must be among the interpretations the automaton yields for the form.
+void doTest(
+        const FSA<vector<Interpretation>>& fsa,
+        const InterpretationsConverter& interpsConverter,
+        const char* fname) {
+    ifstream ifs;
+    // Stream exceptions stay off on purpose: getline() sets failbit at EOF, which ends the loop.
+    ifs.open(fname, ios::binary);
+    string line;
+    while (getline(ifs, line)) {
+        vector<string> splitVector(split(line, '\t'));
+        // split() drops a trailing empty field, so a line with an empty name column yields three fields.
+        validate(splitVector.size() >= 3, "Malformed dictionary line: " + line);
+        const string& key = splitVector[0];
+        const string& lemma = splitVector[1];
+        const string& tag = splitVector[2];
+        const string name = splitVector.size() > 3 ? splitVector[3] : string();
+        vector<Interpretation> value2;
+        fsa.tryToRecognize(key.c_str(), value2);
+        // found: does any returned interpretation match the expected lemma/tag/name?
+        bool found = false;
+        for (const Interpretation& interp : value2) {
+            const StringInterpretation parsedValue = interpsConverter.convertInterpretation(key, interp);
+            debug(key, parsedValue);
+            if (lemma == parsedValue.lemma && tag == parsedValue.tag && name == parsedValue.name) {
+                found = true;
+            }
+        }
+        validate(found, "Failed to recognize " + key + " " + lemma + ":" + tag + ":" + name);
+    }
+    validate(ifs.eof(), "Failed to read the input file to the end");
+}
+// Test driver: argv[1] is the FSA file, argv[2] the dictionary file to check against it.
+int main(int argc, char** argv) {
+    DEBUG("start test");
+    validate(argc == 3, "Must provide exactly two arguments - FSA filename, and dictionary filename.");
+    const unsigned char* fsaData = readFile(argv[1]);
+    MorphDeserializer deserializer;
+    DEBUG("will read FSA");
+    FSA<vector<Interpretation>>* fsa = FSA<vector<Interpretation>>::getFSA(fsaData, deserializer);
+    DEBUG("DONE read FSA");
+    DEBUG("will read tagset");
+    InterpretationsConverter converter(fsaData);
+    DEBUG("DONE read tagset");
+    DEBUG("still alive");
+    doTest(*fsa, converter, argv[2]);
+    delete fsa;
+    delete[] fsaData;  // readFile() allocates with new[]; freed only after the FSA built over it is gone
+    return 0;
+}
diff --git a/nbproject/configurations.xml b/nbproject/configurations.xml
index 47c78ca..58caadc 100644
--- a/nbproject/configurations.xml
+++ b/nbproject/configurations.xml
@@ -1,17 +1,34 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <configurationDescriptor version="90">
   <logicalFolder name="root" displayName="root" projectFiles="true" kind="ROOT">
+    <logicalFolder name="2.8.11.2"
+                   displayName="2.8.11.2"
+                   projectFiles="true"
+                   root="build/CMakeFiles/2.8.11.2">
+      <logicalFolder name="CompilerIdC" displayName="CompilerIdC" projectFiles="true">
+        <itemPath>build/CMakeFiles/2.8.11.2/CompilerIdC/CMakeCCompilerId.c</itemPath>
+      </logicalFolder>
+      <logicalFolder name="CompilerIdCXX"
+                     displayName="CompilerIdCXX"
+                     projectFiles="true">
+        <itemPath>build/CMakeFiles/2.8.11.2/CompilerIdCXX/CMakeCXXCompilerId.cpp</itemPath>
+      </logicalFolder>
+    </logicalFolder>
     <df root="fsa" name="0">
-      <in>cfsa1_impl.hpp</in>
-      <in>cfsa2_impl.hpp</in>
-      <in>interpretation.hpp</in>
-      <in>simplefsa_impl.hpp</in>
-      <in>test_morph.cpp</in>
       <in>test_not_recognize.cpp</in>
       <in>test_recognize.cpp</in>
       <in>test_speed.cpp</in>
     </df>
+    <logicalFolder name="Modules"
+                   displayName="Modules"
+                   projectFiles="true"
+                   root="/usr/share/cmake-2.8/Modules">
+      <itemPath>/usr/share/cmake-2.8/Modules/CMakeCCompilerABI.c</itemPath>
+      <itemPath>/usr/share/cmake-2.8/Modules/CMakeCXXCompilerABI.cpp</itemPath>
+      <itemPath>/usr/share/cmake-2.8/Modules/CMakeCompilerABI.h</itemPath>
+    </logicalFolder>
     <df root="morfeusz" name="1">
+      <in>interpretations.cpp</in>
       <in>main.cpp</in>
       <in>morfeusz.cpp</in>
     </df>
@@ -22,7 +39,6 @@
       <itemPath>CMakeLists.txt</itemPath>
       <itemPath>build/Makefile</itemPath>
     </logicalFolder>
-    <itemPath>cfsa1_impl.hpp</itemPath>
   </logicalFolder>
   <sourceFolderFilter>^(nbproject)$</sourceFolderFilter>
   <sourceRootList>
@@ -44,10 +60,41 @@
           <buildCommandWorkingDir>build</buildCommandWorkingDir>
           <buildCommand>${MAKE} -f Makefile</buildCommand>
           <cleanCommand>${MAKE} -f Makefile clean</cleanCommand>
-          <executablePath>build/fsa/test_dict</executablePath>
+          <executablePath>build/fsa/test_speed</executablePath>
+          <cTool>
+            <incDir>
+              <pElem>build/CMakeFiles/CMakeTmp</pElem>
+            </incDir>
+          </cTool>
         </makeTool>
       </makefileType>
-      <item path="cfsa1_impl.hpp" ex="false" tool="3" flavor2="0">
+      <item path="/usr/share/cmake-2.8/Modules/CMakeCCompilerABI.c"
+            ex="false"
+            tool="0"
+            flavor2="2">
+        <cTool>
+        </cTool>
+      </item>
+      <item path="/usr/share/cmake-2.8/Modules/CMakeCXXCompilerABI.cpp"
+            ex="false"
+            tool="1"
+            flavor2="4">
+        <ccTool>
+        </ccTool>
+      </item>
+      <item path="build/CMakeFiles/2.8.11.2/CompilerIdC/CMakeCCompilerId.c"
+            ex="false"
+            tool="0"
+            flavor2="2">
+        <cTool>
+        </cTool>
+      </item>
+      <item path="build/CMakeFiles/2.8.11.2/CompilerIdCXX/CMakeCXXCompilerId.cpp"
+            ex="false"
+            tool="1"
+            flavor2="4">
+        <ccTool>
+        </ccTool>
       </item>
       <folder path="0">
         <ccTool>
@@ -56,23 +103,27 @@
           </incDir>
         </ccTool>
       </folder>
-      <folder path="1">
+      <folder path="2.8.11.2">
+        <ccTool>
+          <incDir>
+            <pElem>build/CMakeFiles/CMakeTmp</pElem>
+          </incDir>
+        </ccTool>
+      </folder>
+      <folder path="Modules">
+        <ccTool>
+          <incDir>
+            <pElem>build/CMakeFiles/CMakeTmp</pElem>
+          </incDir>
+        </ccTool>
+      </folder>
+      <item path="fsa/const.cpp" ex="false" tool="1" flavor2="8">
         <ccTool>
           <incDir>
             <pElem>fsa</pElem>
             <pElem>build/morfeusz</pElem>
           </incDir>
         </ccTool>
-      </folder>
-      <item path="fsa/cfsa1_impl.hpp" ex="false" tool="3" flavor2="0">
-      </item>
-      <item path="fsa/cfsa2_impl.hpp" ex="false" tool="3" flavor2="0">
-      </item>
-      <item path="fsa/interpretation.hpp" ex="false" tool="3" flavor2="0">
-      </item>
-      <item path="fsa/simplefsa_impl.hpp" ex="false" tool="3" flavor2="0">
-      </item>
-      <item path="fsa/test_morph.cpp" ex="false" tool="1" flavor2="0">
       </item>
       <item path="fsa/test_not_recognize.cpp" ex="false" tool="1" flavor2="8">
         <ccTool>
@@ -86,12 +137,53 @@
         <ccTool>
         </ccTool>
       </item>
+      <item path="morfeusz/MorphDeserializer.cpp" ex="false" tool="1" flavor2="8">
+        <ccTool>
+          <incDir>
+            <pElem>fsa</pElem>
+            <pElem>build/morfeusz</pElem>
+          </incDir>
+        </ccTool>
+      </item>
+      <item path="morfeusz/Tagset.cpp" ex="false" tool="1" flavor2="8">
+        <ccTool>
+          <incDir>
+            <pElem>fsa</pElem>
+            <pElem>build/morfeusz</pElem>
+          </incDir>
+        </ccTool>
+      </item>
+      <item path="morfeusz/interpretations.cpp" ex="false" tool="1" flavor2="8">
+        <ccTool>
+          <incDir>
+            <pElem>fsa</pElem>
+            <pElem>build/morfeusz</pElem>
+          </incDir>
+        </ccTool>
+      </item>
       <item path="morfeusz/main.cpp" ex="false" tool="1" flavor2="8">
         <ccTool>
+          <incDir>
+            <pElem>fsa</pElem>
+            <pElem>build/morfeusz</pElem>
+          </incDir>
         </ccTool>
       </item>
       <item path="morfeusz/morfeusz.cpp" ex="false" tool="1" flavor2="4">
         <ccTool>
+          <incDir>
+            <pElem>build/CMakeFiles/CMakeTmp</pElem>
+            <pElem>fsa</pElem>
+            <pElem>build/morfeusz</pElem>
+          </incDir>
+        </ccTool>
+      </item>
+      <item path="morfeusz/test_morph.cpp" ex="false" tool="1" flavor2="8">
+        <ccTool>
+          <incDir>
+            <pElem>fsa</pElem>
+            <pElem>build/morfeusz</pElem>
+          </incDir>
         </ccTool>
       </item>
     </conf>
diff --git a/nbproject/project.xml b/nbproject/project.xml
index 84b35e8..6630483 100644
--- a/nbproject/project.xml
+++ b/nbproject/project.xml
@@ -4,9 +4,9 @@
     <configuration>
         <data xmlns="http://www.netbeans.org/ns/make-project/1">
             <name>morfeusz</name>
-            <c-extensions/>
+            <c-extensions>c</c-extensions>
             <cpp-extensions>cpp</cpp-extensions>
-            <header-extensions>hpp</header-extensions>
+            <header-extensions>h,hpp</header-extensions>
             <sourceEncoding>UTF-8</sourceEncoding>
             <make-dep-projects/>
             <sourceRootList>
--
libgit2 0.22.2