dalsza optymalizacja kodu

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@181 ff4e3ee1-f430-4e82-ade0-24591c43f1fd

dalsza optymalizacja kodu
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@181 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Michał Lenart
1 parent f3f17708
Showing 21 changed files with 572 additions and 434 deletions
morfeusz/CMakeLists.txt
morfeusz/CasePatternHelper.hpp
morfeusz/Environment.cpp
morfeusz/Environment.hpp
morfeusz/InflexionGraph.cpp
morfeusz/InflexionGraph.hpp
morfeusz/InterpretedChunk.hpp
morfeusz/InterpretedChunksDecoder.hpp
morfeusz/Morfeusz.cpp
morfeusz/Morfeusz.hpp
morfeusz/decoder/InterpretedChunksDecoder.hpp
morfeusz/decoder/InterpretedChunksDecoder4Analyzer.cpp
morfeusz/decoder/InterpretedChunksDecoder4Analyzer.hpp
morfeusz/decoder/InterpretedChunksDecoder4Generator.cpp
morfeusz/decoder/InterpretedChunksDecoder4Generator.hpp
morfeusz/fsa/fsa.hpp
morfeusz/fsa/state_impl.hpp
morfeusz/morfeusz_analyzer.cpp
morfeusz/segrules/SegrulesFSA.hpp
nbproject/configurations.xml
@@ -38,6 +38,9 @@ set(SRC_FILES
     charset/conversion_tables.cpp
     cli/cli.cpp
     segrules/segrules.cpp
+    CasePatternHelper.cpp
+    decoder/InterpretedChunksDecoder4Analyzer.cpp
+    decoder/InterpretedChunksDecoder4Generator.cpp
 )
 set(INCLUDE_FILES 
@@ -12,6 +12,9 @@
 #include "InterpsGroup.hpp"
 #include "CasePatternHelper.hpp"
 #include "compressionByteUtils.hpp"
+#include "Environment.hpp"
+
+class Environment;
 class CasePatternHelper {
 public:
@@ -39,64 +42,17 @@ public:
     }
     bool checkInterpsGroupOrthCasePatterns(
-            const std::vector<uint32_t>& lowercaseCodepoints,
-            const std::vector<uint32_t>& originalCodepoints,
-            const InterpsGroup& ig) const {
-        const unsigned char* currPtr = ig.ptr;
-        unsigned char compressionByte = *currPtr++;
-        if (!this->caseSensitive) {
-            return true;
-        }
-        else if (isOrthOnlyLower(compressionByte)) {
-            return true;
-        }
-        else if (isOrthOnlyTitle(compressionByte)) {
-            return lowercaseCodepoints[0] != originalCodepoints[0];
-        } 
-        else {
-            unsigned char casePatternsNum = *currPtr++;
-            if (casePatternsNum == 0) {
-                return true;
-            } 
-            else {
-                for (unsigned int i = 0; i < casePatternsNum; i++) {
-                    if (checkCasePattern(
-                            lowercaseCodepoints,
-                            originalCodepoints,
-                            deserializeOneCasePattern(currPtr))) {
-                        return true;
-                    }
-                }
-                return false;
-            }
-        }
-    }
+            const Environment& env,
+            const char* orthStart,
+            const char* orthEnd,
+            const InterpsGroup& ig) const;
-    std::vector<bool> deserializeOneCasePattern(const unsigned char*& ptr) const {
-        std::vector<bool> res;
-        uint8_t casePatternType = *ptr++;
-        uint8_t prefixLength;
-        uint8_t patternLength;
-        switch (casePatternType) {
-            case LEMMA_ONLY_LOWER:
-                break;
-            case LEMMA_UPPER_PREFIX:
-                prefixLength = *ptr++;
-                res.resize(prefixLength, true);
-                break;
-            case LEMMA_MIXED_CASE:
-                patternLength = *ptr++;
-                for (unsigned int i = 0; i < patternLength; i++) {
-                    uint8_t idx = *ptr++;
-                    res.resize(idx + 1, false);
-                    res[idx] = true;
-                }
-                break;
-        }
-        return res;
-    }
+    static std::vector<bool> deserializeOneCasePattern(const unsigned char*& ptr);
 private:
     bool caseSensitive;
+    
+    mutable vector<uint32_t> orthCodepoints;
+    mutable vector<uint32_t> normalizedCodepoints;
     static const uint8_t LEMMA_ONLY_LOWER = 0;
     static const uint8_t LEMMA_UPPER_PREFIX = 1;
@@ -8,9 +8,11 @@
 #include <vector>
 #include <algorithm>
 #include "Environment.hpp"
-#include "InterpretedChunksDecoder.hpp"
+#include "decoder/InterpretedChunksDecoder.hpp"
 #include "MorphDeserializer.hpp"
 #include "exceptions.hpp"
+#include "decoder/InterpretedChunksDecoder4Analyzer.hpp"
+#include "decoder/InterpretedChunksDecoder4Generator.hpp"
 //class InterpretedChunksDecoder4Analyzer;
 //class InterpretedChunksDecoder4Generator;
@@ -53,7 +55,7 @@ processorType == ANALYZER
 ? (InterpretedChunksDecoder*) new InterpretedChunksDecoder4Analyzer(*this)
 : (InterpretedChunksDecoder*) new InterpretedChunksDecoder4Generator(*this)),
 processorType(processorType),
-casePatternHelper() {
+casePatternHelper(new CasePatternHelper()) {
 }
 const CharsetConverter* Environment::getCharsetConverter(MorfeuszCharset charset) const {
@@ -78,6 +80,7 @@ Environment::~Environment() {
         delete this->fsaFileStartPtr;
     }
     delete this->chunksDecoder;
+    delete this->casePatternHelper;
 }
 void Environment::setCharset(MorfeuszCharset charset) {
@@ -146,11 +149,11 @@ MorfeuszProcessorType Environment::getProcessorType() const {
 }
 void Environment::setCaseSensitive(bool caseSensitive) {
-    this->casePatternHelper.setCaseSensitive(caseSensitive);
+    this->casePatternHelper->setCaseSensitive(caseSensitive);
 }
 const CasePatternHelper& Environment::getCasePatternHelper() const {
-    return this->casePatternHelper;
+    return *this->casePatternHelper;
 }
 const Qualifiers& Environment::getQualifiersHelper() const {
@@ -11,6 +11,7 @@
 #include <vector>
 class InterpretedChunksDecoder;
+class CasePatternHelper;
 #include "charset/CaseConverter.hpp"
 #include "charset/CharsetConverter.hpp"
@@ -79,7 +80,7 @@ private:
     const InterpretedChunksDecoder* chunksDecoder;
     MorfeuszProcessorType processorType;
-    CasePatternHelper casePatternHelper;
+    CasePatternHelper* casePatternHelper;
     const CharsetConverter* getCharsetConverter(MorfeuszCharset charset) const;
 };
@@ -78,7 +78,7 @@ void InflexionGraph::addPath(const std::vector<InterpretedChunk>& path, bool wea
                 this->addMiddleEdge((unsigned int) this->graph.size(), e);
             }
             else {
-                Edge e = {chunk, (int) this->graph.size() + 1};
+                Edge e = {chunk, (unsigned long) this->graph.size() + 1};
                 this->addMiddleEdge((unsigned int) this->graph.size(), e);
             }
         }
@@ -117,7 +117,8 @@ static bool containsEqualEdge(const vector<InflexionGraph::Edge>& edges, const I
     for (unsigned int i = 0; i < edges.size(); i++) {
         const InflexionGraph::Edge& e1 = edges[i];
         if (e1.chunk.textStartPtr == e.chunk.textStartPtr
-                && e1.chunk.lowercaseCodepoints == e.chunk.lowercaseCodepoints
+                && e1.chunk.textStartPtr == e.chunk.textStartPtr
+                && e1.chunk.textEndPtr == e.chunk.textEndPtr
                 && e1.chunk.segmentType == e.chunk.segmentType
                 && e1.nextNode == e.nextNode) {
             return true;
@@ -22,7 +22,7 @@ public:
     struct Edge {
         InterpretedChunk chunk;
-        unsigned int nextNode;
+        unsigned long nextNode;
     };
     void addPath(const std::vector<InterpretedChunk>& path, bool weak);
@@ -15,8 +15,6 @@ struct InterpretedChunk {
     unsigned char segmentType;
     const char* textStartPtr;
     const char* textEndPtr;
-    std::vector<uint32_t> originalCodepoints;
-    std::vector<uint32_t> lowercaseCodepoints;
     const unsigned char* interpsGroupPtr;
     const unsigned char* interpsPtr;
     const unsigned char* interpsEndPtr;
-/* 
- * File:   InterpsGroupDecoder.hpp
- * Author: mlenart
- *
- * Created on November 22, 2013, 10:35 PM
- */
-
-#ifndef INTERPSGROUPDECODER_HPP
-#define	INTERPSGROUPDECODER_HPP
-
-#include <string>
-#include <vector>
-#include <utility>
-
-#include "charset/CharsetConverter.hpp"
-#include "EncodedInterpretation.hpp"
-#include "InterpretedChunk.hpp"
-#include "EncodedInterpretation.hpp"
-#include "charset/CaseConverter.hpp"
-#include "Environment.hpp"
-#include "MorphInterpretation.hpp"
-#include "CasePatternHelper.hpp"
-#include "deserializationUtils.hpp"
-#include "compressionByteUtils.hpp"
-#include "const.hpp"
-
-class InterpretedChunksDecoder {
-public:
-
-    InterpretedChunksDecoder(const Environment& env)
-    : env(env) {
-    }
-
-    virtual ~InterpretedChunksDecoder() {
-    }
-
-    virtual void decode(
-            unsigned int startNode,
-            unsigned int endNode,
-            const InterpretedChunk& interpretedChunk,
-            std::vector<MorphInterpretation>& out) const = 0;
-
-protected:
-
-    const Environment& env;
-};
-
-class InterpretedChunksDecoder4Analyzer : public InterpretedChunksDecoder {
-public:
-
-    InterpretedChunksDecoder4Analyzer(const Environment& env) : InterpretedChunksDecoder(env) {
-    }
-
-    void decode(
-            unsigned int startNode,
-            unsigned int endNode,
-            const InterpretedChunk& interpretedChunk,
-            std::vector<MorphInterpretation>& out) const {
-        string orth;
-        string lemmaPrefix;
-        if (convertPrefixes(interpretedChunk, orth, lemmaPrefix)) {
-            orth += this->env.getCharsetConverter().toString(interpretedChunk.originalCodepoints);
-            const unsigned char* currPtr = interpretedChunk.interpsPtr;
-            while (currPtr < interpretedChunk.interpsEndPtr) {
-                this->decodeMorphInterpretation(startNode, endNode, orth, lemmaPrefix, interpretedChunk, false, currPtr, out);
-            }
-        }
-    }
-
-protected:
-
-    void decodeForm(
-            const vector<uint32_t>& orth,
-            const EncodedForm& lemma,
-            bool forPrefix,
-            string& res) const {
-        for (unsigned int i = lemma.prefixToCut; i < orth.size() - lemma.suffixToCut; i++) {
-            uint32_t cp =
-                    (i < lemma.casePattern.size() && lemma.casePattern[i])
-                    ? env.getCaseConverter().toTitle(orth[i])
-                    : orth[i];
-            env.getCharsetConverter().append(cp, res);
-        }
-        if (!forPrefix) {
-            const char* suffixPtr = lemma.suffixToAdd.c_str();
-            const char* suffixEnd = suffixPtr + lemma.suffixToAdd.length();
-            while (suffixPtr != suffixEnd) {
-                uint32_t cp = UTF8CharsetConverter::getInstance().next(suffixPtr, suffixEnd);
-                env.getCharsetConverter().append(cp, res);
-            }
-        }
-    }
-
-    void deserializeEncodedForm(const unsigned char*& ptr, unsigned char compressionByte, EncodedForm& encodedForm) const {
-        encodedForm.prefixToCut = hasCompressedPrefixCut(compressionByte)
-                ? getPrefixCutLength(compressionByte)
-                : readInt8(ptr);
-        encodedForm.suffixToCut = readInt8(ptr);
-        encodedForm.suffixToAdd = readString(ptr);
-        assert(encodedForm.casePattern.size() == 0);
-        if (isLemmaOnlyLower(compressionByte)) {
-            encodedForm.casePattern = std::vector<bool>();
-        } else if (isLemmaOnlyTitle(compressionByte)) {
-            encodedForm.casePattern = std::vector<bool>();
-            encodedForm.casePattern.push_back(true);
-        } else {
-            encodedForm.casePattern = env.getCasePatternHelper().deserializeOneCasePattern(ptr);
-        }
-    }
-
-    EncodedInterpretation deserializeEncodedInterp(const unsigned char*& ptr, unsigned char compressionByte) const {
-        EncodedInterpretation interp;
-        if (isOrthOnlyLower(compressionByte)) {
-        } else if (isOrthOnlyTitle(compressionByte)) {
-            interp.orthCasePattern.push_back(true);
-        } else {
-            interp.orthCasePattern = this->env.getCasePatternHelper().deserializeOneCasePattern(ptr);
-        }
-        deserializeEncodedForm(ptr, compressionByte, interp.value);
-        interp.tag = readInt16(ptr);
-        interp.nameClassifier = *ptr++;
-        interp.qualifiers = readInt16(ptr);
-        return interp;
-    }
-private:
-
-    pair<string, string> getLemmaHomonymIdPair(const string& lemma) const {
-        vector<string> splitRes(split(lemma, ':'));
-        if (splitRes.size() == 2) {
-            return make_pair(splitRes[0], splitRes[1]);
-        } else {
-            return make_pair(lemma, "");
-        }
-    }
-
-    void decodeMorphInterpretation(
-            unsigned int startNode, unsigned int endNode,
-            const string& orth,
-            const string& lemmaPrefix,
-            const InterpretedChunk& chunk,
-            bool forPrefix,
-            const unsigned char*& ptr,
-            std::vector<MorphInterpretation>& out) const {
-        string lemma = lemmaPrefix;
-        EncodedInterpretation ei = this->deserializeEncodedInterp(ptr, *chunk.interpsGroupPtr);
-        this->decodeForm(chunk.lowercaseCodepoints, ei.value, forPrefix, lemma);
-        if (env.getCasePatternHelper().checkCasePattern(chunk.lowercaseCodepoints, chunk.originalCodepoints, ei.orthCasePattern)) {
-            //            pair<string, string> lemmaHomonymId = getLemmaHomonymIdPair(lemma);
-            out.push_back(MorphInterpretation(
-                    startNode, endNode,
-                    orth, lemma,
-                    //                    "",
-                    ei.tag,
-                    ei.nameClassifier,
-                    ei.qualifiers,
-                    env));
-        }
-    }
-
-    bool convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& orth, std::string& lemmaPrefix) const {
-        for (unsigned int i = 0; i < interpretedChunk.prefixChunks.size(); i++) {
-            const InterpretedChunk& prefixChunk = interpretedChunk.prefixChunks[i];
-            orth += env.getCharsetConverter().toString(prefixChunk.originalCodepoints);
-            const unsigned char* ptr = prefixChunk.interpsPtr;
-            std::vector<MorphInterpretation> mi;
-            //            env.getCasePatternHelper().skipCasePattern(ptr);
-            this->decodeMorphInterpretation(0, 0, orth, string(""), prefixChunk, true, ptr, mi);
-            if (!mi.empty()) {
-                lemmaPrefix += mi[0].getLemma();
-            } else {
-                return false;
-            }
-        }
-        return true;
-    }
-};
-
-class InterpretedChunksDecoder4Generator : public InterpretedChunksDecoder {
-public:
-
-    InterpretedChunksDecoder4Generator(const Environment& env) : InterpretedChunksDecoder(env) {
-    }
-
-    void decode(
-            unsigned int startNode,
-            unsigned int endNode,
-            const InterpretedChunk& interpretedChunk,
-            std::vector<MorphInterpretation>& out) const {
-        string orthPrefix;
-        string lemma;
-        convertPrefixes(interpretedChunk, orthPrefix, lemma);
-        lemma += env.getCharsetConverter().toString(interpretedChunk.originalCodepoints);
-        const unsigned char* currPtr = interpretedChunk.interpsPtr;
-        while (currPtr < interpretedChunk.interpsEndPtr) {
-            MorphInterpretation mi = this->decodeMorphInterpretation(startNode, endNode, orthPrefix, lemma, interpretedChunk, currPtr);
-            //                        cerr << mi.toString(false) << endl;
-            //            cerr << "required='" << interpretedChunk.requiredHomonymId << "' morphInterp='" << mi.getHomonymId() << "'" << endl;
-            if (interpretedChunk.requiredHomonymId.empty() || mi.hasHomonym(interpretedChunk.requiredHomonymId)) {
-                out.push_back(mi);
-            }
-        }
-    }
-
-private:
-
-    void convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& orthPrefix, std::string& lemma) const {
-        for (unsigned int i = 0; i < interpretedChunk.prefixChunks.size(); i++) {
-            const InterpretedChunk& prefixChunk = interpretedChunk.prefixChunks[i];
-            lemma += env.getCharsetConverter().toString(prefixChunk.originalCodepoints);
-            const unsigned char* ptr = prefixChunk.interpsPtr;
-            MorphInterpretation mi = this->decodeMorphInterpretation(0, 0, orthPrefix, string(""), prefixChunk, ptr);
-            orthPrefix += mi.getOrth();
-        }
-    }
-
-    MorphInterpretation decodeMorphInterpretation(
-            unsigned int startNode, unsigned int endNode,
-            const string& orthPrefix,
-            const string& lemma,
-            const InterpretedChunk& chunk,
-            const unsigned char*& ptr) const {
-        string orth = orthPrefix;
-        EncodedInterpretation ei = this->deserializeInterp(ptr);
-        this->decodeForm(chunk.originalCodepoints, ei.value, orth);
-        return MorphInterpretation(
-                startNode, endNode,
-                orth, lemma + HOMONYM_SEPARATOR + ei.homonymId,
-                //                ei.homonymId,
-                ei.tag,
-                ei.nameClassifier,
-                ei.qualifiers,
-                env);
-    }
-
-    void decodeForm(
-            const vector<uint32_t>& lemma,
-            const EncodedForm& orth,
-            string& res) const {
-        res += orth.prefixToAdd;
-        for (unsigned int i = 0; i < lemma.size() - orth.suffixToCut; i++) {
-            env.getCharsetConverter().append(lemma[i], res);
-        }
-        const char* suffixPtr = orth.suffixToAdd.c_str();
-        const char* suffixEnd = suffixPtr + orth.suffixToAdd.length();
-        while (suffixPtr != suffixEnd) {
-            uint32_t cp = UTF8CharsetConverter::getInstance().next(suffixPtr, suffixEnd);
-            env.getCharsetConverter().append(cp, res);
-        }
-    }
-
-    EncodedInterpretation deserializeInterp(const unsigned char*& ptr) const {
-        EncodedInterpretation interp;
-        interp.homonymId = readString(ptr);
-        interp.value.prefixToAdd = readString(ptr);
-        interp.value.suffixToCut = readInt8(ptr);
-        interp.value.suffixToAdd = readString(ptr);
-        interp.tag = readInt16(ptr);
-        interp.nameClassifier = readInt8(ptr);
-        interp.qualifiers = readInt16(ptr);
-        return interp;
-    }
-};
-
-#endif	/* INTERPSGROUPDECODER_HPP */
-
@@ -12,7 +12,7 @@
 #include "data/default_fsa.hpp"
 #include "Morfeusz.hpp"
 #include "MorphDeserializer.hpp"
-#include "InterpretedChunksDecoder.hpp"
+#include "decoder/InterpretedChunksDecoder.hpp"
 #include "charset/CharsetConverter.hpp"
 #include "charset/charset_utils.hpp"
 #include "charset/CaseConverter.hpp"
@@ -34,6 +34,51 @@ static MorfeuszOptions createDefaultOptions() {
     return res;
 }
+static void doShiftOrth(InterpretedChunk& from, InterpretedChunk& to) { // Folds `from` into `to` as a prefix chunk and extends `to`'s text range back to `from`'s start.
+    to.prefixChunks.insert( // Put from's own prefix chunks in front of whatever `to` already holds.
+            to.prefixChunks.begin(),
+            from.prefixChunks.begin(),
+            from.prefixChunks.end());
+    to.prefixChunks.push_back(from); // Then append `from` itself at the end of to's prefix list.
+    to.textStartPtr = from.textStartPtr; // `to` now begins where `from` began.
+    from.orthWasShifted = true; // Record on `from` that its orth has been shifted into `to`.
+}
+
+static string debugInterpsGroup(unsigned char type, const char* startPtr, const char* endPtr) { // Debug helper: renders one entry as "(<type>, <text>), ".
+    stringstream res;
+    res << "(" << (int) type << ", " << string(startPtr, endPtr) << "), "; // Cast to int so the type is printed as a number; text is the range [startPtr, endPtr).
+    return res.str();
+}
+
+static string debugAccum(vector<InterpretedChunk>& accum) { // Debug helper: concatenates the debugInterpsGroup() rendering of every chunk in `accum`, in order.
+    stringstream res;
+    for (unsigned int i = 0; i < accum.size(); i++) {
+        res << debugInterpsGroup(accum[i].segmentType, accum[i].textStartPtr, accum[i].textEndPtr); // One "(type, text), " entry per accumulated chunk.
+        //        res << "(" << (int) accum[i].interpsGroup.type << ", " << string(accum[i].chunkStartPtr, accum[i].chunkStartPtr) << "), ";
+    }
+    return res.str();
+}
+
+static void feedStateDirectly( // Advances `state` over the raw chars in [inputStart, inputEnd) without any conversion.
+        StateType& state,
+        const char* inputStart,
+        const char* inputEnd) {
+    const char* currInput = inputStart;
+    while (currInput != inputEnd && !state.isSink()) { // Stop early once the state reports isSink().
+        state.proceedToNext(*currInput++); // Feed one char per step.
+    }
+}
+
+static void feedState( // Advances `state` over the encoded form of a single codepoint.
+        StateType& state,
+        int codepoint) {
+    std::string chars;
+    UTF8CharsetConverter::getInstance().append(codepoint, chars); // Serialize the codepoint into `chars` via the UTF8 converter.
+    for (unsigned int i = 0; i < chars.length() && !state.isSink(); i++) { // Stop early once the state reports isSink().
+        state.proceedToNext(chars[i]); // Feed the encoded chars one at a time.
+    }
+}
+
 Morfeusz::Morfeusz()
 : analyzerEnv(DEFAULT_MORFEUSZ_CHARSET, ANALYZER, DEFAULT_FSA),
 generatorEnv(DEFAULT_MORFEUSZ_CHARSET, GENERATOR, DEFAULT_SYNTH_FSA),
@@ -97,11 +142,12 @@ void Morfeusz::processOneWord(
     if (!graph.empty()) {
         const InterpretedChunksDecoder& interpretedChunksDecoder = env.getInterpretedChunksDecoder();
         int srcNode = startNodeNum;
-        for (unsigned int i = 0; i < graph.getTheGraph().size(); i++) {
-            const vector<InflexionGraph::Edge>& edges = graph.getTheGraph()[i];
+        const std::vector< std::vector<InflexionGraph::Edge> >& theGraph = graph.getTheGraph();
+        for (unsigned int i = 0; i < theGraph.size(); i++) {
+            const vector<InflexionGraph::Edge>& edges = theGraph[i];
             for (unsigned int j = 0; j < edges.size(); j++) {
                 const InflexionGraph::Edge& e = edges[j];
-                int targetNode = startNodeNum + e.nextNode;
+                unsigned long targetNode = startNodeNum + e.nextNode;
                 interpretedChunksDecoder.decode(srcNode, targetNode, e.chunk, results);
             }
             srcNode++;
@@ -118,56 +164,11 @@ void Morfeusz::processOneWord(
     inputStart = currInput;
 }
-static inline void doShiftOrth(InterpretedChunk& from, InterpretedChunk& to) {
-    to.prefixChunks.insert(
-            to.prefixChunks.begin(),
-            from.prefixChunks.begin(),
-            from.prefixChunks.end());
-    to.prefixChunks.push_back(from);
-    from.orthWasShifted = true;
-    to.textStartPtr = from.textStartPtr;
-}
-
-static inline string debugInterpsGroup(unsigned char type, const char* startPtr, const char* endPtr) {
-    stringstream res;
-    res << "(" << (int) type << ", " << string(startPtr, endPtr) << "), ";
-    return res.str();
-}
-
-static inline string debugAccum(vector<InterpretedChunk>& accum) {
-    stringstream res;
-    for (unsigned int i = 0; i < accum.size(); i++) {
-        res << debugInterpsGroup(accum[i].segmentType, accum[i].textStartPtr, accum[i].textEndPtr);
-        //        res << "(" << (int) accum[i].interpsGroup.type << ", " << string(accum[i].chunkStartPtr, accum[i].chunkStartPtr) << "), ";
-    }
-    return res.str();
-}
-
-static inline void feedStateDirectly(
-        StateType& state,
-        const char* inputStart,
-        const char* inputEnd) {
-    const char* currInput = inputStart;
-    while (currInput != inputEnd && !state.isSink()) {
-        state.proceedToNext(*currInput++);
-    }
-}
-
-static inline void feedState(
-        StateType& state,
-        int codepoint) {
-    std::string chars;
-    UTF8CharsetConverter::getInstance().append(codepoint, chars);
-    for (unsigned int i = 0; i < chars.length() && !state.isSink(); i++) {
-        state.proceedToNext(chars[i]);
-    }
-}
-
 void Morfeusz::doProcessOneWord(
         const Environment& env,
         const char*& inputData,
         const char* inputEnd,
-        SegrulesState segrulesState) const {
+        const SegrulesState& segrulesState) const {
     if (this->options.debug) {
         cerr << "----------" << endl;
         cerr << "PROCESS: '" << inputData << "', already recognized: " << debugAccum(accum) << endl;
@@ -178,11 +179,6 @@ void Morfeusz::doProcessOneWord(
     const char* currInput = inputData;
     uint32_t codepoint = inputData == inputEnd ? 0 : env.getCharsetConverter().next(currInput, inputEnd);
     bool currCodepointIsWhitespace = isWhitespace(codepoint);
-    vector<uint32_t> originalCodepoints;
-    vector<uint32_t> normalizedCodepoints;
-
-    originalCodepoints.reserve(16);
-    normalizedCodepoints.reserve(16);
     StateType state = env.getFSA().getInitialState();
@@ -190,8 +186,6 @@ void Morfeusz::doProcessOneWord(
         uint32_t normalizedCodepoint = env.getProcessorType() == ANALYZER
                 ? env.getCaseConverter().toLower(codepoint)
                 : codepoint;
-        originalCodepoints.push_back(codepoint);
-        normalizedCodepoints.push_back(normalizedCodepoint);
         if (codepoint == normalizedCodepoint && &env.getCharsetConverter() == &UTF8CharsetConverter::getInstance()) {
             feedStateDirectly(state, prevInput, currInput);
         }
@@ -203,48 +197,37 @@ void Morfeusz::doProcessOneWord(
         currCodepointIsWhitespace = isWhitespace(codepoint);
         string homonymId;
         if (env.getProcessorType() == GENERATOR && codepoint == 0x3A && currInput + 1 != inputEnd) {
-            if (originalCodepoints.size() == 1) {
-                throw MorfeuszException("Lemma of length > 1 cannot start with a colon");
-            }
             homonymId = string(currInput + 1, inputEnd);
-            //            cerr << "homonym " << homonymId << endl;
             prevInput = currInput;
             currInput = inputEnd;
             codepoint = 0x00;
             currCodepointIsWhitespace = true;
         }
         if (state.isAccepting()) {
-            vector<InterpsGroup> val(state.getValue());
-            for (unsigned int i = 0; i < val.size(); i++) {
-                InterpsGroup& ig = val[i];
+//            vector<InterpsGroup> val(state.getValue());
+            for (unsigned int i = 0; i < state.getValue().size(); i++) {
+                const InterpsGroup& ig = state.getValue()[i];
                 if (this->options.debug) {
                     cerr << "recognized: " << debugInterpsGroup(ig.type, inputStart, currInput) << " at: '" << inputStart << "'" << endl;
                 }
-                vector<SegrulesState> newSegrulesStates = env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, currCodepointIsWhitespace);
+                const vector<SegrulesState> newSegrulesStates = env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, currCodepointIsWhitespace);
                 if (!newSegrulesStates.empty()
-                        && env.getCasePatternHelper().checkInterpsGroupOrthCasePatterns(normalizedCodepoints, originalCodepoints, ig)) {
-
-                    for (
-                            vector<SegrulesState>::iterator it = newSegrulesStates.begin();
-                            it != newSegrulesStates.end();
-                            ++it) {
-                        SegrulesState newSegrulesState = *it;
+                        && env.getCasePatternHelper().checkInterpsGroupOrthCasePatterns(env, inputStart, currInput, ig)) {
+                    for (unsigned int i = 0; i < newSegrulesStates.size(); i++) {
+                        const SegrulesState& newSegrulesState = newSegrulesStates[i];
                         const unsigned char* interpsPtr = getInterpretationsPtr(env, ig);
                         const unsigned char* interpsEndPtr = ig.ptr + ig.size;
-                        InterpretedChunk ic = {
-                            ig.type,
-                            inputStart,
-                            currInput,
-                            originalCodepoints,
-                            normalizedCodepoints,
-                            ig.ptr,
-                            interpsPtr,
-                            interpsEndPtr,
-                            newSegrulesState.shiftOrthFromPrevious,
-                            false,
-                            vector<InterpretedChunk>(),
-                            homonymId
-                        };
+                        InterpretedChunk ic;
+                        ic.segmentType = ig.type;
+                        ic.textStartPtr = inputStart;
+                        ic.textEndPtr = currInput;
+                        ic.interpsGroupPtr = ig.ptr;
+                        ic.interpsPtr = interpsPtr;
+                        ic.interpsEndPtr = interpsEndPtr;
+                        ic.shiftOrth = newSegrulesState.shiftOrthFromPrevious;
+                        ic.orthWasShifted = false;
+                        ic.requiredHomonymId = homonymId;
+                        
                         if (!accum.empty() && accum.back().shiftOrth) {
                             doShiftOrth(accum.back(), ic);
                         }
@@ -266,7 +249,7 @@ void Morfeusz::doProcessOneWord(
                     }
                 }
                 else if (this->options.debug) {
-                    cerr << !newSegrulesStates.empty() << env.getCasePatternHelper().checkInterpsGroupOrthCasePatterns(normalizedCodepoints, originalCodepoints, ig) << endl;
+//                    cerr << !newSegrulesStates.empty() << env.getCasePatternHelper().checkInterpsGroupOrthCasePatterns(normalizedCodepoints, originalCodepoints, ig) << endl;
                     cerr << "NOT ACCEPTING " << debugAccum(accum) << debugInterpsGroup(ig.type, inputStart, currInput) << endl;
                 }
             }
@@ -170,7 +170,7 @@ private:
             const Environment& env,
             const char*& inputData,
             const char* inputEnd,
-            SegrulesState segrulesState) const;
+            const SegrulesState& segrulesState) const;
     void handleIgnChunk(
         const Environment& env,
+/* 
+ * File:   InterpretedChunksDecoder.hpp
+ * Author: mlenart
+ *
+ * Created on November 22, 2013, 10:35 PM
+ */
+
+#ifndef INTERPSGROUPDECODER_HPP
+#define	INTERPSGROUPDECODER_HPP
+
+#include <string>
+#include <vector>
+#include <utility>
+
+#include "charset/CharsetConverter.hpp"
+#include "EncodedInterpretation.hpp"
+#include "InterpretedChunk.hpp"
+#include "EncodedInterpretation.hpp"
+#include "charset/CaseConverter.hpp"
+#include "Environment.hpp"
+#include "MorphInterpretation.hpp"
+#include "CasePatternHelper.hpp"
+#include "deserializationUtils.hpp"
+#include "compressionByteUtils.hpp"
+#include "const.hpp"
+
+/**
+ * Abstract interface for turning a single InterpretedChunk into
+ * MorphInterpretation objects.
+ *
+ * Only a reference to the Environment is stored, so the Environment must
+ * outlive the decoder.
+ */
+class InterpretedChunksDecoder {
+public:
+
+    InterpretedChunksDecoder(const Environment& env)
+    : env(env) {
+    }
+
+    virtual ~InterpretedChunksDecoder() {
+    }
+
+    /**
+     * Decodes the given chunk, reporting the results through the out vector.
+     *
+     * @param startNode        start node passed on to the produced interpretations
+     * @param endNode          end node passed on to the produced interpretations
+     * @param interpretedChunk chunk to decode
+     * @param out              vector receiving the produced interpretations
+     */
+    virtual void decode(
+            unsigned int startNode,
+            unsigned int endNode,
+            const InterpretedChunk& interpretedChunk,
+            std::vector<MorphInterpretation>& out) const = 0;
+
+protected:
+
+    // Environment shared with the concrete decoders (held by reference).
+    const Environment& env;
+};
+
+#endif	/* INTERPSGROUPDECODER_HPP */
+
+/* 
+ * File:   InterpretedChunksDecoder4Analyzer.cpp
+ * Author: mlenart
+ * 
+ * Created on 15 maj 2014, 15:28
+ */
+
+#include "InterpretedChunksDecoder4Analyzer.hpp"
+#include <string>
+
+using namespace std;
+
+// Forwards the environment to the base decoder; no other state is initialized here.
+InterpretedChunksDecoder4Analyzer::InterpretedChunksDecoder4Analyzer(const Environment& env)
+: InterpretedChunksDecoder(env) {
+}
+
+/**
+ * Decodes every interpretation stored in the chunk and appends the ones that
+ * pass the case-pattern check to out.
+ *
+ * The orth of each result is the concatenated text of the prefix chunks
+ * followed by the chunk's own text. Nothing is produced when any prefix chunk
+ * fails to decode.
+ */
+void InterpretedChunksDecoder4Analyzer::decode(
+        unsigned int startNode,
+        unsigned int endNode,
+        const InterpretedChunk& interpretedChunk,
+        std::vector<MorphInterpretation>& out) const {
+    string orth;
+    string lemmaPrefix;
+    if (!convertPrefixes(interpretedChunk, orth, lemmaPrefix)) {
+        return;
+    }
+    orth.insert(orth.end(), interpretedChunk.textStartPtr, interpretedChunk.textEndPtr);
+    // decodeMorphInterpretation advances the cursor past each interpretation it reads.
+    for (const unsigned char* cursor = interpretedChunk.interpsPtr;
+            cursor < interpretedChunk.interpsEndPtr;) {
+        this->decodeMorphInterpretation(
+                startNode, endNode, orth, lemmaPrefix, interpretedChunk, false, cursor, out);
+    }
+}
+
+/**
+ * Derives a lemma from the codepoints of a form and appends it to res.
+ *
+ * Codepoints with indices in [lemma.prefixToCut, orth.size() - lemma.suffixToCut)
+ * are copied; a codepoint whose position is flagged in lemma.casePattern is
+ * converted with toTitle() first. Unless forPrefix is set, lemma.suffixToAdd
+ * (iterated with the UTF-8 converter) is appended afterwards, encoded with the
+ * environment's charset converter.
+ *
+ * @param orth      codepoints of the form (the caller in this file passes the lower-cased ones)
+ * @param lemma     encoded recipe: prefix/suffix cut lengths, suffix to add, case pattern
+ * @param forPrefix when true, lemma.suffixToAdd is not appended
+ * @param res       output string; the lemma is appended to it
+ */
+void InterpretedChunksDecoder4Analyzer::decodeLemma(
+        const vector<uint32_t>& orth,
+        const EncodedForm& lemma,
+        bool forPrefix,
+        string& res) const {
+    // Clamp the end index: orth.size() - suffixToCut is unsigned arithmetic and
+    // would wrap around to a huge value (reading far past the end of orth) if
+    // the encoded suffix were longer than the form itself.
+    const size_t suffixToCut = lemma.suffixToCut;
+    const size_t end = suffixToCut < orth.size() ? orth.size() - suffixToCut : 0;
+    for (size_t i = lemma.prefixToCut; i < end; i++) {
+        uint32_t cp =
+                (i < lemma.casePattern.size() && lemma.casePattern[i])
+                ? env.getCaseConverter().toTitle(orth[i])
+                : orth[i];
+        env.getCharsetConverter().append(cp, res);
+    }
+    if (!forPrefix) {
+        const char* suffixPtr = lemma.suffixToAdd.c_str();
+        const char* suffixEnd = suffixPtr + lemma.suffixToAdd.length();
+        while (suffixPtr != suffixEnd) {
+            uint32_t cp = UTF8CharsetConverter::getInstance().next(suffixPtr, suffixEnd);
+            env.getCharsetConverter().append(cp, res);
+        }
+    }
+}
+
+/**
+ * Reads one encoded form from the byte stream at ptr into encodedForm,
+ * advancing ptr past the consumed bytes.
+ *
+ * The prefix cut length comes from the compression byte when it carries one,
+ * otherwise from the stream. The case pattern is left empty for "only lower",
+ * set to a single leading true for "only title", and read from the stream in
+ * every other case. encodedForm.casePattern must be empty on entry.
+ */
+void InterpretedChunksDecoder4Analyzer::deserializeEncodedForm(const unsigned char*& ptr, unsigned char compressionByte, EncodedForm& encodedForm) const {
+    encodedForm.prefixToCut = hasCompressedPrefixCut(compressionByte)
+            ? getPrefixCutLength(compressionByte)
+            : readInt8(ptr);
+    encodedForm.suffixToCut = readInt8(ptr);
+    encodedForm.suffixToAdd = readString(ptr);
+    assert(encodedForm.casePattern.size() == 0);
+    // An all-lowercase lemma needs no pattern at all, so there is nothing to do for it.
+    if (!isLemmaOnlyLower(compressionByte)) {
+        if (isLemmaOnlyTitle(compressionByte)) {
+            encodedForm.casePattern.push_back(true);
+        }
+        else {
+            encodedForm.casePattern = env.getCasePatternHelper().deserializeOneCasePattern(ptr);
+        }
+    }
+}
+
+/**
+ * Reads one encoded interpretation from the byte stream at ptr, advancing ptr
+ * past it.
+ *
+ * Stream order: optional orth case pattern (present only when the compression
+ * byte marks the orth as neither "only lower" nor "only title"), the encoded
+ * lemma form, a 16-bit tag, a one-byte name classifier and 16-bit qualifiers.
+ */
+EncodedInterpretation InterpretedChunksDecoder4Analyzer::deserializeEncodedInterp(const unsigned char*& ptr, unsigned char compressionByte) const {
+    EncodedInterpretation decoded;
+    // "Only lower" keeps the pattern empty; "only title" is a single leading true.
+    if (!isOrthOnlyLower(compressionByte)) {
+        if (isOrthOnlyTitle(compressionByte)) {
+            decoded.orthCasePattern.push_back(true);
+        }
+        else {
+            decoded.orthCasePattern = this->env.getCasePatternHelper().deserializeOneCasePattern(ptr);
+        }
+    }
+    deserializeEncodedForm(ptr, compressionByte, decoded.value);
+    decoded.tag = readInt16(ptr);
+    decoded.nameClassifier = *ptr++;
+    decoded.qualifiers = readInt16(ptr);
+    return decoded;
+}
+
+/**
+ * Decodes the single interpretation located at ptr and, when the chunk's text
+ * matches the interpretation's orth case pattern, appends the resulting
+ * MorphInterpretation to out.
+ *
+ * ptr is always advanced past the interpretation, whether or not a result is
+ * produced. The member vectors orthCodepoints and normalizedCodepoints are
+ * overwritten with the chunk's codepoints and their lower-cased versions.
+ *
+ * @param orth        orth stored in the produced interpretation
+ * @param lemmaPrefix text placed in front of the decoded lemma
+ * @param forPrefix   forwarded to decodeLemma
+ */
+void InterpretedChunksDecoder4Analyzer::decodeMorphInterpretation(
+        unsigned int startNode, unsigned int endNode,
+        const string& orth,
+        const string& lemmaPrefix,
+        const InterpretedChunk& chunk,
+        bool forPrefix,
+        const unsigned char*& ptr,
+        std::vector<MorphInterpretation>& out) const {
+    orthCodepoints.clear();
+    normalizedCodepoints.clear();
+    // next() moves textPtr forward by one encoded character per call.
+    for (const char* textPtr = chunk.textStartPtr; textPtr != chunk.textEndPtr;) {
+        uint32_t codepoint = env.getCharsetConverter().next(textPtr, chunk.textEndPtr);
+        orthCodepoints.push_back(codepoint);
+        normalizedCodepoints.push_back(env.getCaseConverter().toLower(codepoint));
+    }
+    EncodedInterpretation ei = this->deserializeEncodedInterp(ptr, *chunk.interpsGroupPtr);
+    if (!env.getCasePatternHelper().checkCasePattern(normalizedCodepoints, orthCodepoints, ei.orthCasePattern)) {
+        return;
+    }
+    string lemma(lemmaPrefix);
+    this->decodeLemma(normalizedCodepoints, ei.value, forPrefix, lemma);
+    out.push_back(MorphInterpretation(
+            startNode, endNode,
+            orth, lemma,
+            ei.tag,
+            ei.nameClassifier,
+            ei.qualifiers,
+            env));
+}
+
+/**
+ * Processes the prefix chunks of interpretedChunk in order: the text of each
+ * one is appended to orth and the lemma of its first interpretation is
+ * appended to lemmaPrefix.
+ *
+ * @return false as soon as a prefix chunk yields no interpretation (orth and
+ *         lemmaPrefix keep whatever was appended up to that point), true otherwise
+ */
+bool InterpretedChunksDecoder4Analyzer::convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& orth, std::string& lemmaPrefix) const {
+    const unsigned int prefixesNum = interpretedChunk.prefixChunks.size();
+    for (unsigned int idx = 0; idx < prefixesNum; idx++) {
+        const InterpretedChunk& prefixChunk = interpretedChunk.prefixChunks[idx];
+        orth.insert(orth.end(), prefixChunk.textStartPtr, prefixChunk.textEndPtr);
+        // Only the first interpretation of a prefix chunk is decoded.
+        const unsigned char* interpPtr = prefixChunk.interpsPtr;
+        std::vector<MorphInterpretation> decoded;
+        this->decodeMorphInterpretation(0, 0, orth, string(""), prefixChunk, true, interpPtr, decoded);
+        if (decoded.empty()) {
+            return false;
+        }
+        lemmaPrefix += decoded[0].getLemma();
+    }
+    return true;
+}
+/* 
+ * File:   InterpretedChunksDecoder4Analyzer.hpp
+ * Author: mlenart
+ *
+ * Created on 15 maj 2014, 15:28
+ */
+
+#ifndef INTERPRETEDCHUNKSDECODER4ANALYZER_HPP
+#define	INTERPRETEDCHUNKSDECODER4ANALYZER_HPP
+
+#include "InterpretedChunksDecoder.hpp"
+
+/**
+ * InterpretedChunksDecoder implementation declared for the analyzer.
+ *
+ * The class keeps two mutable scratch vectors that are rewritten from const
+ * member functions, so one instance should not be used from several threads
+ * at once without external synchronization.
+ */
+class InterpretedChunksDecoder4Analyzer : public InterpretedChunksDecoder {
+public:
+
+    InterpretedChunksDecoder4Analyzer(const Environment& env);
+
+    /**
+     * Decodes all interpretations of interpretedChunk and appends them to out.
+     */
+    void decode(
+            unsigned int startNode,
+            unsigned int endNode,
+            const InterpretedChunk& interpretedChunk,
+            std::vector<MorphInterpretation>& out) const;
+
+private:
+
+    // Names from namespace std are fully qualified below so that this header
+    // does not depend on a using-directive leaking from another header.
+
+    /** Appends the lemma derived from orth (per the encoded form) to res. */
+    void decodeLemma(
+            const std::vector<uint32_t>& orth,
+            const EncodedForm& lemma,
+            bool forPrefix,
+            std::string& res) const;
+
+    /** Reads one encoded form from ptr into encodedForm, advancing ptr. */
+    void deserializeEncodedForm(const unsigned char*& ptr, unsigned char compressionByte, EncodedForm& encodedForm) const;
+
+    /** Reads one encoded interpretation from ptr, advancing ptr. */
+    EncodedInterpretation deserializeEncodedInterp(const unsigned char*& ptr, unsigned char compressionByte) const;
+
+    /** Decodes the interpretation at ptr (advancing ptr) and appends the result to out. */
+    void decodeMorphInterpretation(
+            unsigned int startNode, unsigned int endNode,
+            const std::string& orth,
+            const std::string& lemmaPrefix,
+            const InterpretedChunk& chunk,
+            bool forPrefix,
+            const unsigned char*& ptr,
+            std::vector<MorphInterpretation>& out) const;
+
+    /** Collects orth text and lemma prefix from the chunk's prefix chunks. */
+    bool convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& orth, std::string& lemmaPrefix) const;
+    
+    // Scratch buffers reused between calls; mutable because decoding is const.
+    mutable std::vector<uint32_t> orthCodepoints;
+    mutable std::vector<uint32_t> normalizedCodepoints;
+};
+
+#endif	/* INTERPRETEDCHUNKSDECODER4ANALYZER_HPP */
+
+/* 
+ * File:   InterpretedChunksDecoder4Generator.cpp
+ * Author: mlenart
+ * 
+ * Created on 15 maj 2014, 15:28
+ */
+
+#include "InterpretedChunksDecoder4Generator.hpp"
+#include <string>
+#include <vector>
+
+using namespace std;
+
+// Forwards the environment to the base decoder; no other state is initialized here.
+InterpretedChunksDecoder4Generator::InterpretedChunksDecoder4Generator(const Environment& env)
+: InterpretedChunksDecoder(env) {
+}
+
+/**
+ * Decodes every interpretation stored in the chunk and appends to out those
+ * that satisfy the chunk's homonym requirement.
+ *
+ * The lemma is the text of the prefix chunks followed by the chunk's own
+ * text. When interpretedChunk.requiredHomonymId is empty every interpretation
+ * is kept; otherwise only those for which hasHomonym() returns true.
+ */
+void InterpretedChunksDecoder4Generator::decode(
+        unsigned int startNode,
+        unsigned int endNode,
+        const InterpretedChunk& interpretedChunk,
+        std::vector<MorphInterpretation>& out) const {
+    string orthPrefix;
+    string lemma;
+    convertPrefixes(interpretedChunk, orthPrefix, lemma);
+    lemma.insert(lemma.end(), interpretedChunk.textStartPtr, interpretedChunk.textEndPtr);
+    // decodeMorphInterpretation advances the cursor past each interpretation it reads.
+    for (const unsigned char* cursor = interpretedChunk.interpsPtr;
+            cursor < interpretedChunk.interpsEndPtr;) {
+        MorphInterpretation candidate = this->decodeMorphInterpretation(
+                startNode, endNode, orthPrefix, lemma, interpretedChunk, cursor);
+        const bool homonymAccepted = interpretedChunk.requiredHomonymId.empty()
+                || candidate.hasHomonym(interpretedChunk.requiredHomonymId);
+        if (homonymAccepted) {
+            out.push_back(candidate);
+        }
+    }
+}
+
+/**
+ * Processes the prefix chunks of interpretedChunk in order: the text of each
+ * one is appended to lemma and the orth decoded from its first interpretation
+ * is appended to orthPrefix.
+ *
+ * @param interpretedChunk chunk whose prefixChunks are processed
+ * @param orthPrefix       output; receives the concatenated orths of the prefixes
+ * @param lemma            output; receives the concatenated texts of the prefixes
+ */
+void InterpretedChunksDecoder4Generator::convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& orthPrefix, std::string& lemma) const {
+    for (unsigned int i = 0; i < interpretedChunk.prefixChunks.size(); i++) {
+        const InterpretedChunk& prefixChunk = interpretedChunk.prefixChunks[i];
+        lemma.insert(lemma.end(), prefixChunk.textStartPtr, prefixChunk.textEndPtr);
+        const unsigned char* ptr = prefixChunk.interpsPtr;
+        // Decode each prefix with an EMPTY orth prefix. decodeMorphInterpretation
+        // starts the orth it builds with the prefix it is given, so passing the
+        // accumulated orthPrefix and then appending the result would repeat the
+        // earlier prefixes once there are two or more prefix chunks.
+        // NOTE(review): assumes getOrth() returns the orth the interpretation
+        // was constructed with - confirm against MorphInterpretation.
+        MorphInterpretation mi = this->decodeMorphInterpretation(0, 0, string(""), string(""), prefixChunk, ptr);
+        orthPrefix += mi.getOrth();
+    }
+}
+
+/**
+ * Decodes the single interpretation located at ptr (advancing ptr past it)
+ * and returns it as a MorphInterpretation.
+ *
+ * The orth is orthPrefix followed by the form produced by decodeForm() from
+ * the chunk's codepoints. The lemma is the given lemma, extended with
+ * HOMONYM_SEPARATOR and the homonym id when the interpretation has one.
+ * The member vector codepoints is overwritten with the chunk's codepoints.
+ */
+MorphInterpretation InterpretedChunksDecoder4Generator::decodeMorphInterpretation(
+        unsigned int startNode, unsigned int endNode,
+        const string& orthPrefix,
+        const string& lemma,
+        const InterpretedChunk& chunk,
+        const unsigned char*& ptr) const {
+    EncodedInterpretation ei = this->deserializeInterp(ptr);
+    codepoints.clear();
+    // next() moves textPtr forward by one encoded character per call.
+    for (const char* textPtr = chunk.textStartPtr; textPtr != chunk.textEndPtr;) {
+        uint32_t codepoint = env.getCharsetConverter().next(textPtr, chunk.textEndPtr);
+        codepoints.push_back(codepoint);
+    }
+    string orth = orthPrefix;
+    this->decodeForm(codepoints, ei.value, orth);
+    string fullLemma = lemma;
+    if (!ei.homonymId.empty()) {
+        fullLemma += HOMONYM_SEPARATOR;
+        fullLemma += ei.homonymId;
+    }
+    return MorphInterpretation(
+            startNode, endNode,
+            orth, fullLemma,
+            ei.tag,
+            ei.nameClassifier,
+            ei.qualifiers,
+            env);
+}
+
+/**
+ * Builds an orthographic form from lemma codepoints and appends it to res.
+ *
+ * Appends, in order: orth.prefixToAdd, the lemma codepoints without their
+ * last orth.suffixToCut elements, and orth.suffixToAdd (iterated with the
+ * UTF-8 converter). Codepoints are encoded with the environment's charset
+ * converter.
+ *
+ * @param lemma codepoints of the lemma text
+ * @param orth  encoded recipe: prefix to add, suffix cut length, suffix to add
+ * @param res   output string; the form is appended to it
+ */
+void InterpretedChunksDecoder4Generator::decodeForm(
+        const vector<uint32_t>& lemma,
+        const EncodedForm& orth,
+        string& res) const {
+    res += orth.prefixToAdd;
+    // Clamp the end index: lemma.size() - suffixToCut is unsigned arithmetic and
+    // would wrap around to a huge value (reading far past the end of lemma) if
+    // the encoded suffix were longer than the lemma itself.
+    const size_t suffixToCut = orth.suffixToCut;
+    const size_t end = suffixToCut < lemma.size() ? lemma.size() - suffixToCut : 0;
+    for (size_t i = 0; i < end; i++) {
+        env.getCharsetConverter().append(lemma[i], res);
+    }
+    const char* suffixPtr = orth.suffixToAdd.c_str();
+    const char* suffixEnd = suffixPtr + orth.suffixToAdd.length();
+    while (suffixPtr != suffixEnd) {
+        uint32_t cp = UTF8CharsetConverter::getInstance().next(suffixPtr, suffixEnd);
+        env.getCharsetConverter().append(cp, res);
+    }
+}
+
+/**
+ * Reads one encoded interpretation from the byte stream at ptr, advancing ptr
+ * past it.
+ *
+ * Stream order: homonym id string, prefix-to-add string, one-byte suffix cut
+ * length, suffix-to-add string, 16-bit tag, one-byte name classifier and
+ * 16-bit qualifiers. The reads must stay in exactly this order.
+ */
+EncodedInterpretation InterpretedChunksDecoder4Generator::deserializeInterp(const unsigned char*& ptr) const {
+    EncodedInterpretation decoded;
+    decoded.homonymId = readString(ptr);
+
+    // Fields describing how the form is derived.
+    EncodedForm& form = decoded.value;
+    form.prefixToAdd = readString(ptr);
+    form.suffixToCut = readInt8(ptr);
+    form.suffixToAdd = readString(ptr);
+
+    decoded.tag = readInt16(ptr);
+    decoded.nameClassifier = readInt8(ptr);
+    decoded.qualifiers = readInt16(ptr);
+    return decoded;
+}
+/* 
+ * File:   InterpretedChunksDecoder4Generator.hpp
+ * Author: mlenart
+ *
+ * Created on 15 maj 2014, 15:28
+ */
+
+#ifndef INTERPRETEDCHUNKSDECODER4GENERATOR_HPP
+#define	INTERPRETEDCHUNKSDECODER4GENERATOR_HPP
+
+#include "InterpretedChunksDecoder.hpp"
+
+/**
+ * InterpretedChunksDecoder implementation declared for the generator.
+ *
+ * The class keeps a mutable scratch vector that is rewritten from const
+ * member functions, so one instance should not be used from several threads
+ * at once without external synchronization.
+ */
+class InterpretedChunksDecoder4Generator : public InterpretedChunksDecoder {
+public:
+
+    InterpretedChunksDecoder4Generator(const Environment& env);
+
+    /**
+     * Decodes the interpretations of interpretedChunk and appends them to out.
+     */
+    void decode(
+            unsigned int startNode,
+            unsigned int endNode,
+            const InterpretedChunk& interpretedChunk,
+            std::vector<MorphInterpretation>& out) const;
+
+private:
+
+    // Names from namespace std are fully qualified below so that this header
+    // does not depend on a using-directive leaking from another header.
+
+    /** Collects the orth prefix and lemma text from the chunk's prefix chunks. */
+    void convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& orthPrefix, std::string& lemma) const;
+
+    /** Decodes the interpretation at ptr (advancing ptr) and returns it. */
+    MorphInterpretation decodeMorphInterpretation(
+            unsigned int startNode, unsigned int endNode,
+            const std::string& orthPrefix,
+            const std::string& lemma,
+            const InterpretedChunk& chunk,
+            const unsigned char*& ptr) const;
+
+    /** Appends the form derived from the lemma codepoints (per the encoded form) to res. */
+    void decodeForm(
+            const std::vector<uint32_t>& lemma,
+            const EncodedForm& orth,
+            std::string& res) const;
+
+    /** Reads one encoded interpretation from ptr, advancing ptr. */
+    EncodedInterpretation deserializeInterp(const unsigned char*& ptr) const;
+    
+    // Scratch buffer reused between calls; mutable because decoding is const.
+    mutable std::vector<uint32_t> codepoints;
+};
+
+
+#endif	/* INTERPRETEDCHUNKSDECODER4GENERATOR_HPP */
+
@@ -167,7 +167,7 @@ public:
      * Makes sense only for accepting states.
     * For non-accepting states it throws an exception.
      */
-    T getValue() const;
+    const T& getValue() const;
     unsigned char getLastTransitionValue() const;
@@ -46,7 +46,7 @@ unsigned long State&lt;T&gt;::getOffset() const {
 }
 template <class T>
-T State<T>::getValue() const {
+const T& State<T>::getValue() const {
     assert(this->isAccepting());
     return this->value;
 }
@@ -43,11 +43,20 @@ int main(int argc, const char** argv) {
             else if (prevStart != -1) {
                 printf("; ");
             }
-            printf("%s", mi.toString(true).c_str());
-//            printf("%d,%d,%s,%s,%s,%s",
-//                    mi.getStartNode(), mi.getEndNode(),
-//                    mi.getOrth().c_str(), lemmaToShow.c_str(),
-//                    mi.getTag().c_str(), lemmaToShow.c_str());
+//            printf("%s", mi.toString(true).c_str());
+            printf("%d,%d,%s,%s,%s",
+                    mi.getStartNode(), mi.getEndNode(),
+                    mi.getOrth().c_str(), mi.getLemma().c_str(),
+                    mi.getTag().c_str());
+            if (!mi.getName().empty()) {
+                printf(",%s", mi.getName().c_str());
+            }
+            if (!mi.getQualifiers().empty()) {
+                printf(",%s", mi.getQualifiers()[0].c_str());
+                for (unsigned int i = 1; i < mi.getQualifiers().size(); i++) {
+                    printf("|%s", mi.getQualifiers()[i].c_str());
+                }
+            }
             prevStart = mi.getStartNode();
             prevEnd = mi.getEndNode();
         }
@@ -34,12 +34,12 @@ public:
     std::vector<SegrulesState> proceedToNext(
             const unsigned char segnum,
-            const SegrulesState state,
+            const SegrulesState& state,
             bool atEndOfWord) const {
         std::vector<SegrulesState> res;
         const unsigned char* currPtr = ptr + state.offset + 1;
         const unsigned char transitionsNum = *currPtr++;
-        for (unsigned int i = 0; i < transitionsNum; i++) {
+        for (int i = 0; i < transitionsNum; i++) {
             if (*currPtr == segnum) {
                 SegrulesState newState = this->transition2State(currPtr);
                 if ((atEndOfWord && newState.accepting) 
@@ -130,6 +130,8 @@
         </ccTool>
       </item>
       <item path="build/morfeusz/java/swigJAVA.cpp" ex="false" tool="1" flavor2="4">
+        <ccTool flags="1">
+        </ccTool>
       </item>
       <item path="build/morfeusz/morfeuszJAVA_wrap.cxx"
             ex="false"
@@ -239,6 +241,7 @@
             <pElem>build/morfeusz</pElem>
           </incDir>
           <preprocessorList>
+            <Elem>NDEBUG</Elem>
             <Elem>libmorfeusz_EXPORTS</Elem>
           </preprocessorList>
         </ccTool>
@@ -283,7 +286,7 @@
         <ccTool>
           <incDir>
             <pElem>morfeusz</pElem>
-            <pElem>/usr/lib/jvm/java-6-openjdk/include</pElem>
+            <pElem>/usr/lib/jvm/default-java/include</pElem>
           </incDir>
           <preprocessorList>
             <Elem>NDEBUG</Elem>
@@ -310,6 +313,19 @@
           </undefinedList>
         </ccTool>
       </folder>
+      <item path="morfeusz/CasePatternHelper.cpp" ex="false" tool="1" flavor2="4">
+        <ccTool flags="1">
+          <incDir>
+            <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
+            <pElem>build/morfeusz</pElem>
+          </incDir>
+          <preprocessorList>
+            <Elem>NDEBUG</Elem>
+            <Elem>libmorfeusz_EXPORTS</Elem>
+          </preprocessorList>
+        </ccTool>
+      </item>
       <item path="morfeusz/Environment.cpp" ex="false" tool="1" flavor2="4">
         <ccTool flags="1">
           <incDir>
@@ -387,40 +403,75 @@
         </ccTool>
       </item>
       <item path="morfeusz/Tagset.cpp" ex="false" tool="1" flavor2="4">
-        <ccTool flags="1">
+        <ccTool flags="2">
           <incDir>
             <pElem>build</pElem>
             <pElem>morfeusz</pElem>
             <pElem>build/morfeusz</pElem>
           </incDir>
           <preprocessorList>
-            <Elem>NDEBUG</Elem>
             <Elem>libmorfeusz_EXPORTS</Elem>
           </preprocessorList>
         </ccTool>
       </item>
       <item path="morfeusz/charset/CaseConverter.cpp" ex="false" tool="1" flavor2="4">
+        <ccTool flags="2">
+        </ccTool>
       </item>
       <item path="morfeusz/charset/CharsetConverter.cpp"
             ex="false"
             tool="1"
             flavor2="4">
-        <ccTool flags="1">
-          <preprocessorList>
-            <Elem>NDEBUG</Elem>
-          </preprocessorList>
+        <ccTool flags="2">
         </ccTool>
       </item>
       <item path="morfeusz/charset/caseconv.cpp" ex="false" tool="1" flavor2="4">
+        <ccTool flags="2">
+        </ccTool>
       </item>
       <item path="morfeusz/charset/conversion_tables.cpp"
             ex="false"
             tool="1"
             flavor2="4">
+        <ccTool flags="2">
+        </ccTool>
       </item>
       <item path="morfeusz/cli/cli.cpp" ex="false" tool="1" flavor2="4">
+        <ccTool flags="1">
+        </ccTool>
       </item>
       <item path="morfeusz/const.cpp" ex="false" tool="1" flavor2="4">
+        <ccTool flags="2">
+          <incDir>
+            <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
+            <pElem>build/morfeusz</pElem>
+          </incDir>
+          <preprocessorList>
+            <Elem>libmorfeusz_EXPORTS</Elem>
+          </preprocessorList>
+        </ccTool>
+      </item>
+      <item path="morfeusz/decoder/InterpretedChunksDecoder4Analyzer.cpp"
+            ex="false"
+            tool="1"
+            flavor2="4">
+        <ccTool flags="1">
+          <incDir>
+            <pElem>build</pElem>
+            <pElem>morfeusz</pElem>
+            <pElem>build/morfeusz</pElem>
+          </incDir>
+          <preprocessorList>
+            <Elem>NDEBUG</Elem>
+            <Elem>libmorfeusz_EXPORTS</Elem>
+          </preprocessorList>
+        </ccTool>
+      </item>
+      <item path="morfeusz/decoder/InterpretedChunksDecoder4Generator.cpp"
+            ex="false"
+            tool="1"
+            flavor2="4">
         <ccTool flags="1">
           <incDir>
             <pElem>build</pElem>
@@ -509,6 +560,8 @@
         </ccTool>
       </item>
       <item path="morfeusz/segrules/segrules.cpp" ex="false" tool="1" flavor2="4">
+        <ccTool flags="1">
+        </ccTool>
       </item>
       <item path="morfeusz/test_recognize_dict.cpp" ex="false" tool="1" flavor2="4">
         <ccTool flags="0">
+#!/bin/bash
+
+rm -rf profbuild
+mkdir -p profbuild
+cd profbuild
+cmake -D INPUT_DICTIONARIES=../input/dodatki.tab,../input/PoliMorfSmall.tab -D CMAKE_BUILD_TYPE=Debug -D CMAKE_SHARED_LINKER_FLAGS="-lprofiler" -D CMAKE_EXE_LINKER_FLAGS="-lprofiler" ..
+make -j4
+rm -f /tmp/morfeusz.prof
+export LD_PRELOAD="/usr/lib/libprofiler.so"
+export CPUPROFILE="/tmp/morfeusz.prof"
+morfeusz/morfeusz_analyzer -i /tmp/dupadupa < /mnt/storage/morfeusz/sents10k > /dev/null
+### pprof --gv profbuild/morfeusz/morfeusz_analyzer /tmp/morfeusz.prof