From 96182ff07bfb4683c51c56fc23b351f34b71dc5b Mon Sep 17 00:00:00 2001
From: Michał Lenart <michall@ipipan.waw.pl>
Date: Thu, 21 Aug 2014 14:12:50 +0000
Subject: [PATCH] - minor performance fixes: return SegrulesState via out-parameter, add `failed` flag (drobne poprawki wydajnościowe)

---
 morfeusz/MorfeuszImpl.cpp         | 26 +++++---------------------
 morfeusz/segrules/SegrulesFSA.cpp | 39 ++++++++++++++++++++-------------------
 morfeusz/segrules/SegrulesFSA.hpp | 20 ++++++++++++--------
 profile.sh                        | 12 ++++++------
 4 files changed, 43 insertions(+), 54 deletions(-)

diff --git a/morfeusz/MorfeuszImpl.cpp b/morfeusz/MorfeuszImpl.cpp
index f4ba924..9b1811d 100644
--- a/morfeusz/MorfeuszImpl.cpp
+++ b/morfeusz/MorfeuszImpl.cpp
@@ -342,9 +342,9 @@ namespace morfeusz {
         }
         bool caseMatches = env.getCasePatternHelper().checkInterpsGroupOrthCasePatterns(env, reader.getWordStartPtr(), reader.getCurrPtr(), ig);
         if (caseMatches || options.caseHandling == CONDITIONALLY_CASE_SENSITIVE) {
-
-            SegrulesState newSegrulesState = env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, isAtWhitespace);
-            if (!newSegrulesState.sink) {
+            SegrulesState newSegrulesState = SegrulesState::FAILED_STATE;
+            env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, isAtWhitespace, newSegrulesState);
+            if (!newSegrulesState.failed) {
                 InterpretedChunk ic(
                         createChunk(ig, reader, newSegrulesState.shiftOrthFromPrevious, homonymId));
 
@@ -356,27 +356,11 @@ namespace morfeusz {
                         newSegrulesState,
                         ic);
             }
-//            if (!newSegrulesStates.empty()) {
-//                for (unsigned int i = 0; i < newSegrulesStates.size(); i++) {
-//                    const SegrulesState& newSegrulesState = newSegrulesStates[i];
-//
-//                    InterpretedChunk ic(
-//                            createChunk(ig, reader, newSegrulesState.shiftOrthFromPrevious, homonymId));
-//
-//                    processInterpretedChunk(
-//                            env,
-//                            reader,
-//                            isAtWhitespace,
-//                            caseMatches,
-//                            newSegrulesState,
-//                            ic);
-//                }
-//                newSegrulesStates.resize(0);
-//            } 
             else if (this->options.debug) {
                 std::cerr << "NOT ACCEPTING (segmentation)" << debugAccum(accum) << debugInterpsGroup(ig.type, reader.getWordStartPtr(), reader.getCurrPtr()) << std::endl;
             }
-        } else if (this->options.debug) {
+        } 
+        else if (this->options.debug) {
             std::cerr << "NOT ACCEPTING (case)" << debugAccum(accum) << debugInterpsGroup(ig.type, reader.getWordStartPtr(), reader.getCurrPtr()) << std::endl;
         }
     }
diff --git a/morfeusz/segrules/SegrulesFSA.cpp b/morfeusz/segrules/SegrulesFSA.cpp
index 192effd..9fc5c80 100644
--- a/morfeusz/segrules/SegrulesFSA.cpp
+++ b/morfeusz/segrules/SegrulesFSA.cpp
@@ -8,41 +8,44 @@ using namespace std;
 
 namespace morfeusz {
 
-    SegrulesState SegrulesState::SINK_STATE = {
+    SegrulesState SegrulesState::FAILED_STATE = {
         0, // offset
         false, // accepting
         false, // weak
         false, // shift orth
-        true // sink
+        true, // sink
+        true, // failed
     };
 
-    SegrulesState SegrulesFSA::proceedToNext(
+    void SegrulesFSA::proceedToNext(
             const unsigned char segnum,
             const SegrulesState& state,
-            bool atEndOfWord) const {
+            bool atEndOfWord,
+            SegrulesState& resState) const {
+        assert(!state.failed);
         if (state.offset == 0) {
-            return doProceedFromInitialState(segnum, atEndOfWord);
+            doProceedFromInitialState(segnum, atEndOfWord, resState);
         } else {
-            return doProceedFromNonInitialState(segnum, state, atEndOfWord);
+            doProceedFromNonInitialState(segnum, state, atEndOfWord, resState);
         }
     }
 
-    SegrulesState SegrulesFSA::doProceedFromInitialState(
+    void SegrulesFSA::doProceedFromInitialState(
             const unsigned char segnum,
-            bool atEndOfWord) const {
+            bool atEndOfWord,
+            SegrulesState& resState) const {
         const SegrulesState& newState = initialTransitions[segnum];
         if ((atEndOfWord && newState.accepting)
                 || (!atEndOfWord && !newState.sink)) {
-            return newState;
-        } else {
-            return SegrulesState::SINK_STATE;
+            resState = newState;
         }
     }
 
-    SegrulesState SegrulesFSA::doProceedFromNonInitialState(
+    void SegrulesFSA::doProceedFromNonInitialState(
             const unsigned char segnum,
             const SegrulesState& state,
-            bool atEndOfWord) const {
+            bool atEndOfWord,
+            SegrulesState& resState) const {
         const unsigned char* currPtr = ptr + state.offset + 1;
         const unsigned char transitionsNum = *currPtr++;
         for (int i = 0; i < transitionsNum; i++) {
@@ -50,14 +53,11 @@ namespace morfeusz {
                 SegrulesState newState = this->transition2State(currPtr);
                 if ((atEndOfWord && newState.accepting)
                         || (!atEndOfWord && !newState.sink)) {
-                    return newState;
-                } else {
-                    return SegrulesState::SINK_STATE;
+                    resState = newState;
                 }
             }
             currPtr += 4;
         }
-        return SegrulesState::SINK_STATE;
     }
 
     SegrulesState SegrulesFSA::transition2State(const unsigned char* transitionPtr) const {
@@ -69,12 +69,13 @@ namespace morfeusz {
         res.offset = readInt16(transitionPtr);
         res.accepting = *(ptr + res.offset) & ACCEPTING_FLAG;
         res.weak = *(ptr + res.offset) & WEAK_FLAG;
-        res.sink = !res.accepting && *(ptr + res.offset + 1) == 0;
+        res.sink = *(ptr + res.offset + 1) == 0;
+        res.failed = !res.accepting && res.sink;
         return res;
     }
 
     vector< SegrulesState > SegrulesFSA::createInitialTransitionsVector() {
-        vector< SegrulesState > res(256, SegrulesState());
+        vector< SegrulesState > res(256, SegrulesState::FAILED_STATE);
         const unsigned char* currPtr = ptr + initialState.offset + 1;
         const unsigned char transitionsNum = *currPtr++;
         for (int i = 0; i < transitionsNum; i++) {
diff --git a/morfeusz/segrules/SegrulesFSA.hpp b/morfeusz/segrules/SegrulesFSA.hpp
index 1498c95..c026f05 100644
--- a/morfeusz/segrules/SegrulesFSA.hpp
+++ b/morfeusz/segrules/SegrulesFSA.hpp
@@ -20,8 +20,9 @@ struct SegrulesState {
     bool weak;
     bool shiftOrthFromPrevious;
     bool sink;
+    bool failed;
     
-    static SegrulesState SINK_STATE;
+    static SegrulesState FAILED_STATE;
 };
 
 inline bool operator<(const SegrulesState& s1, const SegrulesState& s2) {
@@ -32,15 +33,16 @@ class SegrulesFSA {
 public:
 
     SegrulesFSA(const unsigned char* ptr) : initialState(), ptr(ptr), initialTransitions() {
-        SegrulesState state = {0, false, false, false, false};
+        SegrulesState state = {0, false, false, false, false, false};
         initialState = state;
         initialTransitions = createInitialTransitionsVector();
     }
 
-    SegrulesState proceedToNext(
+    void proceedToNext(
             const unsigned char segnum,
             const SegrulesState& state,
-            bool atEndOfWord) const;
+            bool atEndOfWord,
+            SegrulesState& resState) const;
 
     virtual ~SegrulesFSA() {
     }
@@ -54,14 +56,16 @@ private:
     
     std::vector< SegrulesState > createInitialTransitionsVector();
     
-    SegrulesState doProceedFromInitialState(
+    void doProceedFromInitialState(
             const unsigned char segnum,
-            bool atEndOfWord) const;
+            bool atEndOfWord,
+            SegrulesState& resState) const;
     
-    SegrulesState doProceedFromNonInitialState(
+    void doProceedFromNonInitialState(
             const unsigned char segnum,
             const SegrulesState& state,
-            bool atEndOfWord) const;
+            bool atEndOfWord,
+            SegrulesState& resState) const;
 };
 
 }
diff --git a/profile.sh b/profile.sh
index c607808..db7daa4 100755
--- a/profile.sh
+++ b/profile.sh
@@ -1,12 +1,12 @@
 #!/bin/bash
 
-rm -rf profbuild
-mkdir -p profbuild
-cd profbuild
-cmake -D INPUT_DICTIONARIES=../input/dodatki.tab,../input/PoliMorfSmall.tab -D CMAKE_BUILD_TYPE=Debug -D CMAKE_CXX_FLAGS="-g -O2" -D CMAKE_SHARED_LINKER_FLAGS="-lprofiler" -D CMAKE_EXE_LINKER_FLAGS="-lprofiler" ..
-make
+#~ rm -rf profbuild
+#~ mkdir -p profbuild
+#~ cd profbuild
+#~ cmake -D INPUT_DICTIONARIES=/home/wkieras/input/dodatki.tab,../input/PoliMorfSmall.tab -D CMAKE_BUILD_TYPE=Debug -D CMAKE_CXX_FLAGS="-g -O2" -D CMAKE_SHARED_LINKER_FLAGS="-lprofiler" -D CMAKE_EXE_LINKER_FLAGS="-lprofiler" ..
+#~ make
 rm -f /tmp/morfeusz.prof
 export LD_PRELOAD="/usr/lib/libprofiler.so"
 export CPUPROFILE="/tmp/morfeusz.prof"
-morfeusz/morfeusz_analyzer -i /home/wkieras/output/sgjp_analyzer.fsa < /mnt/storage/morfeusz/sents10k > /dev/null
+morfeusz/morfeusz_analyzer --dict sgjp --dict-dir "${MORFEUSZ_DICT_DIR:-/home/mlenart/opt/morfeusz/buildall/Linux-i386-false/_CPack_Packages/Linux/DEB/morfeusz2-2.0.0_sgjp-Linux-i386/usr/share/morfeusz/dictionaries}" < "${MORFEUSZ_PROFILE_INPUT:-/mnt/storage/morfeusz/sents10k}" > /dev/null # dictionary dir and input corpus can be overridden via MORFEUSZ_DICT_DIR / MORFEUSZ_PROFILE_INPUT; defaults keep the original paths
 ### pprof --gv profbuild/morfeusz/morfeusz_analyzer /tmp/morfeusz.prof
--
libgit2 0.22.2