From 96182ff07bfb4683c51c56fc23b351f34b71dc5b Mon Sep 17 00:00:00 2001 From: Michał Lenart <michall@ipipan.waw.pl> Date: Thu, 21 Aug 2014 14:12:50 +0000 Subject: [PATCH] - drobne poprawki wydajnościowe --- morfeusz/MorfeuszImpl.cpp | 26 +++++--------------------- morfeusz/segrules/SegrulesFSA.cpp | 39 ++++++++++++++++++++------------------- morfeusz/segrules/SegrulesFSA.hpp | 20 ++++++++++++-------- profile.sh | 12 ++++++------ 4 files changed, 43 insertions(+), 54 deletions(-) diff --git a/morfeusz/MorfeuszImpl.cpp b/morfeusz/MorfeuszImpl.cpp index f4ba924..9b1811d 100644 --- a/morfeusz/MorfeuszImpl.cpp +++ b/morfeusz/MorfeuszImpl.cpp @@ -342,9 +342,9 @@ namespace morfeusz { } bool caseMatches = env.getCasePatternHelper().checkInterpsGroupOrthCasePatterns(env, reader.getWordStartPtr(), reader.getCurrPtr(), ig); if (caseMatches || options.caseHandling == CONDITIONALLY_CASE_SENSITIVE) { - - SegrulesState newSegrulesState = env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, isAtWhitespace); - if (!newSegrulesState.sink) { + SegrulesState newSegrulesState = SegrulesState::FAILED_STATE; + env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, isAtWhitespace, newSegrulesState); + if (!newSegrulesState.failed) { InterpretedChunk ic( createChunk(ig, reader, newSegrulesState.shiftOrthFromPrevious, homonymId)); @@ -356,27 +356,11 @@ namespace morfeusz { newSegrulesState, ic); } -// if (!newSegrulesStates.empty()) { -// for (unsigned int i = 0; i < newSegrulesStates.size(); i++) { -// const SegrulesState& newSegrulesState = newSegrulesStates[i]; -// -// InterpretedChunk ic( -// createChunk(ig, reader, newSegrulesState.shiftOrthFromPrevious, homonymId)); -// -// processInterpretedChunk( -// env, -// reader, -// isAtWhitespace, -// caseMatches, -// newSegrulesState, -// ic); -// } -// newSegrulesStates.resize(0); -// } else if (this->options.debug) { std::cerr << "NOT ACCEPTING (segmentation)" << debugAccum(accum) << debugInterpsGroup(ig.type, reader.getWordStartPtr(), reader.getCurrPtr()) << std::endl; } - } else if (this->options.debug) { + } + else if (this->options.debug) { std::cerr << "NOT ACCEPTING (case)" << debugAccum(accum) << debugInterpsGroup(ig.type, reader.getWordStartPtr(), reader.getCurrPtr()) << std::endl; } } diff --git a/morfeusz/segrules/SegrulesFSA.cpp b/morfeusz/segrules/SegrulesFSA.cpp index 192effd..9fc5c80 100644 --- a/morfeusz/segrules/SegrulesFSA.cpp +++ b/morfeusz/segrules/SegrulesFSA.cpp @@ -8,41 +8,44 @@ using namespace std; namespace morfeusz { - SegrulesState SegrulesState::SINK_STATE = { + SegrulesState SegrulesState::FAILED_STATE = { 0, // offset false, // accepting false, // weak false, // shift orth - true // sink + true, // sink + true, // failed }; - SegrulesState SegrulesFSA::proceedToNext( + void SegrulesFSA::proceedToNext( const unsigned char segnum, const SegrulesState& state, - bool atEndOfWord) const { + bool atEndOfWord, + SegrulesState& resState) const { + assert(!state.failed); if (state.offset == 0) { - return doProceedFromInitialState(segnum, atEndOfWord); + doProceedFromInitialState(segnum, atEndOfWord, resState); } else { - return doProceedFromNonInitialState(segnum, state, atEndOfWord); + doProceedFromNonInitialState(segnum, state, atEndOfWord, resState); } } - SegrulesState SegrulesFSA::doProceedFromInitialState( + void SegrulesFSA::doProceedFromInitialState( const unsigned char segnum, - bool atEndOfWord) const { + bool atEndOfWord, + SegrulesState& resState) const { const SegrulesState& newState = initialTransitions[segnum]; if ((atEndOfWord && newState.accepting) || (!atEndOfWord && !newState.sink)) { - return newState; - } else { - return SegrulesState::SINK_STATE; + resState = newState; } } - SegrulesState SegrulesFSA::doProceedFromNonInitialState( + void SegrulesFSA::doProceedFromNonInitialState( const unsigned char segnum, const SegrulesState& state, - bool atEndOfWord) const { + bool atEndOfWord, + SegrulesState& resState) const { const unsigned char* currPtr = ptr + state.offset + 1; const unsigned char transitionsNum = *currPtr++; for (int i = 0; i < transitionsNum; i++) { @@ -50,14 +53,11 @@ namespace morfeusz { SegrulesState newState = this->transition2State(currPtr); if ((atEndOfWord && newState.accepting) || (!atEndOfWord && !newState.sink)) { - return newState; - } else { - return SegrulesState::SINK_STATE; + resState = newState; } } currPtr += 4; } - return SegrulesState::SINK_STATE; } SegrulesState SegrulesFSA::transition2State(const unsigned char* transitionPtr) const { @@ -69,12 +69,13 @@ namespace morfeusz { res.offset = readInt16(transitionPtr); res.accepting = *(ptr + res.offset) & ACCEPTING_FLAG; res.weak = *(ptr + res.offset) & WEAK_FLAG; - res.sink = !res.accepting && *(ptr + res.offset + 1) == 0; + res.sink = *(ptr + res.offset + 1) == 0; + res.failed = !res.accepting && res.sink; return res; } vector< SegrulesState > SegrulesFSA::createInitialTransitionsVector() { - vector< SegrulesState > res(256, SegrulesState()); + vector< SegrulesState > res(256, SegrulesState::FAILED_STATE); const unsigned char* currPtr = ptr + initialState.offset + 1; const unsigned char transitionsNum = *currPtr++; for (int i = 0; i < transitionsNum; i++) { diff --git a/morfeusz/segrules/SegrulesFSA.hpp b/morfeusz/segrules/SegrulesFSA.hpp index 1498c95..c026f05 100644 --- a/morfeusz/segrules/SegrulesFSA.hpp +++ b/morfeusz/segrules/SegrulesFSA.hpp @@ -20,8 +20,9 @@ struct SegrulesState { bool weak; bool shiftOrthFromPrevious; bool sink; + bool failed; - static SegrulesState SINK_STATE; + static SegrulesState FAILED_STATE; }; inline bool operator<(const SegrulesState& s1, const SegrulesState& s2) { @@ -32,15 +33,16 @@ class SegrulesFSA { public: SegrulesFSA(const unsigned char* ptr) : initialState(), ptr(ptr), initialTransitions() { - SegrulesState state = {0, false, false, false, false}; + SegrulesState state = {0, false, false, false, false, false}; initialState = state; initialTransitions = createInitialTransitionsVector(); } - SegrulesState proceedToNext( + void proceedToNext( const unsigned char segnum, const SegrulesState& state, - bool atEndOfWord) const; + bool atEndOfWord, + SegrulesState& resState) const; virtual ~SegrulesFSA() { } @@ -54,14 +56,16 @@ private: std::vector< SegrulesState > createInitialTransitionsVector(); - SegrulesState doProceedFromInitialState( + void doProceedFromInitialState( const unsigned char segnum, - bool atEndOfWord) const; + bool atEndOfWord, + SegrulesState& resState) const; - SegrulesState doProceedFromNonInitialState( + void doProceedFromNonInitialState( const unsigned char segnum, const SegrulesState& state, - bool atEndOfWord) const; + bool atEndOfWord, + SegrulesState& resState) const; }; } diff --git a/profile.sh b/profile.sh index c607808..db7daa4 100755 --- a/profile.sh +++ b/profile.sh @@ -1,12 +1,12 @@ #!/bin/bash -rm -rf profbuild -mkdir -p profbuild -cd profbuild -cmake -D INPUT_DICTIONARIES=../input/dodatki.tab,../input/PoliMorfSmall.tab -D CMAKE_BUILD_TYPE=Debug -D CMAKE_CXX_FLAGS="-g -O2" -D CMAKE_SHARED_LINKER_FLAGS="-lprofiler" -D CMAKE_EXE_LINKER_FLAGS="-lprofiler" .. -make +#~ rm -rf profbuild +#~ mkdir -p profbuild +#~ cd profbuild +#~ cmake -D INPUT_DICTIONARIES=/home/wkieras/input/dodatki.tab,../input/PoliMorfSmall.tab -D CMAKE_BUILD_TYPE=Debug -D CMAKE_CXX_FLAGS="-g -O2" -D CMAKE_SHARED_LINKER_FLAGS="-lprofiler" -D CMAKE_EXE_LINKER_FLAGS="-lprofiler" .. +#~ make rm -f /tmp/morfeusz.prof export LD_PRELOAD="/usr/lib/libprofiler.so" export CPUPROFILE="/tmp/morfeusz.prof" -morfeusz/morfeusz_analyzer -i /home/wkieras/output/sgjp_analyzer.fsa < /mnt/storage/morfeusz/sents10k > /dev/null +morfeusz/morfeusz_analyzer --dict sgjp --dict-dir /home/mlenart/opt/morfeusz/buildall/Linux-i386-false/_CPack_Packages/Linux/DEB/morfeusz2-2.0.0_sgjp-Linux-i386/usr/share/morfeusz/dictionaries < /mnt/storage/morfeusz/sents10k > /dev/null ### pprof --gv profbuild/morfeusz/morfeusz_analyzer /tmp/morfeusz.prof -- libgit2 0.22.2