Commit 96182ff07bfb4683c51c56fc23b351f34b71dc5b

Authored by Michał Lenart
1 parent 7508ece1

- drobne poprawki wydajnościowe

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/trunk@276 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
morfeusz/MorfeuszImpl.cpp
@@ -342,9 +342,9 @@ namespace morfeusz { @@ -342,9 +342,9 @@ namespace morfeusz {
342 } 342 }
343 bool caseMatches = env.getCasePatternHelper().checkInterpsGroupOrthCasePatterns(env, reader.getWordStartPtr(), reader.getCurrPtr(), ig); 343 bool caseMatches = env.getCasePatternHelper().checkInterpsGroupOrthCasePatterns(env, reader.getWordStartPtr(), reader.getCurrPtr(), ig);
344 if (caseMatches || options.caseHandling == CONDITIONALLY_CASE_SENSITIVE) { 344 if (caseMatches || options.caseHandling == CONDITIONALLY_CASE_SENSITIVE) {
345 -  
346 - SegrulesState newSegrulesState = env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, isAtWhitespace);  
347 - if (!newSegrulesState.sink) { 345 + SegrulesState newSegrulesState = SegrulesState::FAILED_STATE;
  346 + env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, isAtWhitespace, newSegrulesState);
  347 + if (!newSegrulesState.failed) {
348 InterpretedChunk ic( 348 InterpretedChunk ic(
349 createChunk(ig, reader, newSegrulesState.shiftOrthFromPrevious, homonymId)); 349 createChunk(ig, reader, newSegrulesState.shiftOrthFromPrevious, homonymId));
350 350
@@ -356,27 +356,11 @@ namespace morfeusz { @@ -356,27 +356,11 @@ namespace morfeusz {
356 newSegrulesState, 356 newSegrulesState,
357 ic); 357 ic);
358 } 358 }
359 -// if (!newSegrulesStates.empty()) {  
360 -// for (unsigned int i = 0; i < newSegrulesStates.size(); i++) {  
361 -// const SegrulesState& newSegrulesState = newSegrulesStates[i];  
362 -//  
363 -// InterpretedChunk ic(  
364 -// createChunk(ig, reader, newSegrulesState.shiftOrthFromPrevious, homonymId));  
365 -//  
366 -// processInterpretedChunk(  
367 -// env,  
368 -// reader,  
369 -// isAtWhitespace,  
370 -// caseMatches,  
371 -// newSegrulesState,  
372 -// ic);  
373 -// }  
374 -// newSegrulesStates.resize(0);  
375 -// }  
376 else if (this->options.debug) { 359 else if (this->options.debug) {
377 std::cerr << "NOT ACCEPTING (segmentation)" << debugAccum(accum) << debugInterpsGroup(ig.type, reader.getWordStartPtr(), reader.getCurrPtr()) << std::endl; 360 std::cerr << "NOT ACCEPTING (segmentation)" << debugAccum(accum) << debugInterpsGroup(ig.type, reader.getWordStartPtr(), reader.getCurrPtr()) << std::endl;
378 } 361 }
379 - } else if (this->options.debug) { 362 + }
  363 + else if (this->options.debug) {
380 std::cerr << "NOT ACCEPTING (case)" << debugAccum(accum) << debugInterpsGroup(ig.type, reader.getWordStartPtr(), reader.getCurrPtr()) << std::endl; 364 std::cerr << "NOT ACCEPTING (case)" << debugAccum(accum) << debugInterpsGroup(ig.type, reader.getWordStartPtr(), reader.getCurrPtr()) << std::endl;
381 } 365 }
382 } 366 }
morfeusz/segrules/SegrulesFSA.cpp
@@ -8,41 +8,44 @@ using namespace std; @@ -8,41 +8,44 @@ using namespace std;
8 8
9 namespace morfeusz { 9 namespace morfeusz {
10 10
11 - SegrulesState SegrulesState::SINK_STATE = { 11 + SegrulesState SegrulesState::FAILED_STATE = {
12 0, // offset 12 0, // offset
13 false, // accepting 13 false, // accepting
14 false, // weak 14 false, // weak
15 false, // shift orth 15 false, // shift orth
16 - true // sink 16 + true, // sink
  17 + true, // failed
17 }; 18 };
18 19
19 - SegrulesState SegrulesFSA::proceedToNext( 20 + void SegrulesFSA::proceedToNext(
20 const unsigned char segnum, 21 const unsigned char segnum,
21 const SegrulesState& state, 22 const SegrulesState& state,
22 - bool atEndOfWord) const { 23 + bool atEndOfWord,
  24 + SegrulesState& resState) const {
  25 + assert(!state.failed);
23 if (state.offset == 0) { 26 if (state.offset == 0) {
24 - return doProceedFromInitialState(segnum, atEndOfWord); 27 + doProceedFromInitialState(segnum, atEndOfWord, resState);
25 } else { 28 } else {
26 - return doProceedFromNonInitialState(segnum, state, atEndOfWord); 29 + doProceedFromNonInitialState(segnum, state, atEndOfWord, resState);
27 } 30 }
28 } 31 }
29 32
30 - SegrulesState SegrulesFSA::doProceedFromInitialState( 33 + void SegrulesFSA::doProceedFromInitialState(
31 const unsigned char segnum, 34 const unsigned char segnum,
32 - bool atEndOfWord) const { 35 + bool atEndOfWord,
  36 + SegrulesState& resState) const {
33 const SegrulesState& newState = initialTransitions[segnum]; 37 const SegrulesState& newState = initialTransitions[segnum];
34 if ((atEndOfWord && newState.accepting) 38 if ((atEndOfWord && newState.accepting)
35 || (!atEndOfWord && !newState.sink)) { 39 || (!atEndOfWord && !newState.sink)) {
36 - return newState;  
37 - } else {  
38 - return SegrulesState::SINK_STATE; 40 + resState = newState;
39 } 41 }
40 } 42 }
41 43
42 - SegrulesState SegrulesFSA::doProceedFromNonInitialState( 44 + void SegrulesFSA::doProceedFromNonInitialState(
43 const unsigned char segnum, 45 const unsigned char segnum,
44 const SegrulesState& state, 46 const SegrulesState& state,
45 - bool atEndOfWord) const { 47 + bool atEndOfWord,
  48 + SegrulesState& resState) const {
46 const unsigned char* currPtr = ptr + state.offset + 1; 49 const unsigned char* currPtr = ptr + state.offset + 1;
47 const unsigned char transitionsNum = *currPtr++; 50 const unsigned char transitionsNum = *currPtr++;
48 for (int i = 0; i < transitionsNum; i++) { 51 for (int i = 0; i < transitionsNum; i++) {
@@ -50,14 +53,11 @@ namespace morfeusz { @@ -50,14 +53,11 @@ namespace morfeusz {
50 SegrulesState newState = this->transition2State(currPtr); 53 SegrulesState newState = this->transition2State(currPtr);
51 if ((atEndOfWord && newState.accepting) 54 if ((atEndOfWord && newState.accepting)
52 || (!atEndOfWord && !newState.sink)) { 55 || (!atEndOfWord && !newState.sink)) {
53 - return newState;  
54 - } else {  
55 - return SegrulesState::SINK_STATE; 56 + resState = newState;
56 } 57 }
57 } 58 }
58 currPtr += 4; 59 currPtr += 4;
59 } 60 }
60 - return SegrulesState::SINK_STATE;  
61 } 61 }
62 62
63 SegrulesState SegrulesFSA::transition2State(const unsigned char* transitionPtr) const { 63 SegrulesState SegrulesFSA::transition2State(const unsigned char* transitionPtr) const {
@@ -69,12 +69,13 @@ namespace morfeusz { @@ -69,12 +69,13 @@ namespace morfeusz {
69 res.offset = readInt16(transitionPtr); 69 res.offset = readInt16(transitionPtr);
70 res.accepting = *(ptr + res.offset) & ACCEPTING_FLAG; 70 res.accepting = *(ptr + res.offset) & ACCEPTING_FLAG;
71 res.weak = *(ptr + res.offset) & WEAK_FLAG; 71 res.weak = *(ptr + res.offset) & WEAK_FLAG;
72 - res.sink = !res.accepting && *(ptr + res.offset + 1) == 0; 72 + res.sink = *(ptr + res.offset + 1) == 0;
  73 + res.failed = !res.accepting && res.sink;
73 return res; 74 return res;
74 } 75 }
75 76
76 vector< SegrulesState > SegrulesFSA::createInitialTransitionsVector() { 77 vector< SegrulesState > SegrulesFSA::createInitialTransitionsVector() {
77 - vector< SegrulesState > res(256, SegrulesState()); 78 + vector< SegrulesState > res(256, SegrulesState::FAILED_STATE);
78 const unsigned char* currPtr = ptr + initialState.offset + 1; 79 const unsigned char* currPtr = ptr + initialState.offset + 1;
79 const unsigned char transitionsNum = *currPtr++; 80 const unsigned char transitionsNum = *currPtr++;
80 for (int i = 0; i < transitionsNum; i++) { 81 for (int i = 0; i < transitionsNum; i++) {
morfeusz/segrules/SegrulesFSA.hpp
@@ -20,8 +20,9 @@ struct SegrulesState { @@ -20,8 +20,9 @@ struct SegrulesState {
20 bool weak; 20 bool weak;
21 bool shiftOrthFromPrevious; 21 bool shiftOrthFromPrevious;
22 bool sink; 22 bool sink;
  23 + bool failed;
23 24
24 - static SegrulesState SINK_STATE; 25 + static SegrulesState FAILED_STATE;
25 }; 26 };
26 27
27 inline bool operator<(const SegrulesState& s1, const SegrulesState& s2) { 28 inline bool operator<(const SegrulesState& s1, const SegrulesState& s2) {
@@ -32,15 +33,16 @@ class SegrulesFSA { @@ -32,15 +33,16 @@ class SegrulesFSA {
32 public: 33 public:
33 34
34 SegrulesFSA(const unsigned char* ptr) : initialState(), ptr(ptr), initialTransitions() { 35 SegrulesFSA(const unsigned char* ptr) : initialState(), ptr(ptr), initialTransitions() {
35 - SegrulesState state = {0, false, false, false, false}; 36 + SegrulesState state = {0, false, false, false, false, false};
36 initialState = state; 37 initialState = state;
37 initialTransitions = createInitialTransitionsVector(); 38 initialTransitions = createInitialTransitionsVector();
38 } 39 }
39 40
40 - SegrulesState proceedToNext( 41 + void proceedToNext(
41 const unsigned char segnum, 42 const unsigned char segnum,
42 const SegrulesState& state, 43 const SegrulesState& state,
43 - bool atEndOfWord) const; 44 + bool atEndOfWord,
  45 + SegrulesState& resState) const;
44 46
45 virtual ~SegrulesFSA() { 47 virtual ~SegrulesFSA() {
46 } 48 }
@@ -54,14 +56,16 @@ private: @@ -54,14 +56,16 @@ private:
54 56
55 std::vector< SegrulesState > createInitialTransitionsVector(); 57 std::vector< SegrulesState > createInitialTransitionsVector();
56 58
57 - SegrulesState doProceedFromInitialState( 59 + void doProceedFromInitialState(
58 const unsigned char segnum, 60 const unsigned char segnum,
59 - bool atEndOfWord) const; 61 + bool atEndOfWord,
  62 + SegrulesState& resState) const;
60 63
61 - SegrulesState doProceedFromNonInitialState( 64 + void doProceedFromNonInitialState(
62 const unsigned char segnum, 65 const unsigned char segnum,
63 const SegrulesState& state, 66 const SegrulesState& state,
64 - bool atEndOfWord) const; 67 + bool atEndOfWord,
  68 + SegrulesState& resState) const;
65 }; 69 };
66 70
67 } 71 }
profile.sh
1 #!/bin/bash 1 #!/bin/bash
2 2
3 -rm -rf profbuild  
4 -mkdir -p profbuild  
5 -cd profbuild  
6 -cmake -D INPUT_DICTIONARIES=../input/dodatki.tab,../input/PoliMorfSmall.tab -D CMAKE_BUILD_TYPE=Debug -D CMAKE_CXX_FLAGS="-g -O2" -D CMAKE_SHARED_LINKER_FLAGS="-lprofiler" -D CMAKE_EXE_LINKER_FLAGS="-lprofiler" ..  
7 -make 3 +#~ rm -rf profbuild
  4 +#~ mkdir -p profbuild
  5 +#~ cd profbuild
  6 +#~ cmake -D INPUT_DICTIONARIES=/home/wkieras/input/dodatki.tab,../input/PoliMorfSmall.tab -D CMAKE_BUILD_TYPE=Debug -D CMAKE_CXX_FLAGS="-g -O2" -D CMAKE_SHARED_LINKER_FLAGS="-lprofiler" -D CMAKE_EXE_LINKER_FLAGS="-lprofiler" ..
  7 +#~ make
8 rm -f /tmp/morfeusz.prof 8 rm -f /tmp/morfeusz.prof
9 export LD_PRELOAD="/usr/lib/libprofiler.so" 9 export LD_PRELOAD="/usr/lib/libprofiler.so"
10 export CPUPROFILE="/tmp/morfeusz.prof" 10 export CPUPROFILE="/tmp/morfeusz.prof"
11 -morfeusz/morfeusz_analyzer -i /home/wkieras/output/sgjp_analyzer.fsa < /mnt/storage/morfeusz/sents10k > /dev/null 11 +morfeusz/morfeusz_analyzer --dict sgjp --dict-dir /home/mlenart/opt/morfeusz/buildall/Linux-i386-false/_CPack_Packages/Linux/DEB/morfeusz2-2.0.0_sgjp-Linux-i386/usr/share/morfeusz/dictionaries < /mnt/storage/morfeusz/sents10k > /dev/null
12 ### pprof --gv profbuild/morfeusz/morfeusz_analyzer /tmp/morfeusz.prof 12 ### pprof --gv profbuild/morfeusz/morfeusz_analyzer /tmp/morfeusz.prof