Commit 96182ff07bfb4683c51c56fc23b351f34b71dc5b
1 parent: 7508ece1
- drobne poprawki wydajnościowe
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/trunk@276 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing 4 changed files with 43 additions and 54 deletions
morfeusz/MorfeuszImpl.cpp
... | ... | @@ -342,9 +342,9 @@ namespace morfeusz { |
342 | 342 | } |
343 | 343 | bool caseMatches = env.getCasePatternHelper().checkInterpsGroupOrthCasePatterns(env, reader.getWordStartPtr(), reader.getCurrPtr(), ig); |
344 | 344 | if (caseMatches || options.caseHandling == CONDITIONALLY_CASE_SENSITIVE) { |
345 | - | |
346 | - SegrulesState newSegrulesState = env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, isAtWhitespace); | |
347 | - if (!newSegrulesState.sink) { | |
345 | + SegrulesState newSegrulesState = SegrulesState::FAILED_STATE; | |
346 | + env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, isAtWhitespace, newSegrulesState); | |
347 | + if (!newSegrulesState.failed) { | |
348 | 348 | InterpretedChunk ic( |
349 | 349 | createChunk(ig, reader, newSegrulesState.shiftOrthFromPrevious, homonymId)); |
350 | 350 | |
... | ... | @@ -356,27 +356,11 @@ namespace morfeusz { |
356 | 356 | newSegrulesState, |
357 | 357 | ic); |
358 | 358 | } |
359 | -// if (!newSegrulesStates.empty()) { | |
360 | -// for (unsigned int i = 0; i < newSegrulesStates.size(); i++) { | |
361 | -// const SegrulesState& newSegrulesState = newSegrulesStates[i]; | |
362 | -// | |
363 | -// InterpretedChunk ic( | |
364 | -// createChunk(ig, reader, newSegrulesState.shiftOrthFromPrevious, homonymId)); | |
365 | -// | |
366 | -// processInterpretedChunk( | |
367 | -// env, | |
368 | -// reader, | |
369 | -// isAtWhitespace, | |
370 | -// caseMatches, | |
371 | -// newSegrulesState, | |
372 | -// ic); | |
373 | -// } | |
374 | -// newSegrulesStates.resize(0); | |
375 | -// } | |
376 | 359 | else if (this->options.debug) { |
377 | 360 | std::cerr << "NOT ACCEPTING (segmentation)" << debugAccum(accum) << debugInterpsGroup(ig.type, reader.getWordStartPtr(), reader.getCurrPtr()) << std::endl; |
378 | 361 | } |
379 | - } else if (this->options.debug) { | |
362 | + } | |
363 | + else if (this->options.debug) { | |
380 | 364 | std::cerr << "NOT ACCEPTING (case)" << debugAccum(accum) << debugInterpsGroup(ig.type, reader.getWordStartPtr(), reader.getCurrPtr()) << std::endl; |
381 | 365 | } |
382 | 366 | } |
... | ... |
morfeusz/segrules/SegrulesFSA.cpp
... | ... | @@ -8,41 +8,44 @@ using namespace std; |
8 | 8 | |
9 | 9 | namespace morfeusz { |
10 | 10 | |
11 | - SegrulesState SegrulesState::SINK_STATE = { | |
11 | + SegrulesState SegrulesState::FAILED_STATE = { | |
12 | 12 | 0, // offset |
13 | 13 | false, // accepting |
14 | 14 | false, // weak |
15 | 15 | false, // shift orth |
16 | - true // sink | |
16 | + true, // sink | |
17 | + true, // failed | |
17 | 18 | }; |
18 | 19 | |
19 | - SegrulesState SegrulesFSA::proceedToNext( | |
20 | + void SegrulesFSA::proceedToNext( | |
20 | 21 | const unsigned char segnum, |
21 | 22 | const SegrulesState& state, |
22 | - bool atEndOfWord) const { | |
23 | + bool atEndOfWord, | |
24 | + SegrulesState& resState) const { | |
25 | + assert(!state.failed); | |
23 | 26 | if (state.offset == 0) { |
24 | - return doProceedFromInitialState(segnum, atEndOfWord); | |
27 | + doProceedFromInitialState(segnum, atEndOfWord, resState); | |
25 | 28 | } else { |
26 | - return doProceedFromNonInitialState(segnum, state, atEndOfWord); | |
29 | + doProceedFromNonInitialState(segnum, state, atEndOfWord, resState); | |
27 | 30 | } |
28 | 31 | } |
29 | 32 | |
30 | - SegrulesState SegrulesFSA::doProceedFromInitialState( | |
33 | + void SegrulesFSA::doProceedFromInitialState( | |
31 | 34 | const unsigned char segnum, |
32 | - bool atEndOfWord) const { | |
35 | + bool atEndOfWord, | |
36 | + SegrulesState& resState) const { | |
33 | 37 | const SegrulesState& newState = initialTransitions[segnum]; |
34 | 38 | if ((atEndOfWord && newState.accepting) |
35 | 39 | || (!atEndOfWord && !newState.sink)) { |
36 | - return newState; | |
37 | - } else { | |
38 | - return SegrulesState::SINK_STATE; | |
40 | + resState = newState; | |
39 | 41 | } |
40 | 42 | } |
41 | 43 | |
42 | - SegrulesState SegrulesFSA::doProceedFromNonInitialState( | |
44 | + void SegrulesFSA::doProceedFromNonInitialState( | |
43 | 45 | const unsigned char segnum, |
44 | 46 | const SegrulesState& state, |
45 | - bool atEndOfWord) const { | |
47 | + bool atEndOfWord, | |
48 | + SegrulesState& resState) const { | |
46 | 49 | const unsigned char* currPtr = ptr + state.offset + 1; |
47 | 50 | const unsigned char transitionsNum = *currPtr++; |
48 | 51 | for (int i = 0; i < transitionsNum; i++) { |
... | ... | @@ -50,14 +53,11 @@ namespace morfeusz { |
50 | 53 | SegrulesState newState = this->transition2State(currPtr); |
51 | 54 | if ((atEndOfWord && newState.accepting) |
52 | 55 | || (!atEndOfWord && !newState.sink)) { |
53 | - return newState; | |
54 | - } else { | |
55 | - return SegrulesState::SINK_STATE; | |
56 | + resState = newState; | |
56 | 57 | } |
57 | 58 | } |
58 | 59 | currPtr += 4; |
59 | 60 | } |
60 | - return SegrulesState::SINK_STATE; | |
61 | 61 | } |
62 | 62 | |
63 | 63 | SegrulesState SegrulesFSA::transition2State(const unsigned char* transitionPtr) const { |
... | ... | @@ -69,12 +69,13 @@ namespace morfeusz { |
69 | 69 | res.offset = readInt16(transitionPtr); |
70 | 70 | res.accepting = *(ptr + res.offset) & ACCEPTING_FLAG; |
71 | 71 | res.weak = *(ptr + res.offset) & WEAK_FLAG; |
72 | - res.sink = !res.accepting && *(ptr + res.offset + 1) == 0; | |
72 | + res.sink = *(ptr + res.offset + 1) == 0; | |
73 | + res.failed = !res.accepting && res.sink; | |
73 | 74 | return res; |
74 | 75 | } |
75 | 76 | |
76 | 77 | vector< SegrulesState > SegrulesFSA::createInitialTransitionsVector() { |
77 | - vector< SegrulesState > res(256, SegrulesState()); | |
78 | + vector< SegrulesState > res(256, SegrulesState::FAILED_STATE); | |
78 | 79 | const unsigned char* currPtr = ptr + initialState.offset + 1; |
79 | 80 | const unsigned char transitionsNum = *currPtr++; |
80 | 81 | for (int i = 0; i < transitionsNum; i++) { |
... | ... |
morfeusz/segrules/SegrulesFSA.hpp
... | ... | @@ -20,8 +20,9 @@ struct SegrulesState { |
20 | 20 | bool weak; |
21 | 21 | bool shiftOrthFromPrevious; |
22 | 22 | bool sink; |
23 | + bool failed; | |
23 | 24 | |
24 | - static SegrulesState SINK_STATE; | |
25 | + static SegrulesState FAILED_STATE; | |
25 | 26 | }; |
26 | 27 | |
27 | 28 | inline bool operator<(const SegrulesState& s1, const SegrulesState& s2) { |
... | ... | @@ -32,15 +33,16 @@ class SegrulesFSA { |
32 | 33 | public: |
33 | 34 | |
34 | 35 | SegrulesFSA(const unsigned char* ptr) : initialState(), ptr(ptr), initialTransitions() { |
35 | - SegrulesState state = {0, false, false, false, false}; | |
36 | + SegrulesState state = {0, false, false, false, false, false}; | |
36 | 37 | initialState = state; |
37 | 38 | initialTransitions = createInitialTransitionsVector(); |
38 | 39 | } |
39 | 40 | |
40 | - SegrulesState proceedToNext( | |
41 | + void proceedToNext( | |
41 | 42 | const unsigned char segnum, |
42 | 43 | const SegrulesState& state, |
43 | - bool atEndOfWord) const; | |
44 | + bool atEndOfWord, | |
45 | + SegrulesState& resState) const; | |
44 | 46 | |
45 | 47 | virtual ~SegrulesFSA() { |
46 | 48 | } |
... | ... | @@ -54,14 +56,16 @@ private: |
54 | 56 | |
55 | 57 | std::vector< SegrulesState > createInitialTransitionsVector(); |
56 | 58 | |
57 | - SegrulesState doProceedFromInitialState( | |
59 | + void doProceedFromInitialState( | |
58 | 60 | const unsigned char segnum, |
59 | - bool atEndOfWord) const; | |
61 | + bool atEndOfWord, | |
62 | + SegrulesState& resState) const; | |
60 | 63 | |
61 | - SegrulesState doProceedFromNonInitialState( | |
64 | + void doProceedFromNonInitialState( | |
62 | 65 | const unsigned char segnum, |
63 | 66 | const SegrulesState& state, |
64 | - bool atEndOfWord) const; | |
67 | + bool atEndOfWord, | |
68 | + SegrulesState& resState) const; | |
65 | 69 | }; |
66 | 70 | |
67 | 71 | } |
... | ... |
profile.sh
1 | 1 | #!/bin/bash |
2 | 2 | |
3 | -rm -rf profbuild | |
4 | -mkdir -p profbuild | |
5 | -cd profbuild | |
6 | -cmake -D INPUT_DICTIONARIES=../input/dodatki.tab,../input/PoliMorfSmall.tab -D CMAKE_BUILD_TYPE=Debug -D CMAKE_CXX_FLAGS="-g -O2" -D CMAKE_SHARED_LINKER_FLAGS="-lprofiler" -D CMAKE_EXE_LINKER_FLAGS="-lprofiler" .. | |
7 | -make | |
3 | +#~ rm -rf profbuild | |
4 | +#~ mkdir -p profbuild | |
5 | +#~ cd profbuild | |
6 | +#~ cmake -D INPUT_DICTIONARIES=/home/wkieras/input/dodatki.tab,../input/PoliMorfSmall.tab -D CMAKE_BUILD_TYPE=Debug -D CMAKE_CXX_FLAGS="-g -O2" -D CMAKE_SHARED_LINKER_FLAGS="-lprofiler" -D CMAKE_EXE_LINKER_FLAGS="-lprofiler" .. | |
7 | +#~ make | |
8 | 8 | rm -f /tmp/morfeusz.prof |
9 | 9 | export LD_PRELOAD="/usr/lib/libprofiler.so" |
10 | 10 | export CPUPROFILE="/tmp/morfeusz.prof" |
11 | -morfeusz/morfeusz_analyzer -i /home/wkieras/output/sgjp_analyzer.fsa < /mnt/storage/morfeusz/sents10k > /dev/null | |
11 | +morfeusz/morfeusz_analyzer --dict sgjp --dict-dir /home/mlenart/opt/morfeusz/buildall/Linux-i386-false/_CPack_Packages/Linux/DEB/morfeusz2-2.0.0_sgjp-Linux-i386/usr/share/morfeusz/dictionaries < /mnt/storage/morfeusz/sents10k > /dev/null | |
12 | 12 | ### pprof --gv profbuild/morfeusz/morfeusz_analyzer /tmp/morfeusz.prof |
... | ... |