Commit 96182ff07bfb4683c51c56fc23b351f34b71dc5b

Authored by Michał Lenart
1 parent 7508ece1

- drobne poprawki wydajnościowe

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/trunk@276 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
morfeusz/MorfeuszImpl.cpp
... ... @@ -342,9 +342,9 @@ namespace morfeusz {
342 342 }
343 343 bool caseMatches = env.getCasePatternHelper().checkInterpsGroupOrthCasePatterns(env, reader.getWordStartPtr(), reader.getCurrPtr(), ig);
344 344 if (caseMatches || options.caseHandling == CONDITIONALLY_CASE_SENSITIVE) {
345   -
346   - SegrulesState newSegrulesState = env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, isAtWhitespace);
347   - if (!newSegrulesState.sink) {
  345 + SegrulesState newSegrulesState = SegrulesState::FAILED_STATE;
  346 + env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, isAtWhitespace, newSegrulesState);
  347 + if (!newSegrulesState.failed) {
348 348 InterpretedChunk ic(
349 349 createChunk(ig, reader, newSegrulesState.shiftOrthFromPrevious, homonymId));
350 350  
... ... @@ -356,27 +356,11 @@ namespace morfeusz {
356 356 newSegrulesState,
357 357 ic);
358 358 }
359   -// if (!newSegrulesStates.empty()) {
360   -// for (unsigned int i = 0; i < newSegrulesStates.size(); i++) {
361   -// const SegrulesState& newSegrulesState = newSegrulesStates[i];
362   -//
363   -// InterpretedChunk ic(
364   -// createChunk(ig, reader, newSegrulesState.shiftOrthFromPrevious, homonymId));
365   -//
366   -// processInterpretedChunk(
367   -// env,
368   -// reader,
369   -// isAtWhitespace,
370   -// caseMatches,
371   -// newSegrulesState,
372   -// ic);
373   -// }
374   -// newSegrulesStates.resize(0);
375   -// }
376 359 else if (this->options.debug) {
377 360 std::cerr << "NOT ACCEPTING (segmentation)" << debugAccum(accum) << debugInterpsGroup(ig.type, reader.getWordStartPtr(), reader.getCurrPtr()) << std::endl;
378 361 }
379   - } else if (this->options.debug) {
  362 + }
  363 + else if (this->options.debug) {
380 364 std::cerr << "NOT ACCEPTING (case)" << debugAccum(accum) << debugInterpsGroup(ig.type, reader.getWordStartPtr(), reader.getCurrPtr()) << std::endl;
381 365 }
382 366 }
... ...
morfeusz/segrules/SegrulesFSA.cpp
... ... @@ -8,41 +8,44 @@ using namespace std;
8 8  
9 9 namespace morfeusz {
10 10  
11   - SegrulesState SegrulesState::SINK_STATE = {
  11 + SegrulesState SegrulesState::FAILED_STATE = {
12 12 0, // offset
13 13 false, // accepting
14 14 false, // weak
15 15 false, // shift orth
16   - true // sink
  16 + true, // sink
  17 + true, // failed
17 18 };
18 19  
19   - SegrulesState SegrulesFSA::proceedToNext(
  20 + void SegrulesFSA::proceedToNext(
20 21 const unsigned char segnum,
21 22 const SegrulesState& state,
22   - bool atEndOfWord) const {
  23 + bool atEndOfWord,
  24 + SegrulesState& resState) const {
  25 + assert(!state.failed);
23 26 if (state.offset == 0) {
24   - return doProceedFromInitialState(segnum, atEndOfWord);
  27 + doProceedFromInitialState(segnum, atEndOfWord, resState);
25 28 } else {
26   - return doProceedFromNonInitialState(segnum, state, atEndOfWord);
  29 + doProceedFromNonInitialState(segnum, state, atEndOfWord, resState);
27 30 }
28 31 }
29 32  
30   - SegrulesState SegrulesFSA::doProceedFromInitialState(
  33 + void SegrulesFSA::doProceedFromInitialState(
31 34 const unsigned char segnum,
32   - bool atEndOfWord) const {
  35 + bool atEndOfWord,
  36 + SegrulesState& resState) const {
33 37 const SegrulesState& newState = initialTransitions[segnum];
34 38 if ((atEndOfWord && newState.accepting)
35 39 || (!atEndOfWord && !newState.sink)) {
36   - return newState;
37   - } else {
38   - return SegrulesState::SINK_STATE;
  40 + resState = newState;
39 41 }
40 42 }
41 43  
42   - SegrulesState SegrulesFSA::doProceedFromNonInitialState(
  44 + void SegrulesFSA::doProceedFromNonInitialState(
43 45 const unsigned char segnum,
44 46 const SegrulesState& state,
45   - bool atEndOfWord) const {
  47 + bool atEndOfWord,
  48 + SegrulesState& resState) const {
46 49 const unsigned char* currPtr = ptr + state.offset + 1;
47 50 const unsigned char transitionsNum = *currPtr++;
48 51 for (int i = 0; i < transitionsNum; i++) {
... ... @@ -50,14 +53,11 @@ namespace morfeusz {
50 53 SegrulesState newState = this->transition2State(currPtr);
51 54 if ((atEndOfWord && newState.accepting)
52 55 || (!atEndOfWord && !newState.sink)) {
53   - return newState;
54   - } else {
55   - return SegrulesState::SINK_STATE;
  56 + resState = newState;
56 57 }
57 58 }
58 59 currPtr += 4;
59 60 }
60   - return SegrulesState::SINK_STATE;
61 61 }
62 62  
63 63 SegrulesState SegrulesFSA::transition2State(const unsigned char* transitionPtr) const {
... ... @@ -69,12 +69,13 @@ namespace morfeusz {
69 69 res.offset = readInt16(transitionPtr);
70 70 res.accepting = *(ptr + res.offset) & ACCEPTING_FLAG;
71 71 res.weak = *(ptr + res.offset) & WEAK_FLAG;
72   - res.sink = !res.accepting && *(ptr + res.offset + 1) == 0;
  72 + res.sink = *(ptr + res.offset + 1) == 0;
  73 + res.failed = !res.accepting && res.sink;
73 74 return res;
74 75 }
75 76  
76 77 vector< SegrulesState > SegrulesFSA::createInitialTransitionsVector() {
77   - vector< SegrulesState > res(256, SegrulesState());
  78 + vector< SegrulesState > res(256, SegrulesState::FAILED_STATE);
78 79 const unsigned char* currPtr = ptr + initialState.offset + 1;
79 80 const unsigned char transitionsNum = *currPtr++;
80 81 for (int i = 0; i < transitionsNum; i++) {
... ...
morfeusz/segrules/SegrulesFSA.hpp
... ... @@ -20,8 +20,9 @@ struct SegrulesState {
20 20 bool weak;
21 21 bool shiftOrthFromPrevious;
22 22 bool sink;
  23 + bool failed;
23 24  
24   - static SegrulesState SINK_STATE;
  25 + static SegrulesState FAILED_STATE;
25 26 };
26 27  
27 28 inline bool operator<(const SegrulesState& s1, const SegrulesState& s2) {
... ... @@ -32,15 +33,16 @@ class SegrulesFSA {
32 33 public:
33 34  
34 35 SegrulesFSA(const unsigned char* ptr) : initialState(), ptr(ptr), initialTransitions() {
35   - SegrulesState state = {0, false, false, false, false};
  36 + SegrulesState state = {0, false, false, false, false, false};
36 37 initialState = state;
37 38 initialTransitions = createInitialTransitionsVector();
38 39 }
39 40  
40   - SegrulesState proceedToNext(
  41 + void proceedToNext(
41 42 const unsigned char segnum,
42 43 const SegrulesState& state,
43   - bool atEndOfWord) const;
  44 + bool atEndOfWord,
  45 + SegrulesState& resState) const;
44 46  
45 47 virtual ~SegrulesFSA() {
46 48 }
... ... @@ -54,14 +56,16 @@ private:
54 56  
55 57 std::vector< SegrulesState > createInitialTransitionsVector();
56 58  
57   - SegrulesState doProceedFromInitialState(
  59 + void doProceedFromInitialState(
58 60 const unsigned char segnum,
59   - bool atEndOfWord) const;
  61 + bool atEndOfWord,
  62 + SegrulesState& resState) const;
60 63  
61   - SegrulesState doProceedFromNonInitialState(
  64 + void doProceedFromNonInitialState(
62 65 const unsigned char segnum,
63 66 const SegrulesState& state,
64   - bool atEndOfWord) const;
  67 + bool atEndOfWord,
  68 + SegrulesState& resState) const;
65 69 };
66 70  
67 71 }
... ...
profile.sh
1 1 #!/bin/bash
2 2  
3   -rm -rf profbuild
4   -mkdir -p profbuild
5   -cd profbuild
6   -cmake -D INPUT_DICTIONARIES=../input/dodatki.tab,../input/PoliMorfSmall.tab -D CMAKE_BUILD_TYPE=Debug -D CMAKE_CXX_FLAGS="-g -O2" -D CMAKE_SHARED_LINKER_FLAGS="-lprofiler" -D CMAKE_EXE_LINKER_FLAGS="-lprofiler" ..
7   -make
  3 +#~ rm -rf profbuild
  4 +#~ mkdir -p profbuild
  5 +#~ cd profbuild
  6 +#~ cmake -D INPUT_DICTIONARIES=/home/wkieras/input/dodatki.tab,../input/PoliMorfSmall.tab -D CMAKE_BUILD_TYPE=Debug -D CMAKE_CXX_FLAGS="-g -O2" -D CMAKE_SHARED_LINKER_FLAGS="-lprofiler" -D CMAKE_EXE_LINKER_FLAGS="-lprofiler" ..
  7 +#~ make
8 8 rm -f /tmp/morfeusz.prof
9 9 export LD_PRELOAD="/usr/lib/libprofiler.so"
10 10 export CPUPROFILE="/tmp/morfeusz.prof"
11   -morfeusz/morfeusz_analyzer -i /home/wkieras/output/sgjp_analyzer.fsa < /mnt/storage/morfeusz/sents10k > /dev/null
  11 +morfeusz/morfeusz_analyzer --dict sgjp --dict-dir /home/mlenart/opt/morfeusz/buildall/Linux-i386-false/_CPack_Packages/Linux/DEB/morfeusz2-2.0.0_sgjp-Linux-i386/usr/share/morfeusz/dictionaries < /mnt/storage/morfeusz/sents10k > /dev/null
12 12 ### pprof --gv profbuild/morfeusz/morfeusz_analyzer /tmp/morfeusz.prof
... ...