Commit 85624534d16e639be972e60d741c62b67471bdb1
1 parent
81d5f245
- komentarze w metodzie obsługującej separatory
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/trunk@329 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing
1 changed file
with
6 additions
and
2 deletions
morfeusz/MorfeuszImpl.cpp
... | ... | @@ -441,6 +441,8 @@ namespace morfeusz { |
441 | 441 | while (currInput != chunkBounds.chunkEndPtr) { |
442 | 442 | prevInput = currInput; |
443 | 443 | const char* nonSeparatorInputEnd = prevInput; |
444 | + | |
445 | + // advance through non-separator chars | |
444 | 446 | do { |
445 | 447 | codepoint = env.getCharsetConverter().next(currInput, chunkBounds.chunkEndPtr); |
446 | 448 | if (!env.isSeparator(codepoint)) { |
... | ... | @@ -448,10 +450,11 @@ namespace morfeusz { |
448 | 450 | } |
449 | 451 | } while (currInput != chunkBounds.chunkEndPtr && !env.isSeparator(codepoint)); |
450 | 452 | |
453 | + // advance through separator chars | |
451 | 454 | if (env.isSeparator(codepoint)) { |
452 | 455 | separatorFound = true; |
453 | 456 | if (nonSeparatorInputEnd != prevInput) { |
454 | - // there are non-separators + separators | |
457 | + // there are some non-separators + some separators | |
455 | 458 | |
456 | 459 | int startNode = results.empty() ? startNodeNum : results.back().endNode; |
457 | 460 | // process part before separators |
... | ... | @@ -482,11 +485,12 @@ namespace morfeusz { |
482 | 485 | // currInput == chunkBounds.chunkEndPtr |
483 | 486 | if (!env.isSeparator(codepoint)) { |
484 | 487 | if (separatorFound) { |
485 | - // process part after separators | |
488 | + // process (remaining) non-separators | |
486 | 489 | int startNode = results.empty() ? startNodeNum : results.back().endNode; |
487 | 490 | TextReader newReader4(prevInput, chunkBounds.chunkEndPtr, env); |
488 | 491 | this->processOneWord(env, newReader4, startNode, results, true); |
489 | 492 | } else { |
493 | + // no separators found at all - whole chunk is ignotium | |
490 | 494 | this->appendIgnotiumToResults(env, chunkBounds, startNodeNum, results); |
491 | 495 | } |
492 | 496 | } |
... | ... |