Commit 85624534d16e639be972e60d741c62b67471bdb1

Authored by Michał Lenart
1 parent 81d5f245

- komentarze w metodzie obsługującej separatory

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/trunk@329 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing 1 changed file with 6 additions and 2 deletions
morfeusz/MorfeuszImpl.cpp
... ... @@ -441,6 +441,8 @@ namespace morfeusz {
441 441 while (currInput != chunkBounds.chunkEndPtr) {
442 442 prevInput = currInput;
443 443 const char* nonSeparatorInputEnd = prevInput;
  444 +
  445 + // advance through non-separator chars
444 446 do {
445 447 codepoint = env.getCharsetConverter().next(currInput, chunkBounds.chunkEndPtr);
446 448 if (!env.isSeparator(codepoint)) {
... ... @@ -448,10 +450,11 @@ namespace morfeusz {
448 450 }
449 451 } while (currInput != chunkBounds.chunkEndPtr && !env.isSeparator(codepoint));
450 452  
  453 + // advance through separator chars
451 454 if (env.isSeparator(codepoint)) {
452 455 separatorFound = true;
453 456 if (nonSeparatorInputEnd != prevInput) {
454   - // there are non-separators + separators
  457 + // there are some non-separators + some separators
455 458  
456 459 int startNode = results.empty() ? startNodeNum : results.back().endNode;
457 460 // process part before separators
... ... @@ -482,11 +485,12 @@ namespace morfeusz {
482 485 // currInput == chunkBounds.chunkEndPtr
483 486 if (!env.isSeparator(codepoint)) {
484 487 if (separatorFound) {
485   - // process part after separators
  488 + // process (remaining) non-separators
486 489 int startNode = results.empty() ? startNodeNum : results.back().endNode;
487 490 TextReader newReader4(prevInput, chunkBounds.chunkEndPtr, env);
488 491 this->processOneWord(env, newReader4, startNode, results, true);
489 492 } else {
  493 + // no separators found at all - whole chunk is ign (ignotium)
490 494 this->appendIgnotiumToResults(env, chunkBounds, startNodeNum, results);
491 495 }
492 496 }
... ...