#include "CasePatternHelper.hpp"

using namespace std;

namespace morfeusz {

/**
 * Decides whether the casing of an orth (the text in [orthStart, orthEnd))
 * is acceptable for the given interpretations group.
 *
 * The serialized data at ig.ptr is read as follows:
 *   - one "compression byte", inspected with isOrthOnlyLower() and
 *     isOrthOnlyTitle();
 *   - if neither of those predicates holds: one byte with the number of
 *     case patterns, followed by that many serialized case patterns
 *     (see deserializeOneCasePattern()).
 *
 * @param env       provides the charset converter used to decode the orth
 *                  and the case converter used for title/lower conversion
 * @param orthStart first byte of the encoded orth
 * @param orthEnd   one past the last byte of the encoded orth
 * @param ig        interpretations group whose serialized data is examined
 * @return true when case sensitivity is off, when the group is flagged as
 *         lower-only, when the group is flagged as title-only and the first
 *         codepoint equals its title-case form, when the group carries zero
 *         case patterns, or when the orth differs from its lower-cased form
 *         and checkCasePattern() accepts at least one of the patterns;
 *         false otherwise.
 *
 * Side effect: when the case patterns are consulted, the orthCodepoints and
 * normalizedCodepoints members are overwritten with the decoded orth and its
 * lower-cased form.
 */
bool CasePatternHelper::checkInterpsGroupOrthCasePatterns(
        const Environment& env,
        const char* orthStart,
        const char* orthEnd,
        const InterpsGroup& ig) const {
    const unsigned char* currPtr = ig.ptr;
    unsigned char compressionByte = *currPtr++;

    if (!this->caseSensitive || isOrthOnlyLower(compressionByte)) {
        return true;
    }

    if (isOrthOnlyTitle(compressionByte)) {
        // Only the first codepoint is examined: it must already be in
        // title case.
        uint32_t cp = env.getCharsetConverter().next(orthStart, orthEnd);
        return cp == env.getCaseConverter().toTitle(cp);
    }

    const unsigned char casePatternsNum = *currPtr++;
    if (casePatternsNum == 0) {
        return true;
    }

    // Decoding the orth depends only on orthStart/orthEnd/env, not on the
    // pattern being tested, so it is done once instead of once per pattern.
    // NOTE(review): this assumes checkCasePattern() does not modify the
    // vectors passed to it - confirm against its declaration.
    orthCodepoints.clear();
    normalizedCodepoints.clear();
    bool isDiff = false;
    const char* currOrthPtr = orthStart;
    while (currOrthPtr != orthEnd) {
        uint32_t codepoint = env.getCharsetConverter().next(currOrthPtr, orthEnd);
        uint32_t normalizedCodepoint = env.getCaseConverter().toLower(codepoint);
        orthCodepoints.push_back(codepoint);
        normalizedCodepoints.push_back(normalizedCodepoint);
        isDiff = isDiff || codepoint != normalizedCodepoint;
    }

    // An orth identical to its lower-cased form never matches a pattern here.
    if (!isDiff) {
        return false;
    }

    for (unsigned int i = 0; i < casePatternsNum; i++) {
        // deserializeOneCasePattern() advances currPtr past the pattern it
        // reads, so consecutive iterations see consecutive patterns.
        if (checkCasePattern(
                normalizedCodepoints,
                orthCodepoints,
                deserializeOneCasePattern(currPtr))) {
            return true;
        }
    }
    return false;
}

/**
 * Reads one serialized case pattern and advances ptr past it.
 *
 * Layout: one type byte, then
 *   - LEMMA_ONLY_LOWER:   nothing more; the result is empty;
 *   - LEMMA_UPPER_PREFIX: one length byte; the result holds that many
 *                         `true` entries;
 *   - LEMMA_MIXED_CASE:   one count byte followed by that many index bytes;
 *                         the result is `true` at each listed index and
 *                         `false` elsewhere, sized to the largest index + 1.
 * Any other type byte yields an empty result with only the type byte
 * consumed.
 *
 * @param ptr in/out cursor into the serialized data
 * @return the pattern as a vector of flags
 */
std::vector<bool> CasePatternHelper::deserializeOneCasePattern(const unsigned char*& ptr) {
    std::vector<bool> res;
    uint8_t casePatternType = *ptr++;
    uint8_t prefixLength;
    uint8_t patternLength;
    switch (casePatternType) {
        case LEMMA_ONLY_LOWER:
            break;
        case LEMMA_UPPER_PREFIX:
            prefixLength = *ptr++;
            res.resize(prefixLength, true);
            break;
        case LEMMA_MIXED_CASE:
            patternLength = *ptr++;
            for (unsigned int i = 0; i < patternLength; i++) {
                uint8_t idx = *ptr++;
                // Only ever grow: shrinking on a smaller index would discard
                // flags already set for larger indices.
                const std::vector<bool>::size_type needed =
                        static_cast<std::vector<bool>::size_type>(idx) + 1;
                if (res.size() < needed) {
                    res.resize(needed, false);
                }
                res[idx] = true;
            }
            break;
    }
    return res;
}

}