CasePatternHelper.hpp
3.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/*
* File: CasePatternHelper.hpp
* Author: lennyn
*
* Created on April 4, 2014, 12:11 PM
*/
#ifndef CASEPATTERNHELPER_HPP
#define CASEPATTERNHELPER_HPP
#include <vector>
#include "InterpsGroup.hpp"
#include "CasePatternHelper.hpp"
#include "compressionByteUtils.hpp"
/**
 * Decides whether the letter case of an input word is compatible with the
 * case patterns serialized together with a group of interpretations.
 *
 * A case pattern is a std::vector<bool>: a true value at position i means
 * that the original codepoint at position i must differ from its lowercased
 * counterpart. When case sensitivity is switched off (the default), every
 * check performed by this class succeeds.
 */
class CasePatternHelper {
public:

    /** Creates a helper with case sensitivity switched off. */
    CasePatternHelper() : caseSensitive(false) {
    }

    /**
     * Switches case-sensitive checking on or off.
     *
     * @param caseSensitive true to enforce case patterns, false to accept
     *                      every word regardless of its letter case.
     */
    void setCaseSensitive(bool caseSensitive) {
        this->caseSensitive = caseSensitive;
    }

    /**
     * Checks a single case pattern against a word.
     *
     * @param lowercaseCodepoints codepoints of the word after lowercasing.
     * @param originalCodepoints  codepoints of the word as given.
     * @param casePattern         positions flagged true must differ between
     *                            the two codepoint vectors.
     * @return true when case sensitivity is off or every flagged position
     *         differs; false otherwise.
     */
    bool checkCasePattern(
            const std::vector<uint32_t>& lowercaseCodepoints,
            const std::vector<uint32_t>& originalCodepoints,
            const std::vector<bool>& casePattern) const {
        if (!this->caseSensitive) {
            return true;
        }
        for (unsigned int i = 0; i < casePattern.size(); i++) {
            if (!casePattern[i]) {
                continue;
            }
            // A flagged position that lies beyond the word cannot be
            // satisfied; rejecting here also avoids indexing the vectors
            // out of range when the pattern is longer than the word.
            if (i >= lowercaseCodepoints.size() || i >= originalCodepoints.size()) {
                return false;
            }
            if (lowercaseCodepoints[i] == originalCodepoints[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks a word against the case patterns stored at the beginning of
     * the serialized data of an interpretations group.
     *
     * Data layout as read by this method, starting at ig.ptr: one
     * "compression byte"; then, unless isOrthOnlyLower() or
     * isOrthOnlyTitle() holds for that byte, one byte with the number of
     * case patterns followed by the patterns themselves (see
     * deserializeOneCasePattern()).
     *
     * @param lowercaseCodepoints codepoints of the word after lowercasing.
     * @param originalCodepoints  codepoints of the word as given.
     * @param ig                  interpretations group whose ptr member
     *                            points at the serialized data.
     * @return true when case sensitivity is off, when the group imposes no
     *         case requirement, or when at least one stored pattern
     *         accepts the word; false otherwise.
     */
    bool checkInterpsGroupOrthCasePatterns(
            const std::vector<uint32_t>& lowercaseCodepoints,
            const std::vector<uint32_t>& originalCodepoints,
            const InterpsGroup& ig) const {
        // Decide before touching ig.ptr: nothing has to be read from the
        // serialized data in case-insensitive mode.
        if (!this->caseSensitive) {
            return true;
        }
        const unsigned char* currPtr = ig.ptr;
        const unsigned char compressionByte = *currPtr++;
        if (isOrthOnlyLower(compressionByte)) {
            return true;
        }
        if (isOrthOnlyTitle(compressionByte)) {
            // Only the first codepoint is required to differ from its
            // lowercase form. An empty word has no first codepoint, so it
            // cannot satisfy the requirement.
            return !lowercaseCodepoints.empty()
                    && !originalCodepoints.empty()
                    && lowercaseCodepoints[0] != originalCodepoints[0];
        }
        const unsigned char casePatternsNum = *currPtr++;
        if (casePatternsNum == 0) {
            return true;
        }
        // Patterns are alternatives: the first one that accepts the word
        // settles the result. deserializeOneCasePattern() advances currPtr
        // past the pattern it has just read.
        for (unsigned int i = 0; i < casePatternsNum; i++) {
            if (checkCasePattern(
                    lowercaseCodepoints,
                    originalCodepoints,
                    deserializeOneCasePattern(currPtr))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Reads one serialized case pattern and advances ptr past it.
     *
     * Encoding: a type byte, followed by
     *  - LEMMA_ONLY_LOWER:   nothing (empty pattern),
     *  - LEMMA_UPPER_PREFIX: one byte N; the first N positions are flagged,
     *  - LEMMA_MIXED_CASE:   one byte N, then N bytes, each the index of a
     *                        flagged position.
     * An unrecognized type byte yields an empty pattern and consumes only
     * the type byte.
     *
     * @param ptr in/out pointer into the serialized data.
     * @return the decoded pattern.
     */
    std::vector<bool> deserializeOneCasePattern(const unsigned char*& ptr) const {
        std::vector<bool> res;
        const uint8_t casePatternType = *ptr++;
        switch (casePatternType) {
            case LEMMA_ONLY_LOWER:
                break;
            case LEMMA_UPPER_PREFIX: {
                const uint8_t prefixLength = *ptr++;
                res.resize(prefixLength, true);
                break;
            }
            case LEMMA_MIXED_CASE: {
                const uint8_t patternLength = *ptr++;
                for (unsigned int i = 0; i < patternLength; i++) {
                    const uint8_t idx = *ptr++;
                    // Grow only: an unconditional resize would shrink the
                    // vector and drop already-set flags whenever an index
                    // is smaller than one read earlier.
                    if (idx >= res.size()) {
                        res.resize(idx + 1, false);
                    }
                    res[idx] = true;
                }
                break;
            }
            default:
                // Unknown pattern type: treated as an empty pattern.
                break;
        }
        return res;
    }

private:
    /** Whether case patterns are enforced at all. */
    bool caseSensitive;

    /** Type bytes of the serialized case patterns. */
    static const uint8_t LEMMA_ONLY_LOWER = 0;
    static const uint8_t LEMMA_UPPER_PREFIX = 1;
    static const uint8_t LEMMA_MIXED_CASE = 2;
};
#endif /* CASEPATTERNHELPER_HPP */