InterpretedChunk.hpp
1.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
/*
* File: InterpretedChunk.hpp
* Author: mlenart
*
* Created on 18 November 2013, 15:00
*/
#ifndef INTERPRETEDCHUNK_HPP
#define INTERPRETEDCHUNK_HPP
#include <string>
#include <vector>
#include "InterpsGroup.hpp"
namespace morfeusz {
/**
 * Denotes a part of text that has some not-yet-deserialized interpretations
 * attached to it.
 *
 * The struct declares no constructor and no destructor: scalar and pointer
 * members are left uninitialized by default-initialization, and the buffers
 * the pointers refer to are not released by this struct, so they must
 * outlive it.
 *
 * Requires <string> and <vector> to be included before this definition.
 */
struct InterpretedChunk {
    /**
     * The type of segment for this chunk.
     */
    unsigned char segmentType;

    /**
     * Pointer to the start of this chunk's text.
     */
    const char* textStartPtr;

    /**
     * Pointer to the end of this chunk's text (exclusive).
     */
    const char* textEndPtr;

    /**
     * Pointer to the start of this chunk's text, including preceding
     * whitespace if whitespace handling is set to APPEND.
     */
    const char* chunkStartPtr;

    /**
     * Pointer to the end of this chunk's text (exclusive), including
     * following whitespace if whitespace handling is set to APPEND.
     */
    const char* chunkEndPtr;

    /**
     * Pointer to the start of this chunk's binary data.
     */
    const unsigned char* interpsGroupPtr;

    /**
     * Pointer to the end of this chunk's binary data (exclusive).
     */
    const unsigned char* interpsEndPtr;

    /**
     * true iff this chunk shifts its orth to the chunk directly to its right
     * (it is "A" in the "A> B" segmentation rule).
     */
    bool shiftOrth;

    /**
     * true iff this chunk has data attached from its prefix chunk
     * (it is the "B" segment in the "A> B" segmentation rule).
     */
    bool orthWasShifted;

    /**
     * Number of codepoints this chunk consists of.
     */
    int codepointsNum;

    /**
     * Chunks that are in the prefix segments
     * (those with ">" in segmentation rules, i.e. "dig>* dig").
     */
    std::vector<InterpretedChunk> prefixChunks;

    /**
     * Homonym id specified by the user.
     */
    std::string requiredHomonymId;

    /**
     * NOTE(review): undocumented in the original; presumably forces
     * case-insensitive handling of this chunk -- confirm against the code
     * that reads this flag.
     */
    bool forceIgnoreCase;
};
}
#endif /* INTERPRETEDCHUNK_HPP */