/*
 * File:   InterpretedChunk.hpp
 * Author: mlenart
 *
 * Created on 18 November 2013, 15:00
 */
#ifndef INTERPRETEDCHUNK_HPP
#define INTERPRETEDCHUNK_HPP

#include <string>
#include <vector>

#include "InterpsGroup.hpp"

namespace morfeusz {

    /**
     * Denotes a part of text that has some not-yet-deserialized
     * interpretations attached to it.
     *
     * This is a plain data holder: it declares no constructors and no member
     * has a default initializer, so a default-initialized instance leaves the
     * scalar and pointer fields indeterminate until they are assigned.
     */
    struct InterpretedChunk {
        /**
         * The type of segment for this chunk.
         */
        unsigned char segmentType;

        /**
         * Pointer to the start of this chunk's text.
         */
        const char* textStartPtr;

        /**
         * Pointer to the end of this chunk's text (exclusive).
         */
        const char* textEndPtr;

        /**
         * Pointer to the end of this chunk's text (exclusive),
         * including the following whitespaces if whitespace-handling
         * is set to APPEND.
         */
        const char* chunkEndPtr;

        /**
         * Pointer to the start of this chunk's binary data.
         */
        const unsigned char* interpsGroupPtr;

        /**
         * Pointer to the end of this chunk's binary data (exclusive).
         */
        const unsigned char* interpsEndPtr;

        /**
         * true iff this chunk shifts its orth to the one right of it
         * (it is "A" in "A> B").
         */
        bool shiftOrth;

        /**
         * true iff this chunk has attached data from its prefix chunk
         * (when it is the "B" segment in an "A> B" segmentation rule).
         */
        bool orthWasShifted;

        /**
         * Number of codepoints this chunk consists of.
         */
        int codepointsNum;

        /**
         * Chunks that are in the prefix segments
         * (those with ">" in segmentation rules, i.e. "dig>* dig").
         */
        std::vector<InterpretedChunk> prefixChunks;

        /**
         * Homonym id specified by the user.
         */
        std::string requiredHomonymId;

        /**
         * NOTE(review): undocumented in the original; presumably requests
         * case-insensitive handling of this chunk regardless of other
         * settings - confirm against the code that reads this flag.
         */
        bool forceIgnoreCase;
    };

}

#endif /* INTERPRETEDCHUNK_HPP */