|
1
2
3
4
5
6
7
8
9
10
|
/*
* File: InterpretedChunk.hpp
* Author: mlenart
*
* Created on 18 November 2013, 15:00
*/
#ifndef INTERPRETEDCHUNK_HPP
#define INTERPRETEDCHUNK_HPP
|
|
11
|
#include <string>
#include <vector>

#include "InterpsGroup.hpp"
|
|
14
15
|
namespace morfeusz {
|
|
16
17
18
|
/**
 * Denotes a part of text that has some not-yet-deserialized interpretations
 * attached to it.
 *
 * This is a plain aggregate: no constructor is declared and no member has a
 * default initializer, so the scalar and pointer members hold indeterminate
 * values unless the object is value-initialized or every field is assigned.
 * All pointer members are non-owning; nothing in this struct frees them.
 */
struct InterpretedChunk {

    /**
     * The type of segment for this chunk.
     */
    unsigned char segmentType;

    /**
     * Pointer to the start of the word containing this chunk
     * (possibly including the text of its prefixes).
     */
    const char* textStartPtr;

    /**
     * Pointer to the start of this chunk's own text (prefixes excluded).
     */
    const char* textNoPrefixesStartPtr;

    /**
     * Pointer to the end of this chunk's text (exclusive).
     */
    const char* textEndPtr;

    /**
     * Pointer to the start of this chunk's text, possibly including
     * preceding whitespace if whitespace handling is set to APPEND.
     *
     * NOTE(review): the previous comment marked this start pointer as
     * "(exclusive)", which looks like a copy-paste from the end-pointer
     * documentation - confirm against the code that fills this field.
     */
    const char* chunkStartPtr;

    /**
     * Pointer to the end of this chunk's text (exclusive), including
     * following whitespace if whitespace handling is set to APPEND.
     */
    const char* chunkEndPtr;

    /**
     * Pointer to the start of this chunk's binary data.
     */
    const unsigned char* interpsGroupPtr;

    /**
     * Pointer to the end of this chunk's binary data (exclusive).
     */
    const unsigned char* interpsEndPtr;

    /**
     * true iff this chunk shifts its orth to the chunk to the right of it
     * (it is "A" in an "A> B" segmentation rule).
     */
    bool shiftOrth;

    /**
     * true iff this chunk has data attached from its prefix chunk
     * (it is the "B" segment in an "A> B" segmentation rule).
     */
    bool orthWasShifted;

    /**
     * Number of codepoints this chunk consists of.
     */
    int codepointsNum;

    /**
     * Chunks that are in the prefix segments
     * (those with ">" in segmentation rules, i.e. "dig>* dig").
     */
    std::vector<InterpretedChunk> prefixChunks;

    /**
     * Homonym id specified by the user.
     */
    std::string requiredHomonymId;

    /**
     * NOTE(review): undocumented in the original; presumably forces
     * case-insensitive handling of this chunk - confirm against the code
     * that reads this flag.
     */
    bool forceIgnoreCase;
};
|
|
91
92
|
}
|
|
93
94
|
#endif /* INTERPRETEDCHUNK_HPP */
|