Blame view

morfeusz/InterpretedChunk.hpp 2.1 KB
Michał Lenart authored
1
2
3
4
5
6
7
8
9
10
/* 
 * File:   InterpretedChunk.hpp
 * Author: mlenart
 *
 * Created on 18 listopad 2013, 15:00
 */

#ifndef INTERPRETEDCHUNK_HPP
#define	INTERPRETEDCHUNK_HPP
Michał Lenart authored
11
#include <string>
#include <vector>

#include "InterpsGroup.hpp"
Michał Lenart authored
14
15
namespace morfeusz {
Michał Lenart authored
16
17
18
/**
 * Denotes a part of text that has some not-yet-deserialized interpretations attached to it.
 *
 * This is a plain data holder: every pointer member points into memory that
 * is not managed by this struct, and no member has a default initializer.
 */
struct InterpretedChunk {

    /**
     * The type of segment for this chunk.
     */
    unsigned char segmentType;

    /**
     * Pointer to the start of the word containing this chunk
     * (possibly including the text of its prefixes).
     */
    const char* textStartPtr;

    /**
     * Pointer to the start of this chunk's own text.
     */
    const char* textNoPrefixesStartPtr;

    /**
     * Pointer to the end of this chunk's text (exclusive).
     */
    const char* textEndPtr;

    /**
     * Pointer to the start of this chunk's text, possibly including
     * preceding whitespaces if whitespace-handling is set to APPEND.
     *
     * NOTE(review): the previous comment described this start pointer as
     * "(exclusive)", which looks like a copy-paste from the end-pointer
     * comments - confirm against the code that fills this field.
     */
    const char* chunkStartPtr;

    /**
     * Pointer to the end of this chunk's text (exclusive),
     * including following whitespaces if whitespace-handling is set to APPEND.
     */
    const char* chunkEndPtr;

    /**
     * Pointer to the start of this chunk's binary data.
     */
    const unsigned char* interpsGroupPtr;

    /**
     * Pointer to the end of this chunk's binary data (exclusive).
     */
    const unsigned char* interpsEndPtr;

    /**
     * true iff this chunk shifts its orth to the chunk to the right of it
     * (it is "A" in "A> B").
     */
    bool shiftOrth;

    /**
     * true iff this chunk has attached data from its prefix chunk
     * (when it is the "B" segment in an "A> B" segmentation rule).
     */
    bool orthWasShifted;

    /**
     * Number of codepoints this chunk consists of.
     */
    int codepointsNum;

    /**
     * Chunks that are in the prefix segments
     * (those with ">" in segmentation rules, i.e. "dig>* dig").
     */
    std::vector<InterpretedChunk> prefixChunks;

    /**
     * Homonym id specified by the user.
     */
    std::string requiredHomonymId;

    /**
     * NOTE(review): undocumented in the original; presumably forces
     * case-insensitive handling of this chunk - confirm against its users.
     */
    bool forceIgnoreCase;
};
Michał Lenart authored
91
92
}
Michał Lenart authored
93
94
#endif	/* INTERPRETEDCHUNK_HPP */