types.thrift
3.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
namespace cpp multiservice
namespace java pl.waw.ipipan.zil.multiservice.thrift.types
namespace py multiservice.types
// Kinds of annotation layers. Used as the key type of
// TText.annotationHeaders.
//
// Values are pinned explicitly. They are the same numbers the Thrift
// compiler assigns implicitly (0 for the first member, then +1 each), so
// the wire format is unchanged; pinning them prevents a later insertion or
// reordering from silently renumbering existing members.
// New members must be appended with a fresh, never-reused value.
enum TAnnotationLayer {
    SEGMENTATION = 0,
    MORPHOSYNTAX = 1,
    WORDS = 2,
    GROUPS = 3,
    NAMES = 4,
    SUMMARY = 5,
    DEPENDENCY_PARSE = 6,
    MENTIONS = 7,
    COREFERENCE = 8,
    SENTIMENT = 9,
    TERMS = 10
}
// Exception type declared by this IDL; carries a single text message.
exception MultiserviceException {
    // Human-readable description of the failure (presumably; the producers
    // of this exception are not visible in this file).
    1: string message
}
// One interpretation of a token or syntactic word (referenced by TToken and
// TSyntacticWord). `base` and `ctag` are required; `msd` has default
// requiredness.
// NOTE(review): presumably base = base form, ctag = grammatical class tag,
// msd = morphosyntactic description -- confirm against the tagset docs.
struct TInterpretation {
    1: required string base,
    2: required string ctag,
    3: string msd
}
// A single token of a sentence (see TSentence.tokens and
// TSentence.rejectedTokens).
struct TToken {
    1: string id,
    // NOTE(review): presumably the orthographic (surface) form -- confirm.
    2: string orth,
    // NOTE(review): unit and origin of the offset (characters vs. bytes,
    // relative to paragraph vs. text) are not stated in this file.
    3: i32 offset,
    4: bool noPrecedingSpace,
    // Three interpretation fields are declared; how `interpretations`
    // relates to `candidateInterpretations` is not stated in this file.
    5: list<TInterpretation> interpretations,
    6: TInterpretation chosenInterpretation,
    7: list<TInterpretation> candidateInterpretations
}
// A syntactic word (see TSentence.words): an identified unit with its own
// chosen/candidate interpretations and a list of child identifiers.
struct TSyntacticWord {
    1: string id,
    2: string orth,
    3: TInterpretation chosenInterpretation,
    4: list<TInterpretation> candidateInterpretations,
    // Identifiers of child elements; which entity types they refer to is
    // not stated in this file.
    5: list<string> childIds,
    // NOTE(review): presumably the name of the rule that produced this
    // word -- confirm.
    6: string rule
}
// A syntactic group (see TSentence.groups), carrying identifiers of its
// semantic and syntactic heads and of its children.
struct TSyntacticGroup {
    1: string id,
    2: string orth,
    3: string semanticHeadId,
    4: string syntacticHeadId,
    // Identifiers of child elements; the referenced entity types are not
    // stated in this file.
    5: list<string> childIds,
    6: string type,
    7: string rule
}
// A sentiment annotation attached to a sentence (see
// TSentence.sentimentTags).
struct TSentimentTag {
    1: string id,
    2: string orth,
    // -1 means "extremely negative", 1 means "extremely positive"
    3: double value,
    // contains ids of morphosyntactic entities
    4: list<string> childIds,
    5: string rule
}
// A named entity (see TSentence.names) with a type/subtype pair and a list
// of child identifiers.
struct TNamedEntity {
    1: string id,
    2: string orth,
    3: string base,
    4: string type,
    5: string subtype,
    // Identifiers of child elements; the referenced entity types are not
    // stated in this file.
    6: list<string> childIds
}
// One element of a sentence's dependency parse (see
// TSentence.dependencyParse): two token identifiers plus a label.
// NOTE(review): which of start/end is the head and which the dependent is
// not stated in this file -- confirm with the parser's documentation.
struct DependencyParseNode {
    1: string startTokenId,
    2: string endTokenId,
    3: string label
}
// A mention (see TSentence.mentions); TCoreference.mentionIds holds mention
// identifiers as strings.
struct TMention {
    1: string id,
    2: list<string> headIds,
    3: list<string> childIds,
    // NOTE(review): presumably true when the mention is a zero (dropped)
    // subject with no overt tokens -- confirm.
    4: bool zeroSubject
}
// A coreference record stored at text level (see TText.coreferences),
// grouping mentions by identifier.
struct TCoreference {
    1: string id,
    2: string type,
    // NOTE(review): meaning of `dominant` (e.g. representative text of the
    // cluster) is not stated in this file -- confirm.
    3: string dominant,
    4: list<string> mentionIds,
    5: string sourceMentionId
}
// A sentence together with every per-sentence annotation list declared in
// this file.
struct TSentence {
    1: string id,
    2: list<TToken> tokens,
    // tokens from rejected segmentation variants
    3: list<TToken> rejectedTokens,
    4: list<TSyntacticWord> words,
    5: list<TSyntacticGroup> groups,
    6: list<TNamedEntity> names,
    7: list<DependencyParseNode> dependencyParse,
    8: list<TMention> mentions,
    9: list<TSentimentTag> sentimentTags
}
// A paragraph: an identifier, a text string, and a list of sentences.
// NOTE(review): `text` is presumably the raw paragraph text that the
// sentences were derived from -- confirm.
struct TParagraph {
    1: string id,
    2: string text,
    3: list<TSentence> sentences
}
// Header metadata. Used both for the text itself (TText.textHeader) and per
// annotation layer (TText.annotationHeaders).
struct THeader {
    1: string id,
    // title of the text (if exists)
    2: string title,
    // distributor of the text or name of the annotating tool
    3: string distributor,
    // date of publication in POSIX format
    4: i64 publicationTime,
    // time spent on annotation, in milliseconds
    5: i64 processingDuration,
    6: string sourceDescText,
    7: string retrievedFrom
}
// Boolean flags describing the disambiguation state of an annotated text
// (see TText.annotationDetails). Every flag defaults to false.
struct AnnotationDetails {
    1: bool hasSegmentsDisambiguated = false,
    2: bool hasMorphosyntaxDisambiguated = false,
    3: bool hasMorphosyntaxPartiallyDisambiguated = false
}
// A term entry stored at text level (see TText.terms).
// NOTE(review): field meanings below are inferred from names only --
// confirm against the term-extraction tool's documentation.
struct TTerm {
    1: string text,
    // presumably the term's C-value score -- TODO confirm
    2: double cvalue,
    // unit of length (tokens vs. characters) is not stated in this file
    3: i32 length,
    // freqS / freqN: two frequency counts; their distinction is not stated
    // in this file
    4: i32 freqS,
    5: i32 freqN,
    6: i32 numContexts
}
// Top-level container: a text, its paragraphs, and text-level annotations.
struct TText {
    1: THeader textHeader,
    // Required, no default: must always be set by the sender.
    2: required list<TParagraph> paragraphs,
    // Required; defaults to an empty map. Keyed by annotation layer.
    3: required map<TAnnotationLayer, THeader> annotationHeaders = {},
    // Required; defaults to an AnnotationDetails with its own field
    // defaults (all flags false).
    4: required AnnotationDetails annotationDetails = {},
    5: string summary,
    6: list<TCoreference> coreferences,
    7: list<TTerm> terms
}