(* execTypes.ml *)
(*
* ENIAM: Categorial Syntactic-Semantic Parser for Polish
* Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
* Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*)
(* Status tag carried by the [status] fields of [eniam_parse_result] and
   [result].  Only the constructor names are fixed here; what each one
   means is decided by the code that assigns it, which is outside this file.
   NOTE(review): the names suggest one failure tag per processing stage plus
   [Parsed] for success - confirm against the code that produces them.
   The declaration order is left unchanged on purpose: polymorphic
   comparison and marshalling of constant constructors depend on it. *)
type status =
  | Idle
  | PreprocessingError
  | LexiconError
  | ParseError
  | ParseTimeout
  | Parsed
  | TooManyNodes
  | NotParsed
  | NotReduced
  | ReductionError
  | SemError
  | NotTranslated
(* Record wrapped by the [ENIAMSentence] constructor of [sentence].
   NOTE(review): field meanings below are read off the field names and
   types only; confirm them against the code that fills the record. *)
type eniam_parse_result = {
  (* Outcome tag and an accompanying text message. *)
  status : status;
  msg : string;

  (* One float per stage (lexicon, parse, reduction, semantics).
     NOTE(review): presumably elapsed seconds measured with [time_fun];
     the unit is not established in this file. *)
  lex_time : float;
  parse_time : float;
  reduction_time : float;
  sem_time : float;

  (* Presumably size measures of [paths], [graph] and [term]; how each
     size is computed is not visible here. *)
  paths_size : int;
  graph_size : int;
  term_size : int;

  (* Data structures whose types are declared in LCGtypes and PreTypes. *)
  graph : LCGtypes.graph;
  term : LCGtypes.linear_term array;
  paths : PreTypes.token_record array;
}
(* Tag paired with every alternative stored in [AltSentence],
   [AltParagraph] and [AltText].
   NOTE(review): the names look like the origin or format of an alternative
   (raw text, structured input, CoNLL, ENIAM, Mate) - confirm with the code
   that builds those lists. *)
type mode =
  | Raw
  | Struct
  | CONLL
  | ENIAM
  | Mate
(* Sentences and paragraphs form one recursive group: [ORSentence] embeds a
   [paragraph], and every [paragraph_record] holds a [sentence]. *)
type sentence =
  | RawSentence of string
  (* | CONLL of conll list *)
  | StructSentence of PreTypes.token_record list * int
      (* components: paths, last *)
  | ORSentence of PreTypes.token_record list * int * int * paragraph
      (* NOTE(review): the meaning of the two ints is not documented here *)
  (* | NKJP1M of nkjp1m list *)
  (* | Skladnica of skladnica_tree *)
  | AltSentence of (mode * sentence) list
      (* alternative versions, each tagged with a mode; an older note at
         this spot described the tag as a string label, e.g. raw, nkjp,
         krzaki *)
  | ENIAMSentence of eniam_parse_result

(* One sentence of a structured paragraph.  Original note: beg and len are
   counted in Unicode characters (times 100 ???). *)
and paragraph_record = {
  pid : string;
  pbeg : int;
  plen : int;
  psentence : sentence;
}

and paragraph =
  | RawParagraph of string
  | StructParagraph of paragraph_record list (* sentences *)
  | AltParagraph of (mode * paragraph) list
(* A whole text: a plain string, a list of paragraphs, or a list of
   alternative versions each tagged with a [mode]. *)
type text =
  | RawText of string
  | StructText of paragraph list * int (* components: paragraphs, next_id *)
  | AltText of (mode * text) list
(* Record sent back inside [Work_done].
   After this declaration the unqualified type name [result] denotes this
   record rather than the standard library type of the same name.  The
   labels [status], [msg] and [parse_time] are also declared by
   [eniam_parse_result]; being declared later, the labels of this record are
   the ones chosen when no type information disambiguates a field access.
   NOTE(review): the field descriptions below are read off names and types
   only - confirm against the code that builds the record. *)
type result = {
  (* Three [text] values: presumably the input, the preprocessed form and
     the parsed form. *)
  input_text : text;
  pre_text : text;

  (* Two preprocessing timings; what each one covers is not shown here. *)
  pre_time1 : float;
  pre_time2 : float;

  (* Outcome tag and an accompanying text message. *)
  status : status;
  msg : string;

  (* lex_time: float; *)
  parse_time : float;
  parsed_text : text;

  (* Fields currently disabled:
  reduction_time: float;
  sem_time: float;
  paths_size: int;
  graph_size: int;
  term_size: int;
  graph: LCGtypes.graph;
  term: LCGtypes.linear_term array;
  disamb: LCGtypes.linear_term array;
  sem: LCGtypes.linear_term array;
  sem2: LCGtypes.linear_term array;
  sem3: LCGtypes.linear_term;
  trees: LCGtypes.linear_term list;
  mrls: SemTypes.mrl_formula list;
  paths: PreTypes.token_record array; *)
}
(* Aggregate of integer counters and float totals.
   NOTE(review): the [no_*] counters appear to correspond to the [status]
   constructors (with [no_queries] as the overall count) and the [sum_*]
   fields to the per-query timing fields; the accumulation code is not in
   this file, so confirm there. *)
type sum_result = {
  (* Overall count. *)
  no_queries : int;

  (* Counters whose names end in error or timeout. *)
  no_pre_error : int;
  no_lex_error : int;
  no_parse_error : int;
  no_timeout : int;
  no_reduction_error : int;
  no_sem_error : int;

  (* Counters for the remaining outcomes. *)
  no_not_parsed : int;
  no_not_reduced : int;
  no_too_many_nodes : int;
  no_not_translated : int;
  no_parsed : int;

  (* Float totals, one per timing kind. *)
  sum_pre_time1 : float;
  sum_pre_time2 : float;
  sum_lex_time : float;
  sum_parse_time : float;
  sum_reduction_time : float;
  sum_sem_time : float;
}
(* Messages going from the overseer to a worker.
   [Work_with] carries a string plus a (string, float) pair that the
   original annotates as reg_params; [Kill_yourself] carries no data.
   NOTE(review): presumably a work request and a shutdown request -
   confirm with the process that exchanges these messages. *)
type message_from_overseer =
  | Work_with of string * (* reg_params *) (string * float)
  | Kill_yourself
(* Messages going from a worker to the overseer.
   [Ready_to_work] carries a string; [Work_done] carries a string together
   with the [result] record declared in this file.
   NOTE(review): the strings are presumably worker identifiers - confirm
   with the process that exchanges these messages. *)
type message_to_overseer =
  | Ready_to_work of string
  | Work_done of string * result
(* Clock used for timing: each call returns the value of
   [Unix.gettimeofday], i.e. the current time in seconds as a float.
   A processor-time clock would be [Sys.time] instead; note that it would
   have to be bound as a function (let time_fun = Sys.time), not applied
   once at definition time. *)
let time_fun () = Unix.gettimeofday ()