LCGfields.ml
6.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
(*
* ENIAMcorpora is a library that integrates ENIAM with corpora in CONLL format
* Copyright (C) 2016 Daniel Oklesinski <oklesinski dot daniel atSPAMfree gmail dot com>
* Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
*
* This library is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*)
open LCGtypes
open Xstd
open ExecTypes
(* Mode keys: the two top-level entries of [field_map], one per parser whose
   output is inspected in this file. *)
let eniam = "eniam"
let conll = "conll"
(** Ordered-type wrapper around [string], suitable as a [Map.Make] argument. *)
module Strings =
struct
  type t = string

  (* [String.compare] gives the same ordering on strings as the polymorphic
     comparison, without going through the [Pervasives] module, which is
     deprecated since OCaml 4.08 and no longer exists in OCaml 5. *)
  let compare a b = String.compare a b
end

(** Maps keyed by strings. *)
module StrMap = Map.Make(Strings)
(** Three-level occurrence table: mode key -> field name -> field content ->
    counter. The outer map is fixed and holds exactly the [eniam] and [conll]
    keys; the inner levels are mutable and filled by [add_to_field_map]. *)
let field_map =
  let with_eniam = StrMap.add eniam (ref StrMap.empty) StrMap.empty in
  StrMap.add conll (ref StrMap.empty) with_eniam
(** [add_to_field_map str_mode field content] records one more occurrence of
    [content] as a value of [field] under the mode key [str_mode].

    Missing intermediate tables and counters are created on demand; a new
    counter starts at 1.

    @raise Not_found if [str_mode] is not a key of [field_map]. *)
let add_to_field_map str_mode field content =
  (* Deliberately outside any handler: an unknown mode must propagate. *)
  let per_mode = StrMap.find str_mode field_map in
  let counters =
    try StrMap.find field !per_mode
    with Not_found ->
      let fresh = ref StrMap.empty in
      per_mode := StrMap.add field fresh !per_mode;
      fresh
  in
  try incr (StrMap.find content !counters)
  with Not_found -> counters := StrMap.add content (ref 1) !counters
(** Prints the whole content of [field_map] to standard output: each mode key,
    then each field name (one tab), then each content value with its count
    (two tabs), followed by the sum of the counts for that field. A final
    empty line closes the report. *)
let print_field_map () =
  (* Prints every content/count line and returns the total of the counts. *)
  let print_counts counts =
    StrMap.fold (fun content count total ->
        print_endline ("\t\t" ^ content ^ "\t\t" ^ (string_of_int !count));
        total + !count)
      counts 0
  in
  let print_field name counts =
    print_endline ("\t" ^ name);
    let total = print_counts !counts in
    print_endline ("\tsum: " ^ (string_of_int total))
  in
  let print_mode mode per_field =
    print_endline mode;
    StrMap.iter print_field !per_field
  in
  StrMap.iter print_mode field_map;
  print_newline ()
(** Ordered-type wrapper around the parse [status] type. *)
module Statuses =
struct
  type t = status

  (* Structural comparison through the unqualified [compare], so the code no
     longer depends on the [Pervasives] module (deprecated since OCaml 4.08,
     absent from OCaml 5). The annotation pins the comparison to [t]. *)
  let compare (a : t) (b : t) = compare a b
end

(** Status-keyed container built with [Xmap.MakeQ]; in this file it is used to
    count how many times each status was seen. *)
module StatMap = Xmap.MakeQ(Statuses)
(* Global tally of the statuses seen by [field_of_conll_sentence]; starts
   empty and is printed by [print_results]. *)
let stat_map = ref StatMap.empty
(* Empties the status tally kept in [stat_map]. Note that [field_map] is not
   touched by this function. *)
let reset () =
stat_map := StatMap.empty
(** Prints the status tally stored in [stat_map] to standard output: a header
    line, then one tab-separated "status, count" line per recorded status,
    each followed by an empty line. *)
let print_results () =
  let status_name (s : status) =
    match s with
    | Idle -> "Idle"
    | PreprocessingError -> "PreprocessingError"
    | LexiconError -> "LexiconError"
    | ParseError -> "ParseError"
    | ParseTimeout -> "ParseTimeout"
    | NotParsed -> "NotParsed"
    | ReductionError -> "ReductionError"
    | TooManyNodes -> "TooManyNodes"
    | NotReduced -> "NotReduced"
    | SemError -> "SemError"
    | NotTranslated -> "NotTranslated"
    | Parsed -> "Parsed"
  in
  print_endline "\nStatistics of CONLL statuses:";
  StatMap.iter !stat_map (fun status count ->
      print_endline ("\t" ^ status_name status ^ "\t" ^ (string_of_int count) ^ "\n"))
(** [field_of_node str_mode n field] returns the value of [field] in node [n]
    and records that value in [field_map] under the mode key [str_mode].

    Only the ["arole"] field is supported; an empty [arole] is reported as
    ["null"].

    @raise Failure for any other field name. *)
let field_of_node str_mode n field =
  match field with
  | "arole" ->
    let content =
      match n.arole with
      | "" -> "null"
      | role -> role
    in
    add_to_field_map str_mode "arole" content;
    content
  | _ -> failwith "field_of_node: ni"
(** [field_of_linear_term str_node field term] extracts [field] from [term]
    through [field_of_node]; [str_node] is passed on as the mode key.

    @raise Failure if [term] is not a [Node]. *)
let field_of_linear_term str_node field term =
  match term with
  | Node n -> field_of_node str_node n field
  | _ -> failwith "field_of_linear_term: ni"
(** [field_of_dependency_tree str_node fields dep_tree] renders, for every
    field name in [fields], the value of that field in each element of the
    array [dep_tree]. Within one field every value is followed by a newline
    and two tabs; the per-field renderings are joined with a newline.

    Every extracted value is also counted in [field_map] (a side effect of
    [field_of_linear_term]).

    @raise Failure if an element is not a [Node] or a field is unsupported. *)
let field_of_dependency_tree str_node fields dep_tree =
  let render field =
    (* A buffer avoids recopying the whole accumulated string at every array
       element, which repeated [^] on an accumulator would do. *)
    let buf = Buffer.create 256 in
    Array.iter (fun term ->
        Buffer.add_string buf (field_of_linear_term str_node field term);
        Buffer.add_string buf "\n\t\t")
      dep_tree;
    Buffer.contents buf
  in
  String.concat "\n" (Xlist.map fields render)
(** [field_of_eniam_sentence fields result] returns the name of the status of
    an ENIAM parse result.

    For a [Parsed] result the dependency tree is traversed solely to update
    the statistics in [field_map] under the [eniam] key; the rendered text is
    thrown away and ["Parsed\n"] is returned.

    @raise Failure for statuses without a case below (for instance
    [PreprocessingError] and [NotTranslated]). *)
let field_of_eniam_sentence fields (result : eniam_parse_result) =
  match result.status with
  | Parsed ->
    (* Called for its counting side effect only. *)
    ignore (field_of_dependency_tree eniam fields result.dependency_tree);
    "Parsed\n"
  | Idle -> "Idle"
  | LexiconError -> "LexiconError"
  | ParseError -> "ParseError"
  | ParseTimeout -> "ParseTimeout"
  | NotParsed -> "NotParsed"
  | ReductionError -> "ReductionError"
  | TooManyNodes -> "TooManyNodes"
  | NotReduced -> "NotReduced"
  | SemError -> "SemError"
  | _ -> failwith "field_of_eniam_sentence"
(** [field_of_conll_sentence fields result] adds the status of a CONLL parse
    result to [stat_map] and returns the status name; for [LexiconError],
    [ParseError] and [ReductionError] the name is followed by a space and the
    result message.

    For a [Parsed] result the dependency tree is traversed solely to update
    the statistics in [field_map] under the [conll] key; the rendered text is
    thrown away and ["Parsed\n"] is returned.

    @raise Failure for statuses without a case below (for instance
    [PreprocessingError] and [NotTranslated]); the status has already been
    tallied by then. *)
let field_of_conll_sentence fields (result : conll_parse_result) =
  let with_msg label = label ^ " " ^ result.msg in
  stat_map := StatMap.add !stat_map result.status;
  match result.status with
  | Parsed ->
    (* Called for its counting side effect only. *)
    ignore (field_of_dependency_tree conll fields result.dependency_tree);
    "Parsed\n"
  | Idle -> "Idle"
  | LexiconError -> with_msg "LexiconError"
  | ParseError -> with_msg "ParseError"
  | ParseTimeout -> "ParseTimeout"
  | NotParsed -> "NotParsed"
  | ReductionError -> with_msg "ReductionError"
  | TooManyNodes -> "TooManyNodes"
  | NotReduced -> "NotReduced"
  | SemError -> "SemError"
  | _ -> failwith "field_of_conll_sentence"
(** [field_of_sentence fields sentence] renders one sentence.

    Raw sentences are returned unchanged; structural, dependency and quoted
    sentences are replaced by a fixed label; ENIAM and CONLL results go
    through [field_of_eniam_sentence] and [field_of_conll_sentence]. For an
    [AltSentence] each alternative is rendered recursively, prefixed with its
    mode name and a tab, and the alternatives are joined with a newline and
    a tab.

    @raise Failure for any other kind of sentence. *)
let rec field_of_sentence fields sentence =
  match sentence with
  | RawSentence s -> s
  | StructSentence _ -> "StructSentence"
  | DepSentence _ -> "DepSentence"
  | ENIAMSentence result -> field_of_eniam_sentence fields result
  | CONLLSentence result -> field_of_conll_sentence fields result
  | QuotedSentences _ -> "QuotedSentences"
  | AltSentence alternatives ->
    String.concat "\n\t" (Xlist.map alternatives (fun (mode, s) ->
        let body = field_of_sentence fields s in
        Visualization.string_of_mode mode ^ "\t" ^ body))
  | _ -> failwith "field_of_sentence: ni"
(** [field_of_paragraph fields paragraph] renders one paragraph.

    A raw paragraph is returned unchanged after a notice is printed on
    standard output. A structured paragraph is the rendering of its sentences
    joined with a newline and a tab. For an [AltParagraph] only the [CONLL]
    alternatives are kept; each one is rendered recursively under a line
    holding its mode name. *)
let rec field_of_paragraph fields paragraph =
  match paragraph with
  | RawParagraph s ->
    print_endline "no fields detected: only raw paragraph";
    s
  | StructParagraph sentences ->
    String.concat "\n\t" (Xlist.map sentences (fun p ->
        field_of_sentence fields p.psentence))
  | AltParagraph alternatives ->
    (* The ENIAM alternative is currently excluded from the selection. *)
    let selected =
      List.filter (fun (mode, _) -> (*mode = ENIAM ||*) mode = CONLL) alternatives in
    String.concat "\n" (Xlist.map selected (fun (mode, t) ->
        let body = field_of_paragraph fields t in
        Visualization.string_of_mode mode ^ "\n\t" ^ body))
    (* field_of_paragraph fields (snd @@ List.find (fun (mode,text) -> mode = ENIAM || mode = CONLL) l) *)
(** [print_fields_rec fields text] builds the textual report for a whole text;
    despite its name it returns the string and prints nothing itself.

    Raw text is returned unchanged. A structured text is the rendering of its
    paragraphs separated by an empty line, with a trailing newline. For an
    [AltText] only the [Struct] and [CONLL] alternatives are kept; each one
    is rendered recursively under a line holding its mode name. *)
let rec print_fields_rec fields text =
  match text with
  | RawText s -> s
    (* print_endline "no fields detected: only raw text"; *)
  | StructText paragraphs ->
    let rendered = Xlist.map paragraphs (field_of_paragraph fields) in
    String.concat "\n\n" rendered ^ "\n"
  | AltText alternatives ->
    let selected =
      List.filter (fun (mode, _) -> mode = Struct || mode = CONLL) alternatives in
    String.concat "\n" (Xlist.map selected (fun (mode, t) ->
        let body = print_fields_rec fields t in
        Visualization.string_of_mode mode ^ "\n\t" ^ body))
    (* print_fields_rec fields (snd @@ List.find (fun (m,t) -> m = Struct (*|| m = ENIAM*) || m = CONLL) l) *)
(** [print_fields fields text] prints the report built by [print_fields_rec]
    to standard output. The statistics collected in [field_map] along the way
    are not printed here (the call is disabled below). *)
let print_fields fields text =
  let report = print_fields_rec fields text in
  print_endline report
  (* ; print_field_map () *)