LCGfields.ml
4.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
open LCGtypes
open Xstd
open ExecTypes
(** Key of the ENIAM bucket in [field_map]; passed as the [str_mode]
    argument when fields of ENIAM parses are counted. *)
let eniam : string = "eniam"

(** Key of the CONLL bucket in [field_map]; passed as the [str_mode]
    argument when fields of CONLL parses are counted. *)
let conll : string = "conll"
(** Ordered string keys, usable as the argument of [Map.Make]. *)
module Strings =
struct
  type t = string

  (* [String.compare] yields the same ordering on strings as the
     polymorphic [Pervasives.compare] used previously, but it is
     type-specific and does not depend on the [Pervasives] module, which
     recent compilers no longer provide. *)
  let compare (a : t) (b : t) = String.compare a b
end

(** Maps keyed by strings. *)
module StrMap = Map.Make(Strings)
(** Occurrence statistics, nested three levels deep:
    mode key ([eniam] or [conll]) -> field name -> field value -> counter.
    Each mode starts with an empty, mutable map of fields; the inner
    levels are refs so that they can be grown in place. *)
let field_map =
  let fresh_bucket () = ref StrMap.empty in
  let with_eniam = StrMap.add eniam (fresh_bucket ()) StrMap.empty in
  StrMap.add conll (fresh_bucket ()) with_eniam
(** [add_to_field_map str_mode field content] records one more occurrence
    of the value [content] for [field] in the [str_mode] bucket of
    [field_map]. The per-field map and the counter are created on first
    use; a new counter starts at 1.
    @raise Not_found if [str_mode] is not a key of [field_map]. *)
let add_to_field_map str_mode field content =
  let fields_of_mode = StrMap.find str_mode field_map in
  let counters =
    try StrMap.find field !fields_of_mode
    with Not_found ->
      (* First value seen for this field: register an empty counter map. *)
      (let fresh = ref StrMap.empty in
       fields_of_mode := StrMap.add field fresh !fields_of_mode;
       fresh) in
  try incr (StrMap.find content !counters)
  with Not_found -> counters := StrMap.add content (ref 1) !counters
(** Dumps [field_map] to standard output: every mode key, under it every
    field name (one tab), under that every observed value with its count
    (two tabs), then the sum of the counts of that field. A final empty
    line closes the report. *)
let print_field_map () =
  let print_field field_name counters =
    print_endline ("\t" ^ field_name);
    (* Print each value and accumulate the counts on the way. *)
    let total =
      StrMap.fold (fun value counter sum ->
        let sum = sum + !counter in
        print_endline ("\t\t" ^ value ^ "\t\t" ^ (string_of_int !counter));
        sum) !counters 0 in
    print_endline ("\tsum: " ^ (string_of_int total)) in
  let print_mode mode fields =
    print_endline mode;
    StrMap.iter print_field !fields in
  StrMap.iter print_mode field_map;
  print_newline ()
(** [field_of_node str_mode n field] returns the value of [field] in node
    [n] and counts it in the [str_mode] statistics via [add_to_field_map].
    Only the field "arole" is handled; an empty [n.arole] is reported as
    "null".
    @raise Failure for any other field name. *)
let field_of_node str_mode n field =
  match field with
  | "arole" ->
    let content = (match n.arole with "" -> "null" | role -> role) in
    add_to_field_map str_mode "arole" content;
    content
  | _ -> failwith "field_of_node: ni"
(** [field_of_linear_term str_node field term] extracts [field] from
    [term] when it is a [Node], delegating to [field_of_node].
    @raise Failure for every other kind of term. *)
let field_of_linear_term str_node field term =
  match term with
  | Node node -> field_of_node str_node node field
  | _ -> failwith "field_of_linear_term: ni"
(** [field_of_dependency_tree str_node fields dep_tree] renders, for every
    field name in [fields], the value of that field in each element of the
    array [dep_tree] (in array order), each value followed by "\n\t\t".
    The per-field renderings are joined with "\n". Every extracted value
    is also counted as a side effect of [field_of_linear_term].

    The per-field text is accumulated in a [Buffer] rather than with
    repeated [^], so the cost stays linear in the size of the output. *)
let field_of_dependency_tree str_node fields dep_tree =
  let render_field field =
    let buf = Buffer.create 256 in
    Array.iter (fun term ->
      Buffer.add_string buf (field_of_linear_term str_node field term);
      Buffer.add_string buf "\n\t\t") dep_tree;
    Buffer.contents buf in
  String.concat "\n" (Xlist.map fields render_field)
(** [field_of_eniam_sentence fields tokens result] returns a label for the
    status of an ENIAM parse result. For a [Parsed] result the dependency
    tree is additionally traversed so that the values of [fields] are
    counted in the [eniam] statistics; the returned string is "Parsed\n".
    [tokens] is not used by this function.
    @raise Failure for a status not listed below. *)
let field_of_eniam_sentence fields tokens (result : eniam_parse_result) =
  match result.status with
    Idle -> "Idle"
  (* | PreprocessingError -> "PreprocessingError" *)
  | LexiconError -> "LexiconError"
  | ParseError -> "ParseError"
  | ParseTimeout -> "ParseTimeout"
  | NotParsed -> "NotParsed"
  | ReductionError -> "ReductionError"
  | TooManyNodes -> "TooManyNodes"
  | NotReduced -> "NotReduced"
  | SemError -> "SemError"
  (* | NotTranslated -> "NotTranslated" *)
  | Parsed ->
    (* The rendered text is deliberately discarded: the traversal is run
       only for its counting side effect, so no string is built around it. *)
    ignore (field_of_dependency_tree eniam fields result.dependency_tree : string);
    "Parsed\n"
  | _ -> failwith "field_of_eniam_sentence"
(** [field_of_conll_sentence fields tokens result] returns a label for the
    status of a CONLL parse result. For a [Parsed] result the dependency
    tree is additionally traversed so that the values of [fields] are
    counted in the [conll] statistics; the returned string is "Parsed\n".
    [tokens] is not used by this function.
    @raise Failure for a status not listed below. *)
let field_of_conll_sentence fields tokens (result : conll_parse_result) =
  match result.status with
    Idle -> "Idle"
  (* | PreprocessingError -> "PreprocessingError" *)
  | LexiconError -> "LexiconError"
  | ParseError -> "ParseError"
  | ParseTimeout -> "ParseTimeout"
  | NotParsed -> "NotParsed"
  | ReductionError -> "ReductionError"
  | TooManyNodes -> "TooManyNodes"
  | NotReduced -> "NotReduced"
  | SemError -> "SemError"
  (* | NotTranslated -> "NotTranslated" *)
  | Parsed ->
    (* The rendered text is deliberately discarded: the traversal is run
       only for its counting side effect, so no string is built around it. *)
    ignore (field_of_dependency_tree conll fields result.dependency_tree : string);
    "Parsed\n"
  | _ -> failwith "field_of_conll_sentence"
(** [field_of_sentence fields tokens sentence] renders one sentence:
    - a raw sentence yields its own text;
    - struct, dep and quoted sentences yield only their constructor name;
    - ENIAM and CONLL sentences are handed to the matching
      [field_of_*_sentence] function;
    - alternatives are rendered one by one, each prefixed with its mode
      name and a tab, and joined with "\n\t". *)
let rec field_of_sentence fields tokens sentence =
  match sentence with
  | RawSentence raw -> raw
  | StructSentence _ -> "StructSentence"
  | DepSentence _ -> "DepSentence"
  | ENIAMSentence result -> field_of_eniam_sentence fields tokens result
  | CONLLSentence result -> field_of_conll_sentence fields tokens result
  | QuotedSentences _ -> "QuotedSentences"
  | AltSentence alternatives ->
    let render (mode, alternative) =
      (* Render the sentence before naming the mode, as the original
         expression did in practice. *)
      let body = field_of_sentence fields tokens alternative in
      Visualization.string_of_mode mode ^ "\t" ^ body in
    String.concat "\n\t" (Xlist.map alternatives render)
  (* | _ -> failwith "field_of_sentence: ni" *)
(** [field_of_paragraph fields tokens paragraph] renders one paragraph:
    - a raw paragraph prints a notice on standard output and yields its
      own text;
    - a structured paragraph yields its sentences rendered with
      [field_of_sentence] and joined with "\n\t";
    - among alternatives only the [ENIAM] and [CONLL] ones are kept; each
      is rendered under a line with its mode name, and the results are
      joined with "\n". *)
let rec field_of_paragraph fields tokens paragraph =
  match paragraph with
  | RawParagraph raw ->
    print_endline "no fields detected: only raw paragraph";
    raw
  | StructParagraph sentences ->
    String.concat "\n\t"
      (Xlist.map sentences (fun p -> field_of_sentence fields tokens p.psentence))
  | AltParagraph alternatives ->
    let from_parser (mode, _) = mode = ENIAM || mode = CONLL in
    let render (mode, alternative) =
      (* Render the paragraph before naming the mode, as the original
         expression did in practice. *)
      let body = field_of_paragraph fields tokens alternative in
      Visualization.string_of_mode mode ^ "\n\t" ^ body in
    String.concat "\n" (Xlist.map (List.filter from_parser alternatives) render)
  (* field_of_paragraph fields tokens (snd @@ List.find (fun (mode,text) -> mode = ENIAM || mode = CONLL) l) *)
(** [print_fields_rec fields text] prints the rendering of [fields] for
    [text] on standard output:
    - raw text only produces a notice;
    - structured text prints its paragraphs, rendered with
      [field_of_paragraph] and separated by an empty line;
    - for alternatives, the first one whose mode is [Struct], [ENIAM] or
      [CONLL] is processed recursively.
    @raise Not_found if no alternative has one of these modes. *)
let rec print_fields_rec fields text =
  match text with
  | RawText _ -> print_endline "no fields detected: only raw text"
  | StructText (paragraphs, tokens) ->
    let rendered = Xlist.map paragraphs (field_of_paragraph fields tokens) in
    print_endline (String.concat "\n\n" rendered ^ "\n")
  | AltText alternatives ->
    let supported (mode, _) = mode = Struct || mode = ENIAM || mode = CONLL in
    let (_, chosen) = List.find supported alternatives in
    print_fields_rec fields chosen
(** [print_fields fields text] prints the field rendering of [text] with
    [print_fields_rec] and then the accumulated statistics with
    [print_field_map]. *)
let print_fields fields text =
  let () = print_fields_rec fields text in
  print_field_map ()