(* diagnostics/LCGfields.ml *)
open LCGtypes
open Xstd
open ExecTypes

(* Mode keys under which [field_map] keeps its per-mode statistics:
   one for ENIAM parse results and one for CONLL parse results. *)
let eniam = "eniam"
let conll = "conll"

(* Ordered-type wrapper around [string], suitable as the argument of
   [Map.Make]. *)
module Strings =
  struct
    type t = string
    (* The [Pervasives] module is deprecated and no longer exists in
       OCaml 5, so the standard comparison is referenced unqualified.
       This binding is not recursive: the [compare] on the right-hand side
       is the standard one, specialised to strings by the annotations. *)
    let compare (a : t) (b : t) = compare a b
  end

(* Maps keyed by strings. *)
module StrMap = Map.Make(Strings)

(* Statistics table with three levels:
   mode key -> ref (field name -> ref (field content -> ref counter)).
   Every known mode starts with its own fresh, empty field table. *)
let field_map =
  List.fold_left
    (fun modes mode -> StrMap.add mode (ref StrMap.empty) modes)
    StrMap.empty
    [eniam; conll]

(* Records one occurrence of [content] as a value of [field] for the mode
   [str_mode].
   The table for [field] is created on first use, and a content seen for the
   first time starts with a count of 1.
   Raises [Not_found] when [str_mode] is not a key of [field_map]. *)
let add_to_field_map str_mode field content =
  let fields_of_mode = StrMap.find str_mode field_map in
  let counters =
    try StrMap.find field !fields_of_mode
    with Not_found ->
      begin
        let fresh = ref StrMap.empty in
        fields_of_mode := StrMap.add field fresh !fields_of_mode;
        fresh
      end in
  try incr (StrMap.find content !counters)
  with Not_found -> counters := StrMap.add content (ref 1) !counters

(* Dumps [field_map] to standard output.
   For every mode it prints the mode key, then each field name (one tab),
   then each content with its count (two tabs), followed by the sum of the
   counts for that field. A final empty line ends the dump. *)
let print_field_map () =
  (* Prints every content/count pair and returns the total of the counts. *)
  let print_counts counts =
    StrMap.fold (fun content count total ->
      print_endline ("\t\t" ^ content ^ "\t\t" ^ (string_of_int !count));
      total + !count) counts 0 in
  let print_field field counts =
    print_endline ("\t" ^ field);
    let total = print_counts !counts in
    print_endline ("\tsum: " ^ (string_of_int total)) in
  let print_mode mode fields =
    print_endline mode;
    StrMap.iter print_field !fields in
  StrMap.iter print_mode field_map;
  print_newline ()

(* ---- Status statistics ---- *)
  (* Ordered-type wrapper around the parse [status] type, used as the key
     type of [StatMap]. *)
  module Statuses =
    struct
      type t = status
      (* The [Pervasives] module is deprecated and no longer exists in
         OCaml 5, so the standard structural comparison is referenced
         unqualified. This binding is not recursive: the [compare] on the
         right-hand side is the standard one. *)
      let compare (a : t) (b : t) = compare a b
    end

  (* Map keyed by parse statuses, built with the project functor
     [Xmap.MakeQ]. NOTE(review): presumably a quantity map that counts how
     many times each key was added - in this file [add] is called with a key
     only and [iter] hands an int to its callback; confirm against Xmap. *)
  module StatMap = Xmap.MakeQ(Statuses)

  (* Running tally of statuses; filled by [field_of_conll_sentence] and
     printed by [print_results]. *)
  let stat_map = ref StatMap.empty

  (* Discards every status recorded so far in [stat_map]. *)
  let reset () = stat_map := StatMap.empty

  (* Prints the content of [stat_map] to standard output: a header line,
     then one line per status with its name and count, each followed by an
     empty line. *)
  let print_results () =
    let name_of_status = function
      | Idle -> "Idle"
      | PreprocessingError -> "PreprocessingError"
      | LexiconError -> "LexiconError"
      | ParseError -> "ParseError"
      | ParseTimeout -> "ParseTimeout"
      | NotParsed -> "NotParsed"
      | ReductionError -> "ReductionError"
      | TooManyNodes -> "TooManyNodes"
      | NotReduced -> "NotReduced"
      | SemError -> "SemError"
      | NotTranslated -> "NotTranslated"
      | Parsed -> "Parsed" in
    print_endline "\nStatistics of CONLL statuses:";
    StatMap.iter !stat_map (fun status count ->
      print_endline
        ("\t" ^ (name_of_status status) ^ "\t" ^ (string_of_int count) ^ "\n"))

(* ---- Field extraction ---- *)

(* Returns the value of the named field of node [n] and records it in
   [field_map] under the mode [str_mode].
   Only the field "arole" is supported; an empty role is reported as "null".
   Any other field name fails with "field_of_node: ni". *)
let field_of_node str_mode n field =
  match field with
  | "arole" ->
      let content = match n.arole with "" -> "null" | role -> role in
      add_to_field_map str_mode "arole" content;
      content
  | _ -> failwith "field_of_node: ni"

(* Extracts [field] from a linear term for the given mode key.
   Only [Node] terms are handled; every other term fails with
   "field_of_linear_term: ni". *)
let field_of_linear_term str_mode field term =
  match term with
  | Node node -> field_of_node str_mode node field
  | _ -> failwith "field_of_linear_term: ni"

(* Renders the requested [fields] for every term of [dep_tree].
   For each field, the values of all terms are written one after another,
   each followed by a newline and two tabs; the per-field blocks are then
   joined with newlines.
   [str_node] is the mode key passed down to [field_of_linear_term].
   The per-field text is accumulated in a [Buffer], because repeated
   string concatenation in a fold is quadratic in the size of the tree. *)
let field_of_dependency_tree str_node fields dep_tree =
  String.concat "\n" (Xlist.map fields (fun field ->
    let buf = Buffer.create 256 in
    Array.iter (fun term ->
      Buffer.add_string buf (field_of_linear_term str_node field term);
      Buffer.add_string buf "\n\t\t") dep_tree;
    Buffer.contents buf))

(* Summarises an ENIAM parse result as a status label.
   For a [Parsed] result the dependency tree is walked for its side effect
   only (the field values are recorded in [field_map] under the [eniam]
   key) and the label "Parsed" followed by a newline is returned.
   Statuses without a branch of their own fail with
   "field_of_eniam_sentence". [tokens] is currently unused. *)
let field_of_eniam_sentence fields tokens (result : eniam_parse_result) =
  match result.status with
  | Idle -> "Idle"
  (* | PreprocessingError -> "PreprocessingError" *)
  | LexiconError -> "LexiconError"
  | ParseError -> "ParseError"
  | ParseTimeout -> "ParseTimeout"
  | NotParsed -> "NotParsed"
  | ReductionError -> "ReductionError"
  | TooManyNodes -> "TooManyNodes"
  | NotReduced -> "NotReduced"
  | SemError -> "SemError"
  (* | NotTranslated -> "NotTranslated"  *)
  | Parsed ->
      (* The rendered text is not used: the call only feeds the statistics. *)
      ignore (field_of_dependency_tree eniam fields result.dependency_tree : string);
      "Parsed\n"
  | _ -> failwith "field_of_eniam_sentence"

(* Summarises a CONLL parse result as a status label and adds its status to
   [stat_map].
   Lexicon, parse and reduction errors carry the result message after the
   label. For a [Parsed] result the dependency tree is walked for its side
   effect only (the field values are recorded in [field_map] under the
   [conll] key) and the label "Parsed" followed by a newline is returned.
   Statuses without a branch of their own fail with
   "field_of_conll_sentence". [tokens] is currently unused. *)
let field_of_conll_sentence fields tokens (result : conll_parse_result) =
  stat_map := StatMap.add !stat_map result.status;
  match result.status with
  | Idle -> "Idle"
  (* | PreprocessingError -> "PreprocessingError" *)
  | LexiconError -> "LexiconError " ^ result.msg
  | ParseError -> "ParseError " ^ result.msg
  | ParseTimeout -> "ParseTimeout"
  | NotParsed -> "NotParsed"
  | ReductionError -> "ReductionError " ^ result.msg
  | TooManyNodes -> "TooManyNodes"
  | NotReduced -> "NotReduced"
  | SemError -> "SemError"
  (* | NotTranslated -> "NotTranslated"  *)
  | Parsed ->
      (* The rendered text is not used: the call only feeds the statistics. *)
      ignore (field_of_dependency_tree conll fields result.dependency_tree : string);
      "Parsed\n"
  | _ -> failwith "field_of_conll_sentence"


(* Renders one sentence for the field report.
   Raw sentences are returned unchanged, ENIAM and CONLL results are
   delegated to their dedicated functions, alternatives are rendered one per
   line with the mode name in front, and the remaining handled variants
   yield only their constructor name.
   Any other variant fails with "field_of_sentence: ni". *)
let rec field_of_sentence fields tokens = function
  | RawSentence s -> s
  | StructSentence _ -> "StructSentence"
  | DepSentence _ -> "DepSentence"
  | ENIAMSentence result -> field_of_eniam_sentence fields tokens result
  | CONLLSentence result -> field_of_conll_sentence fields tokens result
  | QuotedSentences _ -> "QuotedSentences"
  | AltSentence l ->
      String.concat "\n\t" (Xlist.map l (fun (m, s) ->
        Visualization.string_of_mode m ^ "\t" ^ (field_of_sentence fields tokens s)))
  | _ -> failwith "field_of_sentence: ni"

(* Renders one paragraph for the field report.
   A raw paragraph prints a notice and is returned unchanged; a structured
   paragraph renders each of its sentences; among the alternatives only the
   ones in CONLL mode are rendered, each preceded by its mode name. *)
let rec field_of_paragraph fields tokens = function
  | RawParagraph s ->
      print_endline "no fields detected: only raw paragraph";
      s
  | StructParagraph sentences ->
      String.concat "\n\t"
        (Xlist.map sentences (fun p -> field_of_sentence fields tokens p.psentence))
  | AltParagraph l ->
      (* The ENIAM alternative is switched off in the filter below. *)
      let selected = List.filter (fun (m, _) -> (*m = ENIAM ||*) m = CONLL) l in
      String.concat "\n" (Xlist.map selected (fun (m, t) ->
        Visualization.string_of_mode m ^ "\n\t" ^ (field_of_paragraph fields tokens t)))
      (* field_of_paragraph fields tokens (snd @@ List.find (fun (mode,text) -> mode = ENIAM || mode = CONLL) l) *)

(* Prints the field report of a text to standard output.
   Raw text only produces a notice. Structured text prints its paragraphs
   separated by empty lines. For alternatives, the first one whose mode is
   Struct or CONLL is printed.
   Raises [Not_found] when no alternative has one of these modes. *)
let rec print_fields_rec fields = function
  | RawText _ -> print_endline "no fields detected: only raw text"
  | StructText (paragraphs, tokens) ->
      print_endline
        (String.concat "\n\n" (Xlist.map paragraphs (field_of_paragraph fields tokens)) ^ "\n")
  | AltText l ->
      let (_, text) =
        List.find (fun (m, _) -> m = Struct (*|| m = ENIAM*) || m = CONLL) l in
      print_fields_rec fields text

(* Entry point: prints the report for [fields] over [text].
   The dump of the collected field statistics is currently switched off. *)
let print_fields fields text =
  print_fields_rec fields text
  (* ; print_field_map () *)