Commit 8bc083e57ed28269233702785365375207c3b6cf

Authored by Daniel Oklesiński
1 parent 5007a782

dodanie wstępnej diagnostyki sparsowanych zdań

diagnostics/LCGfields.ml 0 → 100644
  1 +open LCGtypes
  2 +open Xstd
  3 +open ExecTypes
  4 +
(** Mode key under which statistics gathered from ENIAM sentences are kept
    in [field_map]. *)
let eniam : string = "eniam"

(** Mode key under which statistics gathered from CONLL sentences are kept
    in [field_map]. *)
let conll : string = "conll"
  7 +
(** Strings as ordered keys; this module is the argument given to [Map.Make]
    to build [StrMap]. *)
module Strings =
  struct
    type t = string

    (* [String.compare] orders strings exactly like the polymorphic compare
       does, but it is type-specific and does not go through the [Pervasives]
       module, which is deprecated in recent compilers and absent from
       OCaml 5. *)
    let compare a b = String.compare a b
  end
  13 +
(** Maps keyed by strings. *)
module StrMap = Map.Make(Strings)

(** Statistics gathered so far, on three levels:
    mode key ([eniam] or [conll]) -> field name -> field content -> number of
    occurrences. The two inner levels and the counters are references, so
    they are updated in place; the set of mode keys is fixed here. *)
let field_map =
  let with_eniam = StrMap.add eniam (ref StrMap.empty) StrMap.empty in
  StrMap.add conll (ref StrMap.empty) with_eniam
  17 +
(** [add_to_field_map str_mode field content] records one more occurrence of
    the value [content] of the field [field] in the statistics that
    [field_map] keeps for the mode [str_mode].

    @raise Not_found if [str_mode] is not a key of [field_map]. *)
let add_to_field_map str_mode field content =
  let per_mode = StrMap.find str_mode field_map in
  (* Counters of this field; created and registered on its first occurrence. *)
  let counters =
    try StrMap.find field !per_mode
    with Not_found ->
      let fresh = ref StrMap.empty in
      per_mode := StrMap.add field fresh !per_mode;
      fresh in
  (* Bump the counter of this content, or start it at 1 when it is new. *)
  try incr (StrMap.find content !counters)
  with Not_found -> counters := StrMap.add content (ref 1) !counters
  27 +
(** Prints the whole of [field_map] on standard output.

    For every mode its key is printed on a line of its own. For every field
    of that mode the field name follows, indented with one tab, then one line
    per content (two tabs, the content, two tabs, its counter), and finally a
    ["\tsum: "] line with the total of the counters of that field. An empty
    line ends the report. *)
let print_field_map () =
  (* Prints the counters of one field and returns their total. *)
  let print_contents contents =
    StrMap.fold (fun content count total ->
      print_endline ("\t\t" ^ content ^ "\t\t" ^ (string_of_int !count));
      total + !count) contents 0 in
  let print_field field contents =
    print_endline ("\t" ^ field);
    let total = print_contents !contents in
    print_endline ("\tsum: " ^ (string_of_int total)) in
  let print_mode mode fields =
    print_endline mode;
    StrMap.iter print_field !fields in
  StrMap.iter print_mode field_map;
  print_newline ()
  42 +
  43 +
(** [field_of_node str_mode n field] returns the content of [field] in the
    node [n] and registers it in the statistics of [str_mode] through
    [add_to_field_map].

    Only the field ["arole"] is handled; an empty [n.arole] is reported as
    ["null"].

    @raise Failure for any other field name ("ni" presumably stands for
    "not implemented"). *)
let field_of_node str_mode n field =
  match field with
  | "arole" ->
      let content = match n.arole with "" -> "null" | arole -> arole in
      add_to_field_map str_mode "arole" content;
      content
  | _ -> failwith "field_of_node: ni"
  48 +
(** [field_of_linear_term str_node field term] returns the content of [field]
    for [term], which has to be a [Node]; the work is delegated to
    [field_of_node], with [str_node] as the mode key.

    @raise Failure if [term] is built with any constructor other than
    [Node]. *)
let field_of_linear_term str_node field term =
  match term with
  | Node n -> field_of_node str_node n field
  | _ -> failwith "field_of_linear_term: ni"
  52 +
(** [field_of_dependency_tree str_node fields dep_tree] renders, for every
    field name in [fields], the content of that field in each element of the
    array [dep_tree], every content being followed by ["\n\t\t"]; the texts
    obtained for the individual fields are joined with ["\n"].

    As a side effect every content is registered in the statistics of the
    mode [str_node] (see [field_of_linear_term]).

    @raise Failure if an element of [dep_tree] or a field name is not
    supported by [field_of_linear_term]. *)
let field_of_dependency_tree str_node fields dep_tree =
  String.concat "\n" (Xlist.map fields (fun field ->
    (* A buffer keeps the rendering linear in the size of the output;
       appending with [^] on every element copies the whole accumulated
       text each time. *)
    let buf = Buffer.create 256 in
    Array.iter (fun x ->
      Buffer.add_string buf (field_of_linear_term str_node field x);
      Buffer.add_string buf "\n\t\t") dep_tree;
    Buffer.contents buf))
  57 +
(* Shared implementation of [field_of_eniam_sentence] and
   [field_of_conll_sentence].

   Returns the name of [status]. For [Parsed] the dependency tree is first
   traversed with [field_of_dependency_tree], solely to register the contents
   of [fields] in the statistics of [str_mode]; the text produced by that
   traversal is dropped and ["Parsed\n"] is returned.

   Raises [Failure caller] for a status that is not listed below. *)
let field_of_parse_result caller str_mode fields status dependency_tree =
  match status with
    Idle -> "Idle"
  (* | PreprocessingError -> "PreprocessingError" *)
  | LexiconError -> "LexiconError"
  | ParseError -> "ParseError"
  | ParseTimeout -> "ParseTimeout"
  | NotParsed -> "NotParsed"
  | ReductionError -> "ReductionError"
  | TooManyNodes -> "TooManyNodes"
  | NotReduced -> "NotReduced"
  | SemError -> "SemError"
  (* | NotTranslated -> "NotTranslated" *)
  | Parsed ->
      ignore (field_of_dependency_tree str_mode fields dependency_tree : string);
      "Parsed\n"
  | _ -> failwith caller

(** [field_of_eniam_sentence fields tokens result] returns the name of the
    status of [result]; when the status is [Parsed], the contents of [fields]
    found in [result.dependency_tree] are registered in the [eniam]
    statistics and ["Parsed\n"] is returned. [tokens] is not used.

    @raise Failure with the message ["field_of_eniam_sentence"] for a status
    that is not handled. *)
let field_of_eniam_sentence fields tokens (result : eniam_parse_result) =
  field_of_parse_result "field_of_eniam_sentence" eniam fields
    result.status result.dependency_tree

(** [field_of_conll_sentence fields tokens result] returns the name of the
    status of [result]; when the status is [Parsed], the contents of [fields]
    found in [result.dependency_tree] are registered in the [conll]
    statistics and ["Parsed\n"] is returned. [tokens] is not used.

    @raise Failure with the message ["field_of_conll_sentence"] for a status
    that is not handled. *)
let field_of_conll_sentence fields tokens (result : conll_parse_result) =
  field_of_parse_result "field_of_conll_sentence" conll fields
    result.status result.dependency_tree
  89 +
  90 +
(** [field_of_sentence fields tokens sentence] describes [sentence] as text.

    A raw sentence is returned unchanged; ENIAM and CONLL sentences are
    handled by [field_of_eniam_sentence] and [field_of_conll_sentence], which
    also update the statistics; struct, dep and quoted sentences are only
    named. For [AltSentence] every alternative is rendered as its mode name,
    a tab and the description of the alternative, and the alternatives are
    joined with ["\n\t"]. *)
let rec field_of_sentence fields tokens sentence =
  match sentence with
  | RawSentence s -> s
  | StructSentence _ -> "StructSentence"
  | DepSentence _ -> "DepSentence"
  | ENIAMSentence result -> field_of_eniam_sentence fields tokens result
  | CONLLSentence result -> field_of_conll_sentence fields tokens result
  | QuotedSentences _ -> "QuotedSentences"
  | AltSentence l ->
      let render (m, s) =
        Visualization.string_of_mode m ^ "\t" ^ (field_of_sentence fields tokens s) in
      String.concat "\n\t" (Xlist.map l render)
  (* | _ -> failwith "field_of_sentence: ni" *)
  101 +
(** [field_of_paragraph fields tokens paragraph] describes [paragraph] as
    text.

    A raw paragraph is returned unchanged, after a notice is printed on
    standard output. For a struct paragraph the descriptions of its sentences
    (see [field_of_sentence]) are joined with ["\n\t"]. For [AltParagraph]
    only the ENIAM and CONLL alternatives are kept; each is rendered as its
    mode name, ["\n\t"] and the description of the alternative, and the
    results are joined with ["\n"]. *)
let rec field_of_paragraph fields tokens paragraph =
  match paragraph with
  | RawParagraph s ->
      print_endline "no fields detected: only raw paragraph";
      s
  | StructParagraph sentences ->
      let render p = field_of_sentence fields tokens p.psentence in
      String.concat "\n\t" (Xlist.map sentences render)
  | AltParagraph l ->
      let parsed = List.filter (fun (m, _) -> m = ENIAM || m = CONLL) l in
      let render (m, t) =
        Visualization.string_of_mode m ^ "\n\t" ^ (field_of_paragraph fields tokens t) in
      String.concat "\n" (Xlist.map parsed render)
      (* Alternative kept from the original: describe only the first
         ENIAM/CONLL variant:
         field_of_paragraph fields tokens (snd @@ List.find (fun (mode,text) -> mode = ENIAM || mode = CONLL) l) *)
  110 +
(** [print_fields_rec fields text] prints on standard output the description
    of [text] built by [field_of_paragraph]; as a side effect the statistics
    in [field_map] are updated.

    For raw text only a notice is printed. For struct text the descriptions
    of the paragraphs are joined with an empty line and followed by a
    newline. For [AltText] the first alternative whose mode is [Struct],
    [ENIAM] or [CONLL] is printed.

    @raise Not_found if an [AltText] has no alternative in one of these
    modes. *)
let rec print_fields_rec fields text =
  match text with
  | RawText _ -> print_endline "no fields detected: only raw text"
  | StructText (paragraphs, tokens) ->
      let rendered = Xlist.map paragraphs (field_of_paragraph fields tokens) in
      print_endline (String.concat "\n\n" rendered ^ "\n")
  | AltText l ->
      let supported (m, _) = m = Struct || m = ENIAM || m = CONLL in
      let _, chosen = List.find supported l in
      print_fields_rec fields chosen
  117 +
(** [print_fields fields text] prints the description of [text] for the given
    field names (see [print_fields_rec]) and then the statistics accumulated
    in [field_map] (see [print_field_map]). The statistics are not reset, so
    they include whatever was registered by earlier calls. *)
let print_fields fields text =
  let () = print_fields_rec fields text in
  print_field_map ()
... ...
parser/exec.ml
... ... @@ -198,10 +198,10 @@ let conll_parse_sentence timeout test_only_flag id paths tokens =
198 198 let result = {result with lex_time=time3 -. time2} in
199 199 try
200 200 (* print_endline "conll_parse_sentence 1"; *)
201   - LCGlatexOf.print_references "results/" "references1" references;
  201 + (* LCGlatexOf.print_references "results/" "references1" references; *)
202 202 let parsed_dep_chart = LCGchart.dep_parse dep_chart references timeout time_fun in (* uwaga: niejawna zmiana imperatywna w references *)
203 203 (* print_endline "conll_parse_sentence 2"; *)
204   - LCGlatexOf.print_references "results/" "references2" references;
  204 + (*LCGlatexOf.print_references "results/" "references2" references;*)
205 205 let time4 = time_fun () in
206 206 let result = if test_only_flag then result else {result with parsed_dep_chart=parsed_dep_chart} in
207 207 let result = {result with parse_time=time4 -. time3} in
... ... @@ -324,21 +324,21 @@ let rec extract_query_text = function
324 324 | _ -> failwith "extract_query_text"
325 325  
326 326 let process_query pre_in pre_out timeout test_only_flag id full_query max_n =
327   - print_endline "process_query 0";
  327 + (* print_endline "process_query 0"; *)
328 328 let result = {empty_result with input_text=translate_text full_query} in
329 329 let time1 = time_fun () in
330   - print_endline "process_query 1";
  330 + (* print_endline "process_query 1"; *)
331 331 Marshal.to_channel pre_out full_query [];
332 332 flush pre_out;
333   - print_endline "process_query 2";
  333 + (* print_endline "process_query 2"; *)
334 334 let pre_text,msg,pre_time1 = (Marshal.from_channel pre_in : PreTypes.text * string * float) in
335 335 let time2 = time_fun () in
336 336 let result = if test_only_flag then result else {result with pre_text=translate_text pre_text} in
337 337 let result = {result with pre_time1=pre_time1; pre_time2=time2 -. time1} in
338 338 if msg <> "" then {result with status=PreprocessingError; msg=msg} else (
339   - print_endline "process_query 3";
  339 + (* print_endline "process_query 3"; *)
340 340 let parsed_text = parse_text timeout test_only_flag Struct (translate_text pre_text) in
341   - print_endline "process_query 4";
  341 + (* print_endline "process_query 4"; *)
342 342 let time3 = time_fun () in
343 343 let result = if test_only_flag then result else {result with status=Parsed; parsed_text=parsed_text} in
344 344 let result = {result with parse_time=time3 -. time2} in
... ...
parser/makefile
1 1 OCAMLC=ocamlc
2 2 OCAMLOPT=ocamlopt
3 3 OCAMLDEP=ocamldep
4   -INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../../../Dropbox/lib/latexvis -I ../../installed/latexvis -I ../lib/xt -I ../../../Dropbox/Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre -I ../corpora
  4 +INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../../../Dropbox/lib/latexvis -I ../../installed/latexvis -I ../lib/xt -I ../../../Dropbox/Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre -I ../corpora -I ../diagnostics
5 5 #INCLUDES=-I +xml-light -I +xlib -I ../pre
6 6 OCAMLFLAGS=$(INCLUDES) -g
7 7 OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa latexvis.cmxa #nkjp.cmxa
... ... @@ -13,7 +13,7 @@ LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexO
13 13 DISAMB= disambSelPref.ml disambLemma.ml
14 14 SEM= semGraph.ml semTypes.ml semStringOf.ml semLatexOf.ml semMmlOf.ml semMrl.ml
15 15 #SEM= semGraph.ml semTypes.ml semStringOf.ml semMmlOf.ml semMrl.ml
16   -EXEC= execTypes.ml visualization.ml exec.ml
  16 +EXEC= execTypes.ml visualization.ml exec.ml ../diagnostics/LCGfields.ml
17 17  
18 18 all:
19 19 $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml
... ...
parser/pipe.ml
... ... @@ -187,19 +187,20 @@ let process_id s =
187 187 let process_conll_corpus filename =
188 188 let corpus = File.file_in filename (fun file -> CONLL.match_corpus (CONLL.load_corpus file)) in
189 189 print_endline "process_conll_corpus";
190   - let corpus = [List.hd corpus] in
  190 + (* let corpus = [List.hd corpus] in *)
191 191 let ic,oc = Unix.open_connection (get_sock_addr Paths.pre_host Paths.pre_port) in
192 192 Xlist.iter corpus (fun query ->
193 193 let id = process_id (get_query_id query) in
194 194 let path = "results/" ^ id ^ "/" in
195 195 ignore (Sys.command ("mkdir -p " ^ path));
196 196 let result = Exec.process_query ic oc 30. false "x" query 10 in
197   - Visualization.print_html_text path "input_text" result.input_text;
198   - Visualization.print_html_text path "pre_text" result.pre_text;
199   - Visualization.print_html_text path "parsed_text" result.parsed_text;
  197 + (* Visualization.print_html_text path "input_text" result.input_text;
  198 + Visualization.print_html_text path "pre_text" result.pre_text; *)
  199 + (* Visualization.print_html_text path "parsed_text" result.parsed_text; *)
200 200 (* printf "input_text:\n%s\n" (Visualization.string_of_text result.input_text);
201 201 printf "pre_text:\n%s\n" (Visualization.string_of_text result.pre_text); *)
202 202 (* Exec.print_result stdout result; *)
  203 + LCGfields.print_fields ["arole"] result.parsed_text;
203 204 (* Visualization.print_paths "results/" "paths" result.paths; *)
204 205 ());
205 206 Marshal.to_channel oc (PreTypes.RawText "") [];
... ...
parser/visualization.ml
... ... @@ -78,7 +78,7 @@ let string_of_status = function
78 78 | ExecTypes.ParseTimeout -> "timeout"
79 79 | ExecTypes.NotParsed -> "not_parsed"
80 80 | ExecTypes.ReductionError -> "error_reduction"
81   - | ExecTypes.TooManyNodes -> "to_many_nodes"
  81 + | ExecTypes.TooManyNodes -> "too_many_nodes"
82 82 | ExecTypes.NotReduced -> "not_reduced"
83 83 | ExecTypes.SemError -> "error_sem"
84 84 | ExecTypes.NotTranslated -> "not_translated"
... ... @@ -811,15 +811,15 @@ let html_of_conll_sentence path tokens (result : conll_parse_result) =
811 811  
812 812  
813 813 let rec html_of_sentence path tokens = function
814   - RawSentence s -> s
815   - | StructSentence(_,paths,last) -> html_of_struct_sentence tokens paths last
816   - | DepSentence(_,paths) -> html_of_dep_sentence tokens paths
817   - | ENIAMSentence result -> html_of_eniam_sentence path tokens result
818   - | CONLLSentence result -> html_of_conll_sentence path tokens result
819   - | QuotedSentences sentences ->
  814 + RawSentence s -> print_endline "RawSentence"; s
  815 + | StructSentence(_,paths,last) -> print_endline "StructSentence"; html_of_struct_sentence tokens paths last
  816 + | DepSentence(_,paths) -> print_endline "DepSentence"; html_of_dep_sentence tokens paths
  817 + | ENIAMSentence result -> print_endline "ENIAMSentence"; html_of_eniam_sentence path tokens result
  818 + | CONLLSentence result -> print_endline "CONLLSentence"; html_of_conll_sentence path tokens result
  819 + | QuotedSentences sentences -> print_endline "QuotedSentences";
820 820 String.concat "<BR>\n" (Xlist.map sentences (fun p ->
821 821 sprintf "pid=%s pbeg=%d plen=%d pnext=%d<BR>%s" p.pid p.pbeg p.plen p.pnext (html_of_sentence path tokens p.psentence)))
822   - | AltSentence l ->
  822 + | AltSentence l -> print_endline "AltSentence";
823 823 "<table border=1>" ^
824 824 String.concat "\n" (Xlist.map l (fun (mode,sentence) ->
825 825 sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_sentence path tokens sentence))) ^
... ... @@ -827,11 +827,11 @@ let rec html_of_sentence path tokens = function
827 827 (* | _ -> failwith "html_of_sentence: ni" *)
828 828  
829 829 let rec html_of_paragraph path tokens = function
830   - RawParagraph s -> s
831   - | StructParagraph sentences ->
  830 + RawParagraph s -> print_endline "RawParagraph"; s
  831 + | StructParagraph sentences -> print_endline "StructParagraph";
832 832 String.concat "<BR>\n" (Xlist.map sentences (fun p ->
833 833 sprintf "pid=%s pbeg=%d plen=%d pnext=%d<BR>%s" p.pid p.pbeg p.plen p.pnext (html_of_sentence path tokens p.psentence)))
834   - | AltParagraph l ->
  834 + | AltParagraph l -> print_endline "AltParagraph";
835 835 "<table border=2>" ^
836 836 String.concat "\n" (Xlist.map l (fun (mode,paragraph) ->
837 837 sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_paragraph path tokens paragraph))) ^
... ...