Commit dd414d0bbf02bf628d6c8953d9dd524c8e190721

Authored by Wojciech Jaworski
1 parent f909ff80

poprawa wizualizacji tablicy parsera

LCGlexicon/ENIAM_LCGlexiconParser.ml
... ... @@ -212,7 +212,10 @@ let rec find_mult_imp = function
212 212 | [] -> []
213 213  
214 214 let rec find_mult = function
215   - A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: l -> failwith "find_mult 1"
  215 + A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "," :: D(s10,t10) :: A "," :: D _ :: l -> failwith "find_mult 1: to many elements in { }"
  216 + | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "," :: D(s10,t10) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8;s9,t9;s10,t10] :: find_mult l
  217 + | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8;s9,t9] :: find_mult l
  218 + | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8] :: find_mult l
216 219 | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7] :: find_mult l
217 220 | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6] :: find_mult l
218 221 | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5] :: find_mult l
... ...
LCGparser/ENIAM_LCGchart.ml
... ... @@ -289,5 +289,19 @@ let merge chart =
289 289 let paths = select_best_paths a.(n) in
290 290 add_inc chart 0 n (make_root_symbol paths) 0
291 291  
  (* select_maximal chart: builds a fresh chart that keeps, for every start node i,
     only the entries reaching the furthest end node j seen for that i, and then
     skips any start node whose furthest end does not go beyond the end node of
     the span most recently kept while scanning start nodes in increasing order.
     NOTE(review): last_node, fold, make, add_list and Int.fold are defined outside
     this excerpt; the descriptions below assume their usual meaning - confirm.
     NOTE(review): assumes every entry start index i is below last, otherwise
     the array access a.(i) raises Invalid_argument - confirm against fold. *)
  292 +let select_maximal chart =
  293 + let last = last_node chart in
  294 + let a = Array.make last (-1,[],-1) in (* per start node: best end node, its symbol/sem pairs, layer; -1 means nothing seen yet *)
  295 + let _ = fold chart () (fun chart (symbol,i,j,sem,layer) -> (* NOTE(review): the callback parameter named chart is the unit accumulator, not the chart *)
  296 + let j0,l,_ = a.(i) in
  297 + if j > j0 then a.(i) <- j,[symbol,sem],layer else (* strictly longer span: restart the list *)
  298 + if j < j0 then () else (* shorter span: ignore *)
  299 + a.(i) <- j,(symbol,sem) :: l,layer) in (* same span: accumulate; the stored layer is that of the entry seen last *)
  300 + let chart = make last in (* shadows the input chart with a new one of the same size *)
  301 + snd (Int.fold 0 (last-1) (-1,chart) (fun (j0,chart) i -> (* presumably visits i = 0 .. last-1; j0 is the end node of the last span added *)
  302 + let j,l,layer = a.(i) in
  303 + if j <= j0 then j0,chart else (* span from i does not extend past what is already covered: skip it *)
  304 + j,add_list chart i j l layer))
  305 +
292 306 (*FIXME: Bębni na maszynie do pisania.
293 307 Na myśl o czym brykasz?*)
... ...
LCGparser/ENIAM_LCGlatexOf.ml
... ... @@ -215,11 +215,23 @@ let chart page text_fragments g =
215 215 (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$ & $%s$\\\\\n\\hline\n" layer node1 node2 s symbol sem) :: l))))) ^
216 216 "\\end{longtable}"
217 217  
  (* chart2 page text_fragments g: renders the chart g as a LaTeX longtable string
     with one row per chart entry: layer, node span, text fragment and the grammar
     symbol. The sem component of each entry is bound but not printed.
     page selects a width in cm for the p column: a4 gives 10, a1 gives 40,
     anything else gives 20.
     Raises Failure when text_fragments has no entry for a node pair.
     NOTE(review): the failure message prefix still names chart, not chart2.
     NOTE(review): the column spec declares five columns but each row emits four
     cells, so the p column of width n stays empty - confirm the intended layout. *)
  218 +let chart2 page text_fragments g =
  219 + let n = match page with "a4" -> "10" | "a1" -> "40" | _ -> "20" in
  220 + "\\begin{longtable}{|l|l|l|l|p{" ^ n ^ "cm}|}\n\\hline\n" ^
  221 + String.concat "" (List.rev (ENIAM_LCGchart.fold g [] (fun l (symbol,node1,node2,sem,layer) -> (* rows are consed in reverse, hence List.rev *)
  222 + let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in (* text covered by the span node1 to node2 *)
  223 + (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$\\\\\n\\hline\n" layer node1 node2 s (grammar_symbol 0 symbol)) :: l))) ^
  224 + "\\end{longtable}"
  225 +
(* print_chart path name page text_fragments g: passes a writer callback to
   Xlatex.latex_file_out that prints the LaTeX produced by chart, followed by a
   newline, and then calls Xlatex.latex_compile_and_clean on the same path and name.
   NOTE(review): the meaning of the false argument and the exact file handling
   live in Xlatex, outside this excerpt - confirm there. *)
218 226 let print_chart path name page text_fragments g =
219 227 Xlatex.latex_file_out path name page false (fun file ->
220 228 Printf.fprintf file "%s\n" (chart page text_fragments g));
221 229 Xlatex.latex_compile_and_clean path name
222 230  
  (* print_chart2 path name page text_fragments g: same sequence of calls as
     print_chart, but the document body is produced by chart2, so the table has
     no semantics column. Writes through Xlatex.latex_file_out and then calls
     Xlatex.latex_compile_and_clean on the same path and name.
     NOTE(review): the meaning of the false argument is defined in Xlatex,
     outside this excerpt - confirm there. *)
  231 +let print_chart2 path name page text_fragments g =
  232 + Xlatex.latex_file_out path name page false (fun file ->
  233 + Printf.fprintf file "%s\n" (chart2 page text_fragments g));
  234 + Xlatex.latex_compile_and_clean path name
223 235  
224 236 let table_entries_of_symbol_term_list l =
225 237 String.concat "" (Xlist.rev_map l (fun (symbol,sem) ->
... ...
LCGparser/ENIAM_LCGrenderer.ml
... ... @@ -143,7 +143,7 @@ let rec make_term_arg dir = function
143 143 let v,arg = make_term_arg dir s in
144 144 let w = get_variable_name () in
145 145 w, Fix(Var w,Lambda(v,arg))
146   - | _ -> failwith "make_term_arg"
  146 + | c -> failwith ("make_term_arg: " ^ ENIAM_LCGstringOf.grammar_symbol_prime c)
147 147  
(* add_args node args: returns a copy of node whose args field is a Tuple made of
   the previous node.args followed by the elements of args. The input record is
   not modified. *)
148 148 let add_args node args =
149 149 {node with args=Tuple(node.args :: args)}
... ...
exec/ENIAMvisualization.ml
... ... @@ -779,6 +779,7 @@ let create_latex_dep_chart path name dep_chart =
779 779 LatexMain.latex_compile_and_clean path name
780 780 *)
781 781  
  782 +
782 783 (* verbosity:
783 784 0 -> jedynie informacja o statusie zdania
784 785 1 -> zawartość struktur danych istotnych dla uzyskanego statusu
... ... @@ -814,23 +815,25 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
814 815 sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix) ^
815 816 ""
816 817 | NotParsed ->
817   - if verbosity = 0 then () else (
  818 + if verbosity < 2 then () else (
818 819 ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.text_fragments result.chart1);
819 820 if verbosity < 2 then () else (
820 821 ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.text_fragments result.chart2;
821 822 ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2;
822   - ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3);
823   - if verbosity = 0 then () else (
  823 + ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3;
824 824 ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.text_fragments result.chart3);
  825 + if verbosity = 0 then () else (
  826 + ENIAM_LCGlatexOf.print_chart2 path (file_prefix ^ "_3_chart_selection") "a4" result.text_fragments (ENIAM_LCGchart.select_maximal result.chart3));
825 827 sprintf "not_parsed: paths_size=%d chart_size=%d\n" result.paths_size result.chart_size ^
826   - (if verbosity = 0 then "" else
  828 + (if verbosity < 2 then "" else
827 829 sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix) ^
828 830 (if verbosity < 2 then "" else
829 831 sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix ^
830 832 sprintf "<BR><A HREF=\"%s_2_references.pdf\">References 2</A>\n" file_prefix ^
831   - sprintf "<BR><A HREF=\"%s_3_references.pdf\">References 3</A>\n" file_prefix) ^
832   - (if verbosity = 0 then "" else
  833 + sprintf "<BR><A HREF=\"%s_3_references.pdf\">References 3</A>\n" file_prefix ^
833 834 sprintf "<BR><A HREF=\"%s_3_chart.pdf\">Chart 3</A>\n" file_prefix) ^
  835 + (if verbosity = 0 then "" else
  836 + sprintf "<BR><A HREF=\"%s_3_chart_selection.pdf\">Chart 3 Selection</A>\n" file_prefix) ^
834 837 ""
835 838 | ReductionError ->
836 839 if verbosity < 2 then () else (
... ...
subsyntax/ENIAMpaths.ml
... ... @@ -260,9 +260,10 @@ let merge_lemmata l =
260 260 (lemma,cat,interp,quantity,status) :: l))*)
261 261  
262 262 let merge_lemmata l =
263   - (* let vl,nvl = Xlist.fold l ([],[]) (fun (vl,nvl) t ->
264   - if t.ENIAMinflexion.status = ENIAMinflexion.LemmaVal || t.ENIAMinflexion.status = ENIAMinflexion.LemmaAlt then t :: vl,nvl else vl,t :: nvl) in
265   - let l = if vl = [] then nvl else vl in *) (* to wycina potrzebne interpretacje *)
  263 + let l = if !ENIAMsubsyntaxTypes.strong_disambiguate_flag then
  264 + let vl,nvl = Xlist.fold l ([],[]) (fun (vl,nvl) t ->
  265 + if t.ENIAMinflexion.status = ENIAMinflexion.LemmaVal || t.ENIAMinflexion.status = ENIAMinflexion.LemmaAlt then t :: vl,nvl else vl,t :: nvl) in
  266 + if vl = [] then nvl else vl else l in (* to wycina potrzebne interpretacje *)
266 267 (* FIXME: excluded_interps, transformed_interps, num:comp *)
267 268 let l = Xlist.rev_map l (fun t ->
268 269 t.ENIAMinflexion.lemma,
... ...
subsyntax/ENIAMsubsyntax.ml
... ... @@ -343,7 +343,7 @@ let parse query =
343 343 let paths = select_tokens paths in
344 344 (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a17"; *)
345 345 (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *)
346   - (* let paths = select_tokens2 paths in *) (* Ta procedura wycina potrzebne tokeny *)
  346 + let paths = if !strong_disambiguate_flag then select_tokens2 paths else paths in (* Ta procedura wycina potrzebne tokeny *)
347 347 let paths = Xlist.sort paths ENIAMpaths.compare_token_record in
348 348 (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a18"; *)
349 349 (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *)
... ...
subsyntax/ENIAMsubsyntaxTypes.ml
... ... @@ -44,6 +44,8 @@ type text =
44 44 | StructText of paragraph list (* * token_record ExtArray.t*) (* akapity * tokeny *)
45 45 | AltText of (mode * text) list
46 46  
  (* Global mutable switch for strong disambiguation; false by default.
     Within this commit it is set by the --strong-disamb and --no-strong-disamb
     options in interface.ml and read in ENIAMpaths.merge_lemmata (keep only
     LemmaVal or LemmaAlt lemmata when any exist) and in ENIAMsubsyntax.parse
     (apply select_tokens2). *)
  47 +let strong_disambiguate_flag = ref false
  48 +
(* Directory for user data: the value of the ENIAM_USER_DATA_PATH environment
   variable, or the relative directory data when the variable is not set.
   Evaluated once, when the module is initialised. *)
47 49 let data_path =
48 50 try Sys.getenv "ENIAM_USER_DATA_PATH"
49 51 with Not_found -> "data"
... ...
subsyntax/interface.ml
... ... @@ -34,6 +34,8 @@ let spec_list = [
34 34 "-m", Arg.Unit (fun () -> output:=Marsh), "Output as marshalled Ocaml data structure";
35 35 "-h", Arg.Unit (fun () -> output:=Html), "Output as HTML";
36 36 "-g", Arg.Unit (fun () -> output:=Graphviz; sentence_split:=false), "Output as graphviz dot file; turns sentence split off";
  37 + "--strong-disamb", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.strong_disambiguate_flag:=true), "Perform strong disambiguation";
  38 + "--no-strong-disamb", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.strong_disambiguate_flag:=false), "Do not perform strong disambiguation (default)";
37 39 (* "-r", Arg.String (fun p ->
38 40 ENIAMtokenizerTypes.set_resource_path p;
39 41 ENIAMmorphologyTypes.set_resource_path p;
... ...
tokenizer/ENIAMtokens.ml
... ... @@ -817,9 +817,10 @@ let rec recognize_sign_group poss_s_beg i = function
817 817 Token{empty_token_env with beg=i+factor-30;len=10;next=i+factor-20;token=Interp "</sentence>"};
818 818 Token{empty_token_env with beg=i+factor-20;len=10;next=i+factor-10;token=Interp "<sentence>"};
819 819 Token{empty_token_env with beg=i+factor-10;len=10;next=i+factor;token=Interp "<clause>"}];
820   - Seq[Token{empty_token_env with beg=i;len=10;next=i+10;token=Interp "</clause>"};
  820 + Seq[Token{empty_token_env with beg=i;len=10;next=i+10;token=Interp "</clause>"};
821 821 Token{empty_token_env with beg=i+10;len=10;next=i+20;token=Interp "</sentence>"};
822 822 Token{empty_token_env with orth=":";beg=i+20;len=factor-20;next=i+factor;token=Interp ":s"}]; (* speaker *)
  823 + Token{empty_token_env with orth=":";beg=i;len=factor;next=i+factor;token=Interp ":"}; (* np. w frazie "usługę: wizyta" *)
823 824 ],i+factor,l,true
824 825 (* if is_colon_sentence_end_marker l then
825 826 Seq[Token{empty_token_env with beg=i;len=10;next=i+10;token=Interp "</clause>"};
... ...