From dd414d0bbf02bf628d6c8953d9dd524c8e190721 Mon Sep 17 00:00:00 2001
From: Wojciech Jaworski <wjaworski@mimuw.edu.pl>
Date: Sun, 12 Nov 2017 00:30:11 +0100
Subject: [PATCH] poprawa wizualizacji tablicy parsera

---
 LCGlexicon/ENIAM_LCGlexiconParser.ml |  5 ++++-
 LCGparser/ENIAM_LCGchart.ml          | 14 ++++++++++++++
 LCGparser/ENIAM_LCGlatexOf.ml        | 12 ++++++++++++
 LCGparser/ENIAM_LCGrenderer.ml       |  2 +-
 exec/ENIAMvisualization.ml           | 15 +++++++++------
 subsyntax/ENIAMpaths.ml              |  7 ++++---
 subsyntax/ENIAMsubsyntax.ml          |  2 +-
 subsyntax/ENIAMsubsyntaxTypes.ml     |  2 ++
 subsyntax/interface.ml               |  2 ++
 tokenizer/ENIAMtokens.ml             |  3 ++-
 10 files changed, 51 insertions(+), 13 deletions(-)

diff --git a/LCGlexicon/ENIAM_LCGlexiconParser.ml b/LCGlexicon/ENIAM_LCGlexiconParser.ml
index 04e639c..479832a 100644
--- a/LCGlexicon/ENIAM_LCGlexiconParser.ml
+++ b/LCGlexicon/ENIAM_LCGlexiconParser.ml
@@ -212,7 +212,10 @@ let rec find_mult_imp = function
   | [] -> []
 
 let rec find_mult = function
-    A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: l -> failwith "find_mult 1"
+    A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "," :: D(s10,t10) :: A "," :: D _ :: l -> failwith "find_mult 1: too many elements in { }" (* an 11th element is rejected: the cases below handle at most 10 elements *)
+  | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "," :: D(s10,t10) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8;s9,t9;s10,t10] :: find_mult l
+  | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8;s9,t9] :: find_mult l
+  | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8] :: find_mult l
   | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7] :: find_mult l
   | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6] :: find_mult l
   | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5] :: find_mult l
diff --git a/LCGparser/ENIAM_LCGchart.ml b/LCGparser/ENIAM_LCGchart.ml
index 3e82a09..00bdf0c 100644
--- a/LCGparser/ENIAM_LCGchart.ml
+++ b/LCGparser/ENIAM_LCGchart.ml
@@ -289,5 +289,19 @@ let merge chart =
   let paths = select_best_paths a.(n) in
   add_inc chart 0 n (make_root_symbol paths) 0
 
+(* [select_maximal chart] builds a fresh chart: for every node i it keeps only the entries (i,j) with the largest j, *)
+(* then scans i upwards and copies those entries only when their j exceeds every j copied so far. *)
+let select_maximal chart =
+  let size = last_node chart in
+  let best = Array.make size (-1,[],-1) in (* per node i: largest j seen, entries reaching that j, layer of the latest such entry *)
+  let () = fold chart () (fun () (symbol,i,j,sem,layer) ->
+    let reach,entries,_ = best.(i) in
+    if j > reach then best.(i) <- (j,[symbol,sem],layer) (* strictly longer: restart the entry list *)
+    else if j = reach then best.(i) <- (j,(symbol,sem) :: entries,layer)) in (* tie: accumulate; shorter entries are ignored *)
+  snd (Int.fold 0 (size-1) (-1,make size) (fun (covered,selection) i ->
+    let j,entries,layer = best.(i) in
+    if j > covered then (j,add_list selection i j entries layer)
+    else (covered,selection)))
+
 (*FIXME:  Bębni na maszynie do pisania.
           Na myśl o czym brykasz?*)
diff --git a/LCGparser/ENIAM_LCGlatexOf.ml b/LCGparser/ENIAM_LCGlatexOf.ml
index 3c8ef7b..ad781d5 100644
--- a/LCGparser/ENIAM_LCGlatexOf.ml
+++ b/LCGparser/ENIAM_LCGlatexOf.ml
@@ -215,11 +215,23 @@ let chart page text_fragments g =
               (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$ & $%s$\\\\\n\\hline\n" layer node1 node2 s symbol sem) :: l))))) ^
   "\\end{longtable}"
 
+(* LaTeX longtable listing each entry of chart [g]: layer, node span, text fragment and grammar symbol; raises Failure when a text fragment is missing. *)
+let chart2 page text_fragments g =
+  let n = match page with "a4" -> "10" | "a1" -> "40" | _ -> "20" in (* width in cm of the p{} column, chosen by page format *)
+  "\\begin{longtable}{|l|l|l|l|p{" ^ n ^ "cm}|}\n\\hline\n" ^ (* NOTE(review): five columns are declared but each row below emits four cells - confirm the trailing p{} column is wanted *)
+  String.concat "" (List.rev (ENIAM_LCGchart.fold g [] (fun l (symbol,node1,node2,sem,layer) ->
+      let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart2: text_fragment not found %d-%d" node1 node2) in
+      (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$\\\\\n\\hline\n" layer node1 node2 s (grammar_symbol 0 symbol)) :: l))) ^ "\\end{longtable}"
+
 let print_chart path name page text_fragments g =
   Xlatex.latex_file_out path name page false (fun file ->
       Printf.fprintf file "%s\n" (chart page text_fragments g));
   Xlatex.latex_compile_and_clean path name
 
+let print_chart2 path name page text_fragments g = (* passes the chart2 table of [g] to Xlatex.latex_file_out, then calls Xlatex.latex_compile_and_clean on the same path and name *)
+  let write_table file = Printf.fprintf file "%s\n" (chart2 page text_fragments g) in
+  Xlatex.latex_file_out path name page false write_table;
+  Xlatex.latex_compile_and_clean path name
 
 let table_entries_of_symbol_term_list l =
   String.concat "" (Xlist.rev_map l (fun (symbol,sem) ->
diff --git a/LCGparser/ENIAM_LCGrenderer.ml b/LCGparser/ENIAM_LCGrenderer.ml
index ca92e50..c0d7135 100644
--- a/LCGparser/ENIAM_LCGrenderer.ml
+++ b/LCGparser/ENIAM_LCGrenderer.ml
@@ -143,7 +143,7 @@ let rec make_term_arg dir = function
     let v,arg = make_term_arg dir s in
     let w = get_variable_name () in
     w, Fix(Var w,Lambda(v,arg))
-  | _ -> failwith "make_term_arg"
+  | c -> failwith ("make_term_arg: " ^ ENIAM_LCGstringOf.grammar_symbol_prime c)
 
 let add_args node args =
   {node with args=Tuple(node.args :: args)}
diff --git a/exec/ENIAMvisualization.ml b/exec/ENIAMvisualization.ml
index 4471d1f..c0640ab 100644
--- a/exec/ENIAMvisualization.ml
+++ b/exec/ENIAMvisualization.ml
@@ -779,6 +779,7 @@ let create_latex_dep_chart path name dep_chart =
   LatexMain.latex_compile_and_clean path name
 *)
 
+
 (* verbosity:
   0 -> jedynie informacja o statusie zdania
   1 -> zawartość struktur danych istotnych dla uzyskanego statusu
@@ -814,23 +815,25 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
         sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix) ^
       ""
   | NotParsed ->
-      if verbosity = 0 then () else (
+      if verbosity < 2 then () else (
         ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.text_fragments result.chart1);
       if verbosity < 2 then () else (
         ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.text_fragments result.chart2;
         ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2;
-        ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3);
-      if verbosity = 0 then () else (
+        ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3;
         ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.text_fragments result.chart3);
+      if verbosity = 0 then () else (
+        ENIAM_LCGlatexOf.print_chart2 path (file_prefix ^ "_3_chart_selection") "a4" result.text_fragments (ENIAM_LCGchart.select_maximal result.chart3));
       sprintf "not_parsed: paths_size=%d chart_size=%d\n" result.paths_size result.chart_size ^
-      (if verbosity = 0 then "" else
+      (if verbosity < 2 then "" else
         sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix) ^
       (if verbosity < 2 then "" else
         sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix ^
         sprintf "<BR><A HREF=\"%s_2_references.pdf\">References 2</A>\n" file_prefix ^
-        sprintf "<BR><A HREF=\"%s_3_references.pdf\">References 3</A>\n" file_prefix) ^
-      (if verbosity = 0 then "" else
+        sprintf "<BR><A HREF=\"%s_3_references.pdf\">References 3</A>\n" file_prefix ^
         sprintf "<BR><A HREF=\"%s_3_chart.pdf\">Chart 3</A>\n" file_prefix) ^
+      (if verbosity = 0 then "" else
+        sprintf "<BR><A HREF=\"%s_3_chart_selection.pdf\">Chart 3 Selection</A>\n" file_prefix) ^
       ""
   | ReductionError ->
       if verbosity < 2 then () else (
diff --git a/subsyntax/ENIAMpaths.ml b/subsyntax/ENIAMpaths.ml
index 3aba056..13a3692 100644
--- a/subsyntax/ENIAMpaths.ml
+++ b/subsyntax/ENIAMpaths.ml
@@ -260,9 +260,10 @@ let merge_lemmata l =
       (lemma,cat,interp,quantity,status) :: l))*)
 
 let merge_lemmata l =
-  (* let vl,nvl = Xlist.fold l ([],[]) (fun (vl,nvl) t ->
-    if t.ENIAMinflexion.status = ENIAMinflexion.LemmaVal || t.ENIAMinflexion.status = ENIAMinflexion.LemmaAlt then t :: vl,nvl else vl,t :: nvl) in
-  let l = if vl = [] then nvl else vl in *) (* to wycina potrzebne interpretacje *)
+  let l = if !ENIAMsubsyntaxTypes.strong_disambiguate_flag then
+    let vl,nvl = Xlist.fold l ([],[]) (fun (vl,nvl) t ->
+      if t.ENIAMinflexion.status = ENIAMinflexion.LemmaVal || t.ENIAMinflexion.status = ENIAMinflexion.LemmaAlt then t :: vl,nvl else vl,t :: nvl) in
+    if vl = [] then nvl else vl else l in (* to wycina potrzebne interpretacje *)
   (* FIXME: excluded_interps, transformed_interps, num:comp *)
   let l = Xlist.rev_map l (fun t ->
     t.ENIAMinflexion.lemma,
diff --git a/subsyntax/ENIAMsubsyntax.ml b/subsyntax/ENIAMsubsyntax.ml
index bdd38ec..0a723fb 100644
--- a/subsyntax/ENIAMsubsyntax.ml
+++ b/subsyntax/ENIAMsubsyntax.ml
@@ -343,7 +343,7 @@ let parse query =
   let paths = select_tokens paths in
   (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a17"; *)
   (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *)
-  (* let paths = select_tokens2 paths in *) (* Ta procedura wycina potrzebne tokeny *)
+  let paths = if !strong_disambiguate_flag then select_tokens2 paths else paths in (* Ta procedura wycina potrzebne tokeny *)
   let paths = Xlist.sort paths ENIAMpaths.compare_token_record in
   (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a18"; *)
   (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *)
diff --git a/subsyntax/ENIAMsubsyntaxTypes.ml b/subsyntax/ENIAMsubsyntaxTypes.ml
index 8d811ad..445e4ad 100644
--- a/subsyntax/ENIAMsubsyntaxTypes.ml
+++ b/subsyntax/ENIAMsubsyntaxTypes.ml
@@ -44,6 +44,8 @@ type text =
   | StructText of paragraph list (* * token_record ExtArray.t*) (* akapity * tokeny *)
   | AltText of (mode * text) list
 
+let strong_disambiguate_flag = ref false (* off by default; set by the --strong-disamb option in interface.ml; read by ENIAMpaths.merge_lemmata and ENIAMsubsyntax.parse to enable their stricter filtering *)
+
 let data_path =
   try Sys.getenv "ENIAM_USER_DATA_PATH"
   with Not_found -> "data"
diff --git a/subsyntax/interface.ml b/subsyntax/interface.ml
index 868e066..3fec291 100644
--- a/subsyntax/interface.ml
+++ b/subsyntax/interface.ml
@@ -34,6 +34,8 @@ let spec_list = [
   "-m", Arg.Unit (fun () -> output:=Marsh), "Output as marshalled Ocaml data structure";
   "-h", Arg.Unit (fun () -> output:=Html), "Output as HTML";
   "-g", Arg.Unit (fun () -> output:=Graphviz; sentence_split:=false), "Output as graphviz dot file; turns sentence split off";
+  "--strong-disamb", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.strong_disambiguate_flag:=true), "Perform strong disambiguation";
+  "--no-strong-disamb", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.strong_disambiguate_flag:=false), "Do not perform strong disambiguation (default)";
   (* "-r", Arg.String (fun p ->
         ENIAMtokenizerTypes.set_resource_path p;
         ENIAMmorphologyTypes.set_resource_path p;
diff --git a/tokenizer/ENIAMtokens.ml b/tokenizer/ENIAMtokens.ml
index f4b47a9..447d6d8 100644
--- a/tokenizer/ENIAMtokens.ml
+++ b/tokenizer/ENIAMtokens.ml
@@ -817,9 +817,10 @@ let rec recognize_sign_group poss_s_beg i = function
             Token{empty_token_env with beg=i+factor-30;len=10;next=i+factor-20;token=Interp "</sentence>"};
             Token{empty_token_env with beg=i+factor-20;len=10;next=i+factor-10;token=Interp "<sentence>"};
             Token{empty_token_env with beg=i+factor-10;len=10;next=i+factor;token=Interp "<clause>"}];
-         Seq[Token{empty_token_env with beg=i;len=10;next=i+10;token=Interp "</clause>"};
+          Seq[Token{empty_token_env with beg=i;len=10;next=i+10;token=Interp "</clause>"};
             Token{empty_token_env with beg=i+10;len=10;next=i+20;token=Interp "</sentence>"};
             Token{empty_token_env with orth=":";beg=i+20;len=factor-20;next=i+factor;token=Interp ":s"}]; (* speaker *)
+          Token{empty_token_env with orth=":";beg=i;len=factor;next=i+factor;token=Interp ":"}; (* np. w frazie "usługę: wizyta" *)
           ],i+factor,l,true
 (*      if is_colon_sentence_end_marker l then
         Seq[Token{empty_token_env with beg=i;len=10;next=i+10;token=Interp "</clause>"};
--
libgit2 0.22.2