diff --git a/LCGlexicon/ENIAM_LCGlexiconParser.ml b/LCGlexicon/ENIAM_LCGlexiconParser.ml
index 04e639c..479832a 100644
--- a/LCGlexicon/ENIAM_LCGlexiconParser.ml
+++ b/LCGlexicon/ENIAM_LCGlexiconParser.ml
@@ -212,7 +212,10 @@ let rec find_mult_imp = function
| [] -> []
let rec find_mult = function
- A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: l -> failwith "find_mult 1"
+ A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "," :: D(s10,t10) :: A "," :: D _ :: l -> failwith "find_mult 1: to many elements in { }"
+ | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "," :: D(s10,t10) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8;s9,t9;s10,t10] :: find_mult l
+ | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8;s9,t9] :: find_mult l
+ | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8] :: find_mult l
| A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7] :: find_mult l
| A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6] :: find_mult l
| A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5] :: find_mult l
diff --git a/LCGparser/ENIAM_LCGchart.ml b/LCGparser/ENIAM_LCGchart.ml
index 3e82a09..00bdf0c 100644
--- a/LCGparser/ENIAM_LCGchart.ml
+++ b/LCGparser/ENIAM_LCGchart.ml
@@ -289,5 +289,19 @@ let merge chart =
let paths = select_best_paths a.(n) in
add_inc chart 0 n (make_root_symbol paths) 0
+let select_maximal chart = (* Builds a chart that keeps, for every start node, only the edges reaching the furthest end node, and among those only groups that extend past the span already selected to their left *)
+ let last = last_node chart in
+ let a = Array.make last (-1,[],-1) in (* a.(i) = (furthest end node seen from i, (symbol,sem) entries ending there, layer); -1 and [] mean nothing seen yet *)
+ let _ = fold chart () (fun chart (symbol,i,j,sem,layer) -> (* the accumulator is unit (the inner name chart shadows the argument); fold is used only to update a *)
+ let j0,l,_ = a.(i) in
+ if j > j0 then a.(i) <- j,[symbol,sem],layer else (* strictly longer edge: restart the entry list for i *)
+ if j < j0 then () else (* shorter edge: ignored *)
+ a.(i) <- j,(symbol,sem) :: l,layer) in (* same end node: add the entry; the stored layer becomes the layer of this edge *)
+ let chart = make last in (* NOTE(review): presumably an empty chart of the same size as the input - confirm against make *)
+ snd (Int.fold 0 (last-1) (-1,chart) (fun (j0,chart) i -> (* j0 = end node of the most recently added group; presumably i runs over 0..last-1 - confirm against Int.fold *)
+ let j,l,layer = a.(i) in
+ if j <= j0 then j0,chart else (* best edge from i does not reach beyond j0 (or i has no edges): skip it *)
+ j,add_list chart i j l layer))
+
(*FIXME: Bębni na maszynie do pisania.
Na myśl o czym brykasz?*)
diff --git a/LCGparser/ENIAM_LCGlatexOf.ml b/LCGparser/ENIAM_LCGlatexOf.ml
index 3c8ef7b..ad781d5 100644
--- a/LCGparser/ENIAM_LCGlatexOf.ml
+++ b/LCGparser/ENIAM_LCGlatexOf.ml
@@ -215,11 +215,23 @@ let chart page text_fragments g =
(Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$ & $%s$\\\\\n\\hline\n" layer node1 node2 s symbol sem) :: l))))) ^
"\\end{longtable}"
+let chart2 page text_fragments g = (* Renders chart g as a LaTeX longtable: one row per edge with layer, node span, text fragment and grammar symbol; the semantics (sem) is not printed *)
+ let n = match page with "a4" -> "10" | "a1" -> "40" | _ -> "20" in (* width in cm of the paragraph column, chosen by page format *)
+ "\\begin{longtable}{|l|l|l|l|p{" ^ n ^ "cm}|}\n\\hline\n" ^ (* NOTE(review): five columns are declared but each row below has four cells, so the paragraph column stays empty - confirm this is intended *)
+ String.concat "" (List.rev (ENIAM_LCGchart.fold g [] (fun l (symbol,node1,node2,sem,layer) -> (* rows are consed onto l, hence the List.rev *)
+ let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart2: text_fragment not found %d-%d" node1 node2) in
+ (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$\\\\\n\\hline\n" layer node1 node2 s (grammar_symbol 0 symbol)) :: l))) ^
+ "\\end{longtable}"
+
let print_chart path name page text_fragments g =
Xlatex.latex_file_out path name page false (fun file ->
Printf.fprintf file "%s\n" (chart page text_fragments g));
Xlatex.latex_compile_and_clean path name
+let print_chart2 path name page text_fragments g = (* Writes the chart2 rendering of g, followed by a newline, to the channel supplied by Xlatex.latex_file_out, then calls Xlatex.latex_compile_and_clean on the same path and name *)
+ Xlatex.latex_file_out path name page false (fun file ->
+ Printf.fprintf file "%s\n" (chart2 page text_fragments g));
+ Xlatex.latex_compile_and_clean path name
let table_entries_of_symbol_term_list l =
String.concat "" (Xlist.rev_map l (fun (symbol,sem) ->
diff --git a/LCGparser/ENIAM_LCGrenderer.ml b/LCGparser/ENIAM_LCGrenderer.ml
index ca92e50..c0d7135 100644
--- a/LCGparser/ENIAM_LCGrenderer.ml
+++ b/LCGparser/ENIAM_LCGrenderer.ml
@@ -143,7 +143,7 @@ let rec make_term_arg dir = function
let v,arg = make_term_arg dir s in
let w = get_variable_name () in
w, Fix(Var w,Lambda(v,arg))
- | _ -> failwith "make_term_arg"
+ | c -> failwith ("make_term_arg: " ^ ENIAM_LCGstringOf.grammar_symbol_prime c)
let add_args node args =
{node with args=Tuple(node.args :: args)}
diff --git a/exec/ENIAMvisualization.ml b/exec/ENIAMvisualization.ml
index 4471d1f..c0640ab 100644
--- a/exec/ENIAMvisualization.ml
+++ b/exec/ENIAMvisualization.ml
@@ -779,6 +779,7 @@ let create_latex_dep_chart path name dep_chart =
LatexMain.latex_compile_and_clean path name
*)
+
(* verbosity:
0 -> jedynie informacja o statusie zdania
1 -> zawartość struktur danych istotnych dla uzyskanego statusu
@@ -814,23 +815,25 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix) ^
""
| NotParsed ->
- if verbosity = 0 then () else (
+ if verbosity < 2 then () else (
ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.text_fragments result.chart1);
if verbosity < 2 then () else (
ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.text_fragments result.chart2;
ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2;
- ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3);
- if verbosity = 0 then () else (
+ ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3;
ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.text_fragments result.chart3);
+ if verbosity = 0 then () else (
+ ENIAM_LCGlatexOf.print_chart2 path (file_prefix ^ "_3_chart_selection") "a4" result.text_fragments (ENIAM_LCGchart.select_maximal result.chart3));
sprintf "not_parsed: paths_size=%d chart_size=%d\n" result.paths_size result.chart_size ^
- (if verbosity = 0 then "" else
+ (if verbosity < 2 then "" else
sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix) ^
(if verbosity < 2 then "" else
sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix ^
sprintf "<BR><A HREF=\"%s_2_references.pdf\">References 2</A>\n" file_prefix ^
- sprintf "<BR><A HREF=\"%s_3_references.pdf\">References 3</A>\n" file_prefix) ^
- (if verbosity = 0 then "" else
+ sprintf "<BR><A HREF=\"%s_3_references.pdf\">References 3</A>\n" file_prefix ^
sprintf "<BR><A HREF=\"%s_3_chart.pdf\">Chart 3</A>\n" file_prefix) ^
+ (if verbosity = 0 then "" else
+ sprintf "<BR><A HREF=\"%s_3_chart_selection.pdf\">Chart 3 Selection</A>\n" file_prefix) ^
""
| ReductionError ->
if verbosity < 2 then () else (
diff --git a/subsyntax/ENIAMpaths.ml b/subsyntax/ENIAMpaths.ml
index 3aba056..13a3692 100644
--- a/subsyntax/ENIAMpaths.ml
+++ b/subsyntax/ENIAMpaths.ml
@@ -260,9 +260,10 @@ let merge_lemmata l =
(lemma,cat,interp,quantity,status) :: l))*)
let merge_lemmata l =
- (* let vl,nvl = Xlist.fold l ([],[]) (fun (vl,nvl) t ->
- if t.ENIAMinflexion.status = ENIAMinflexion.LemmaVal || t.ENIAMinflexion.status = ENIAMinflexion.LemmaAlt then t :: vl,nvl else vl,t :: nvl) in
- let l = if vl = [] then nvl else vl in *) (* to wycina potrzebne interpretacje *)
+ let l = if !ENIAMsubsyntaxTypes.strong_disambiguate_flag then
+ let vl,nvl = Xlist.fold l ([],[]) (fun (vl,nvl) t ->
+ if t.ENIAMinflexion.status = ENIAMinflexion.LemmaVal || t.ENIAMinflexion.status = ENIAMinflexion.LemmaAlt then t :: vl,nvl else vl,t :: nvl) in
+ if vl = [] then nvl else vl else l in (* to wycina potrzebne interpretacje *)
(* FIXME: excluded_interps, transformed_interps, num:comp *)
let l = Xlist.rev_map l (fun t ->
t.ENIAMinflexion.lemma,
diff --git a/subsyntax/ENIAMsubsyntax.ml b/subsyntax/ENIAMsubsyntax.ml
index bdd38ec..0a723fb 100644
--- a/subsyntax/ENIAMsubsyntax.ml
+++ b/subsyntax/ENIAMsubsyntax.ml
@@ -343,7 +343,7 @@ let parse query =
let paths = select_tokens paths in
(* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a17"; *)
(* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *)
- (* let paths = select_tokens2 paths in *) (* Ta procedura wycina potrzebne tokeny *)
+ let paths = if !strong_disambiguate_flag then select_tokens2 paths else paths in (* Ta procedura wycina potrzebne tokeny *)
let paths = Xlist.sort paths ENIAMpaths.compare_token_record in
(* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a18"; *)
(* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *)
diff --git a/subsyntax/ENIAMsubsyntaxTypes.ml b/subsyntax/ENIAMsubsyntaxTypes.ml
index 8d811ad..445e4ad 100644
--- a/subsyntax/ENIAMsubsyntaxTypes.ml
+++ b/subsyntax/ENIAMsubsyntaxTypes.ml
@@ -44,6 +44,8 @@ type text =
| StructText of paragraph list (* * token_record ExtArray.t*) (* akapity * tokeny *)
| AltText of (mode * text) list
+let strong_disambiguate_flag = ref false (* Off by default; when set, ENIAMpaths.merge_lemmata keeps only LemmaVal/LemmaAlt lemmata (if any exist) and ENIAMsubsyntax.parse additionally applies select_tokens2 *)
+
let data_path =
try Sys.getenv "ENIAM_USER_DATA_PATH"
with Not_found -> "data"
diff --git a/subsyntax/interface.ml b/subsyntax/interface.ml
index 868e066..3fec291 100644
--- a/subsyntax/interface.ml
+++ b/subsyntax/interface.ml
@@ -34,6 +34,8 @@ let spec_list = [
"-m", Arg.Unit (fun () -> output:=Marsh), "Output as marshalled Ocaml data structure";
"-h", Arg.Unit (fun () -> output:=Html), "Output as HTML";
"-g", Arg.Unit (fun () -> output:=Graphviz; sentence_split:=false), "Output as graphviz dot file; turns sentence split off";
+ "--strong-disamb", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.strong_disambiguate_flag:=true), "Perform strong disambiguation";
+ "--no-strong-disamb", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.strong_disambiguate_flag:=false), "Do not perform strong disambiguation (default)";
(* "-r", Arg.String (fun p ->
ENIAMtokenizerTypes.set_resource_path p;
ENIAMmorphologyTypes.set_resource_path p;
diff --git a/tokenizer/ENIAMtokens.ml b/tokenizer/ENIAMtokens.ml
index f4b47a9..447d6d8 100644
--- a/tokenizer/ENIAMtokens.ml
+++ b/tokenizer/ENIAMtokens.ml
@@ -817,9 +817,10 @@ let rec recognize_sign_group poss_s_beg i = function
Token{empty_token_env with beg=i+factor-30;len=10;next=i+factor-20;token=Interp "</sentence>"};
Token{empty_token_env with beg=i+factor-20;len=10;next=i+factor-10;token=Interp "<sentence>"};
Token{empty_token_env with beg=i+factor-10;len=10;next=i+factor;token=Interp "<clause>"}];
- Seq[Token{empty_token_env with beg=i;len=10;next=i+10;token=Interp "</clause>"};
+ Seq[Token{empty_token_env with beg=i;len=10;next=i+10;token=Interp "</clause>"};
Token{empty_token_env with beg=i+10;len=10;next=i+20;token=Interp "</sentence>"};
Token{empty_token_env with orth=":";beg=i+20;len=factor-20;next=i+factor;token=Interp ":s"}]; (* speaker *)
+ Token{empty_token_env with orth=":";beg=i;len=factor;next=i+factor;token=Interp ":"}; (* np. w frazie "usługę: wizyta" *)
],i+factor,l,true
(* if is_colon_sentence_end_marker l then
Seq[Token{empty_token_env with beg=i;len=10;next=i+10;token=Interp "</clause>"};