diff --git a/LCGlexicon/ENIAM_LCGlexiconParser.ml b/LCGlexicon/ENIAM_LCGlexiconParser.ml
index 04e639c..479832a 100644
--- a/LCGlexicon/ENIAM_LCGlexiconParser.ml
+++ b/LCGlexicon/ENIAM_LCGlexiconParser.ml
@@ -212,7 +212,11 @@ let rec find_mult_imp = function
   | [] -> []
 
 let rec find_mult = function
-    A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: l -> failwith "find_mult 1"
+    (* Brace groups are matched by explicit arms for a fixed number of elements; an eleventh element is rejected. *)
+    A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "," :: D(s10,t10) :: A "," :: D _ :: l -> failwith "find_mult 1: too many elements in { }"
+  | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "," :: D(s10,t10) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8;s9,t9;s10,t10] :: find_mult l
+  | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8;s9,t9] :: find_mult l
+  | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8] :: find_mult l
   | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7] :: find_mult l
   | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "}" :: l ->
E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6] :: find_mult l
   | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5] :: find_mult l
diff --git a/LCGparser/ENIAM_LCGchart.ml b/LCGparser/ENIAM_LCGchart.ml
index 3e82a09..00bdf0c 100644
--- a/LCGparser/ENIAM_LCGchart.ml
+++ b/LCGparser/ENIAM_LCGchart.ml
@@ -289,5 +289,25 @@ let merge chart =
   let paths = select_best_paths a.(n) in
   add_inc chart 0 n (make_root_symbol paths) 0
 
+(* Returns a new chart holding only the widest entries of [chart].
+   Pass 1: for each start node i, remember the largest end node j seen
+   and every (symbol,sem) pair recorded on that i--j span.
+   Pass 2: walk the start nodes 0 .. last-1 and copy a node's pairs only
+   when its j lies strictly beyond the largest j copied so far
+   (presumably Int.fold visits the nodes in ascending order -- confirm).
+   NOTE(review): pairs tied on a span all get the layer of the pair visited last. *)
+let select_maximal chart =
+  let last = last_node chart in
+  let best = Array.make last (-1,[],-1) in
+  fold chart () (fun () (symbol,i,j,sem,layer) ->
+    let reach,entries,_ = best.(i) in
+    if j > reach then best.(i) <- (j,[symbol,sem],layer)
+    else if j = reach then best.(i) <- (j,(symbol,sem) :: entries,layer));
+  let empty = make last in
+  let _,selected = Int.fold 0 (last-1) (-1,empty) (fun (reach,acc) i ->
+    let j,entries,layer = best.(i) in
+    if j > reach then (j,add_list acc i j entries layer) else (reach,acc)) in
+  selected
+
 (*FIXME: Bębni na maszynie do pisania.
Na myśl o czym brykasz?*)
diff --git a/LCGparser/ENIAM_LCGlatexOf.ml b/LCGparser/ENIAM_LCGlatexOf.ml
index 3c8ef7b..ad781d5 100644
--- a/LCGparser/ENIAM_LCGlatexOf.ml
+++ b/LCGparser/ENIAM_LCGlatexOf.ml
@@ -215,11 +215,30 @@ let chart page text_fragments g =
         (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$ & $%s$\\\\\n\\hline\n" layer node1 node2 s symbol sem) :: l))))) ^
   "\\end{longtable}"
 
+(* Renders chart [g] as a LaTeX longtable without the semantics column:
+   each row shows layer, node span, text fragment and grammar symbol.
+   [page] selects the width of the last column ("a4" -> 10cm, "a1" -> 40cm,
+   otherwise 20cm). Fails if no text fragment is recorded for a span.
+   NOTE(review): the column spec still declares five columns while rows
+   carry four cells -- confirm the trailing p{..} column is wanted. *)
+let chart2 page text_fragments g =
+  let n = match page with "a4" -> "10" | "a1" -> "40" | _ -> "20" in
+  "\\begin{longtable}{|l|l|l|l|p{" ^ n ^ "cm}|}\n\\hline\n" ^
+  String.concat "" (List.rev (ENIAM_LCGchart.fold g [] (fun l (symbol,node1,node2,_,layer) ->
+    let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart2: text_fragment not found %d-%d" node1 node2) in
+    (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$\\\\\n\\hline\n" layer node1 node2 s (grammar_symbol 0 symbol)) :: l))) ^
+  "\\end{longtable}"
+
 let print_chart path name page text_fragments g =
   Xlatex.latex_file_out path name page false (fun file ->
     Printf.fprintf file "%s\n" (chart page text_fragments g));
   Xlatex.latex_compile_and_clean path name
 
+(* Like [print_chart], but writes the table produced by [chart2]. *)
+let print_chart2 path name page text_fragments g =
+  Xlatex.latex_file_out path name page false (fun file ->
+    Printf.fprintf file "%s\n" (chart2 page text_fragments g));
+  Xlatex.latex_compile_and_clean path name
 
 let table_entries_of_symbol_term_list l =
   String.concat "" (Xlist.rev_map l (fun (symbol,sem) ->
diff --git a/LCGparser/ENIAM_LCGrenderer.ml b/LCGparser/ENIAM_LCGrenderer.ml
index ca92e50..c0d7135 100644
--- a/LCGparser/ENIAM_LCGrenderer.ml
+++ b/LCGparser/ENIAM_LCGrenderer.ml
@@ -143,7 +143,7 @@ let rec make_term_arg dir = function
       let v,arg = make_term_arg dir s in
       let w = get_variable_name () in
       w, Fix(Var w,Lambda(v,arg))
-  | _ -> failwith "make_term_arg"
+  | c -> failwith ("make_term_arg: " ^ ENIAM_LCGstringOf.grammar_symbol_prime c)
 
 let add_args node args =
   {node
with args=Tuple(node.args :: args)} diff --git a/exec/ENIAMvisualization.ml b/exec/ENIAMvisualization.ml index 4471d1f..c0640ab 100644 --- a/exec/ENIAMvisualization.ml +++ b/exec/ENIAMvisualization.ml @@ -779,6 +779,7 @@ let create_latex_dep_chart path name dep_chart = LatexMain.latex_compile_and_clean path name *) + (* verbosity: 0 -> jedynie informacja o statusie zdania 1 -> zawartość struktur danych istotnych dla uzyskanego statusu @@ -814,23 +815,25 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix) ^ "" | NotParsed -> - if verbosity = 0 then () else ( + if verbosity < 2 then () else ( ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.text_fragments result.chart1); if verbosity < 2 then () else ( ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.text_fragments result.chart2; ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2; - ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3); - if verbosity = 0 then () else ( + ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3; ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.text_fragments result.chart3); + if verbosity = 0 then () else ( + ENIAM_LCGlatexOf.print_chart2 path (file_prefix ^ "_3_chart_selection") "a4" result.text_fragments (ENIAM_LCGchart.select_maximal result.chart3)); sprintf "not_parsed: paths_size=%d chart_size=%d\n" result.paths_size result.chart_size ^ - (if verbosity = 0 then "" else + (if verbosity < 2 then "" else sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix) ^ (if verbosity < 2 then "" else sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix ^ sprintf "<BR><A HREF=\"%s_2_references.pdf\">References 2</A>\n" file_prefix ^ - sprintf "<BR><A 
HREF=\"%s_3_references.pdf\">References 3</A>\n" file_prefix) ^ - (if verbosity = 0 then "" else + sprintf "<BR><A HREF=\"%s_3_references.pdf\">References 3</A>\n" file_prefix ^ sprintf "<BR><A HREF=\"%s_3_chart.pdf\">Chart 3</A>\n" file_prefix) ^ + (if verbosity = 0 then "" else + sprintf "<BR><A HREF=\"%s_3_chart_selection.pdf\">Chart 3 Selection</A>\n" file_prefix) ^ "" | ReductionError -> if verbosity < 2 then () else ( diff --git a/subsyntax/ENIAMpaths.ml b/subsyntax/ENIAMpaths.ml index 3aba056..13a3692 100644 --- a/subsyntax/ENIAMpaths.ml +++ b/subsyntax/ENIAMpaths.ml @@ -260,9 +260,10 @@ let merge_lemmata l = (lemma,cat,interp,quantity,status) :: l))*) let merge_lemmata l = - (* let vl,nvl = Xlist.fold l ([],[]) (fun (vl,nvl) t -> - if t.ENIAMinflexion.status = ENIAMinflexion.LemmaVal || t.ENIAMinflexion.status = ENIAMinflexion.LemmaAlt then t :: vl,nvl else vl,t :: nvl) in - let l = if vl = [] then nvl else vl in *) (* to wycina potrzebne interpretacje *) + let l = if !ENIAMsubsyntaxTypes.strong_disambiguate_flag then + let vl,nvl = Xlist.fold l ([],[]) (fun (vl,nvl) t -> + if t.ENIAMinflexion.status = ENIAMinflexion.LemmaVal || t.ENIAMinflexion.status = ENIAMinflexion.LemmaAlt then t :: vl,nvl else vl,t :: nvl) in + if vl = [] then nvl else vl else l in (* to wycina potrzebne interpretacje *) (* FIXME: excluded_interps, transformed_interps, num:comp *) let l = Xlist.rev_map l (fun t -> t.ENIAMinflexion.lemma, diff --git a/subsyntax/ENIAMsubsyntax.ml b/subsyntax/ENIAMsubsyntax.ml index bdd38ec..0a723fb 100644 --- a/subsyntax/ENIAMsubsyntax.ml +++ b/subsyntax/ENIAMsubsyntax.ml @@ -343,7 +343,7 @@ let parse query = let paths = select_tokens paths in (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a17"; *) (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *) - (* let paths = select_tokens2 paths in *) (* Ta procedura wycina potrzebne tokeny *) + let paths = if !strong_disambiguate_flag then select_tokens2 paths else paths in (* Ta procedura wycina 
potrzebne tokeny *) let paths = Xlist.sort paths ENIAMpaths.compare_token_record in (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a18"; *) (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *) diff --git a/subsyntax/ENIAMsubsyntaxTypes.ml b/subsyntax/ENIAMsubsyntaxTypes.ml index 8d811ad..445e4ad 100644 --- a/subsyntax/ENIAMsubsyntaxTypes.ml +++ b/subsyntax/ENIAMsubsyntaxTypes.ml @@ -44,6 +44,8 @@ type text = | StructText of paragraph list (* * token_record ExtArray.t*) (* akapity * tokeny *) | AltText of (mode * text) list +let strong_disambiguate_flag = ref false (* Global switch for strong disambiguation; off by default, set to true by the --strong-disamb option and back to false by --no-strong-disamb. *) + let data_path = try Sys.getenv "ENIAM_USER_DATA_PATH" with Not_found -> "data" diff --git a/subsyntax/interface.ml b/subsyntax/interface.ml index 868e066..3fec291 100644 --- a/subsyntax/interface.ml +++ b/subsyntax/interface.ml @@ -34,6 +34,8 @@ let spec_list = [ "-m", Arg.Unit (fun () -> output:=Marsh), "Output as marshalled Ocaml data structure"; "-h", Arg.Unit (fun () -> output:=Html), "Output as HTML"; "-g", Arg.Unit (fun () -> output:=Graphviz; sentence_split:=false), "Output as graphviz dot file; turns sentence split off"; + "--strong-disamb", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.strong_disambiguate_flag:=true), "Perform strong disambiguation"; + "--no-strong-disamb", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.strong_disambiguate_flag:=false), "Do not perform strong disambiguation (default)"; (* "-r", Arg.String (fun p -> ENIAMtokenizerTypes.set_resource_path p; ENIAMmorphologyTypes.set_resource_path p; diff --git a/tokenizer/ENIAMtokens.ml b/tokenizer/ENIAMtokens.ml index f4b47a9..447d6d8 100644 --- a/tokenizer/ENIAMtokens.ml +++ b/tokenizer/ENIAMtokens.ml @@ -817,9 +817,10 @@ let rec recognize_sign_group poss_s_beg i = function Token{empty_token_env with beg=i+factor-30;len=10;next=i+factor-20;token=Interp "</sentence>"}; Token{empty_token_env with beg=i+factor-20;len=10;next=i+factor-10;token=Interp "<sentence>"}; Token{empty_token_env with 
beg=i+factor-10;len=10;next=i+factor;token=Interp "<clause>"}]; - Seq[Token{empty_token_env with beg=i;len=10;next=i+10;token=Interp "</clause>"}; + Seq[Token{empty_token_env with beg=i;len=10;next=i+10;token=Interp "</clause>"}; Token{empty_token_env with beg=i+10;len=10;next=i+20;token=Interp "</sentence>"}; Token{empty_token_env with orth=":";beg=i+20;len=factor-20;next=i+factor;token=Interp ":s"}]; (* speaker *) + Token{empty_token_env with orth=":";beg=i;len=factor;next=i+factor;token=Interp ":"}; (* np. w frazie "usługę: wizyta" *) ],i+factor,l,true (* if is_colon_sentence_end_marker l then Seq[Token{empty_token_env with beg=i;len=10;next=i+10;token=Interp "</clause>"};