Commit dd414d0bbf02bf628d6c8953d9dd524c8e190721
1 parent: f909ff80
poprawa wizualizacji tablicy parsera
Showing 10 changed files with 51 additions and 13 deletions
LCGlexicon/ENIAM_LCGlexiconParser.ml
... | ... | @@ -212,7 +212,10 @@ let rec find_mult_imp = function |
212 | 212 | | [] -> [] |
213 | 213 | |
214 | 214 | let rec find_mult = function |
215 | - A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: l -> failwith "find_mult 1" | |
215 | + A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "," :: D(s10,t10) :: A "," :: D _ :: l -> failwith "find_mult 1: to many elements in { }" | |
216 | + | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "," :: D(s10,t10) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8;s9,t9;s10,t10] :: find_mult l | |
217 | + | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8;s9,t9] :: find_mult l | |
218 | + | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8] :: find_mult l | |
216 | 219 | | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7] :: find_mult l |
217 | 220 | | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6] :: find_mult l |
218 | 221 | | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5] :: find_mult l |
... | ... |
LCGparser/ENIAM_LCGchart.ml
... | ... | @@ -289,5 +289,19 @@ let merge chart = |
289 | 289 | let paths = select_best_paths a.(n) in |
290 | 290 | add_inc chart 0 n (make_root_symbol paths) 0 |
291 | 291 | |
292 | +let select_maximal chart = | |
293 | + let last = last_node chart in | |
294 | + let a = Array.make last (-1,[],-1) in | |
295 | + let _ = fold chart () (fun chart (symbol,i,j,sem,layer) -> | |
296 | + let j0,l,_ = a.(i) in | |
297 | + if j > j0 then a.(i) <- j,[symbol,sem],layer else | |
298 | + if j < j0 then () else | |
299 | + a.(i) <- j,(symbol,sem) :: l,layer) in | |
300 | + let chart = make last in | |
301 | + snd (Int.fold 0 (last-1) (-1,chart) (fun (j0,chart) i -> | |
302 | + let j,l,layer = a.(i) in | |
303 | + if j <= j0 then j0,chart else | |
304 | + j,add_list chart i j l layer)) | |
305 | + | |
292 | 306 | (*FIXME: Bębni na maszynie do pisania. |
293 | 307 | Na myśl o czym brykasz?*) |
... | ... |
LCGparser/ENIAM_LCGlatexOf.ml
... | ... | @@ -215,11 +215,23 @@ let chart page text_fragments g = |
215 | 215 | (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$ & $%s$\\\\\n\\hline\n" layer node1 node2 s symbol sem) :: l))))) ^ |
216 | 216 | "\\end{longtable}" |
217 | 217 | |
218 | +let chart2 page text_fragments g = | |
219 | + let n = match page with "a4" -> "10" | "a1" -> "40" | _ -> "20" in | |
220 | + "\\begin{longtable}{|l|l|l|l|p{" ^ n ^ "cm}|}\n\\hline\n" ^ | |
221 | + String.concat "" (List.rev (ENIAM_LCGchart.fold g [] (fun l (symbol,node1,node2,sem,layer) -> | |
222 | + let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in | |
223 | + (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$\\\\\n\\hline\n" layer node1 node2 s (grammar_symbol 0 symbol)) :: l))) ^ | |
224 | + "\\end{longtable}" | |
225 | + | |
218 | 226 | let print_chart path name page text_fragments g = |
219 | 227 | Xlatex.latex_file_out path name page false (fun file -> |
220 | 228 | Printf.fprintf file "%s\n" (chart page text_fragments g)); |
221 | 229 | Xlatex.latex_compile_and_clean path name |
222 | 230 | |
231 | +let print_chart2 path name page text_fragments g = | |
232 | + Xlatex.latex_file_out path name page false (fun file -> | |
233 | + Printf.fprintf file "%s\n" (chart2 page text_fragments g)); | |
234 | + Xlatex.latex_compile_and_clean path name | |
223 | 235 | |
224 | 236 | let table_entries_of_symbol_term_list l = |
225 | 237 | String.concat "" (Xlist.rev_map l (fun (symbol,sem) -> |
... | ... |
LCGparser/ENIAM_LCGrenderer.ml
... | ... | @@ -143,7 +143,7 @@ let rec make_term_arg dir = function |
143 | 143 | let v,arg = make_term_arg dir s in |
144 | 144 | let w = get_variable_name () in |
145 | 145 | w, Fix(Var w,Lambda(v,arg)) |
146 | - | _ -> failwith "make_term_arg" | |
146 | + | c -> failwith ("make_term_arg: " ^ ENIAM_LCGstringOf.grammar_symbol_prime c) | |
147 | 147 | |
148 | 148 | let add_args node args = |
149 | 149 | {node with args=Tuple(node.args :: args)} |
... | ... |
exec/ENIAMvisualization.ml
... | ... | @@ -779,6 +779,7 @@ let create_latex_dep_chart path name dep_chart = |
779 | 779 | LatexMain.latex_compile_and_clean path name |
780 | 780 | *) |
781 | 781 | |
782 | + | |
782 | 783 | (* verbosity: |
783 | 784 | 0 -> jedynie informacja o statusie zdania |
784 | 785 | 1 -> zawartość struktur danych istotnych dla uzyskanego statusu |
... | ... | @@ -814,23 +815,25 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam |
814 | 815 | sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix) ^ |
815 | 816 | "" |
816 | 817 | | NotParsed -> |
817 | - if verbosity = 0 then () else ( | |
818 | + if verbosity < 2 then () else ( | |
818 | 819 | ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.text_fragments result.chart1); |
819 | 820 | if verbosity < 2 then () else ( |
820 | 821 | ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.text_fragments result.chart2; |
821 | 822 | ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2; |
822 | - ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3); | |
823 | - if verbosity = 0 then () else ( | |
823 | + ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3; | |
824 | 824 | ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.text_fragments result.chart3); |
825 | + if verbosity = 0 then () else ( | |
826 | + ENIAM_LCGlatexOf.print_chart2 path (file_prefix ^ "_3_chart_selection") "a4" result.text_fragments (ENIAM_LCGchart.select_maximal result.chart3)); | |
825 | 827 | sprintf "not_parsed: paths_size=%d chart_size=%d\n" result.paths_size result.chart_size ^ |
826 | - (if verbosity = 0 then "" else | |
828 | + (if verbosity < 2 then "" else | |
827 | 829 | sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix) ^ |
828 | 830 | (if verbosity < 2 then "" else |
829 | 831 | sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix ^ |
830 | 832 | sprintf "<BR><A HREF=\"%s_2_references.pdf\">References 2</A>\n" file_prefix ^ |
831 | - sprintf "<BR><A HREF=\"%s_3_references.pdf\">References 3</A>\n" file_prefix) ^ | |
832 | - (if verbosity = 0 then "" else | |
833 | + sprintf "<BR><A HREF=\"%s_3_references.pdf\">References 3</A>\n" file_prefix ^ | |
833 | 834 | sprintf "<BR><A HREF=\"%s_3_chart.pdf\">Chart 3</A>\n" file_prefix) ^ |
835 | + (if verbosity = 0 then "" else | |
836 | + sprintf "<BR><A HREF=\"%s_3_chart_selection.pdf\">Chart 3 Selection</A>\n" file_prefix) ^ | |
834 | 837 | "" |
835 | 838 | | ReductionError -> |
836 | 839 | if verbosity < 2 then () else ( |
... | ... |
subsyntax/ENIAMpaths.ml
... | ... | @@ -260,9 +260,10 @@ let merge_lemmata l = |
260 | 260 | (lemma,cat,interp,quantity,status) :: l))*) |
261 | 261 | |
262 | 262 | let merge_lemmata l = |
263 | - (* let vl,nvl = Xlist.fold l ([],[]) (fun (vl,nvl) t -> | |
264 | - if t.ENIAMinflexion.status = ENIAMinflexion.LemmaVal || t.ENIAMinflexion.status = ENIAMinflexion.LemmaAlt then t :: vl,nvl else vl,t :: nvl) in | |
265 | - let l = if vl = [] then nvl else vl in *) (* to wycina potrzebne interpretacje *) | |
263 | + let l = if !ENIAMsubsyntaxTypes.strong_disambiguate_flag then | |
264 | + let vl,nvl = Xlist.fold l ([],[]) (fun (vl,nvl) t -> | |
265 | + if t.ENIAMinflexion.status = ENIAMinflexion.LemmaVal || t.ENIAMinflexion.status = ENIAMinflexion.LemmaAlt then t :: vl,nvl else vl,t :: nvl) in | |
266 | + if vl = [] then nvl else vl else l in (* to wycina potrzebne interpretacje *) | |
266 | 267 | (* FIXME: excluded_interps, transformed_interps, num:comp *) |
267 | 268 | let l = Xlist.rev_map l (fun t -> |
268 | 269 | t.ENIAMinflexion.lemma, |
... | ... |
subsyntax/ENIAMsubsyntax.ml
... | ... | @@ -343,7 +343,7 @@ let parse query = |
343 | 343 | let paths = select_tokens paths in |
344 | 344 | (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a17"; *) |
345 | 345 | (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *) |
346 | - (* let paths = select_tokens2 paths in *) (* Ta procedura wycina potrzebne tokeny *) | |
346 | + let paths = if !strong_disambiguate_flag then select_tokens2 paths else paths in (* Ta procedura wycina potrzebne tokeny *) | |
347 | 347 | let paths = Xlist.sort paths ENIAMpaths.compare_token_record in |
348 | 348 | (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a18"; *) |
349 | 349 | (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *) |
... | ... |
subsyntax/ENIAMsubsyntaxTypes.ml
... | ... | @@ -44,6 +44,8 @@ type text = |
44 | 44 | | StructText of paragraph list (* * token_record ExtArray.t*) (* akapity * tokeny *) |
45 | 45 | | AltText of (mode * text) list |
46 | 46 | |
47 | +let strong_disambiguate_flag = ref false | |
48 | + | |
47 | 49 | let data_path = |
48 | 50 | try Sys.getenv "ENIAM_USER_DATA_PATH" |
49 | 51 | with Not_found -> "data" |
... | ... |
subsyntax/interface.ml
... | ... | @@ -34,6 +34,8 @@ let spec_list = [ |
34 | 34 | "-m", Arg.Unit (fun () -> output:=Marsh), "Output as marshalled Ocaml data structure"; |
35 | 35 | "-h", Arg.Unit (fun () -> output:=Html), "Output as HTML"; |
36 | 36 | "-g", Arg.Unit (fun () -> output:=Graphviz; sentence_split:=false), "Output as graphviz dot file; turns sentence split off"; |
37 | + "--strong-disamb", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.strong_disambiguate_flag:=true), "Perform strong disambiguation"; | |
38 | + "--no-strong-disamb", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.strong_disambiguate_flag:=false), "Do not perform strong disambiguation (default)"; | |
37 | 39 | (* "-r", Arg.String (fun p -> |
38 | 40 | ENIAMtokenizerTypes.set_resource_path p; |
39 | 41 | ENIAMmorphologyTypes.set_resource_path p; |
... | ... |
tokenizer/ENIAMtokens.ml
... | ... | @@ -817,9 +817,10 @@ let rec recognize_sign_group poss_s_beg i = function |
817 | 817 | Token{empty_token_env with beg=i+factor-30;len=10;next=i+factor-20;token=Interp "</sentence>"}; |
818 | 818 | Token{empty_token_env with beg=i+factor-20;len=10;next=i+factor-10;token=Interp "<sentence>"}; |
819 | 819 | Token{empty_token_env with beg=i+factor-10;len=10;next=i+factor;token=Interp "<clause>"}]; |
820 | - Seq[Token{empty_token_env with beg=i;len=10;next=i+10;token=Interp "</clause>"}; | |
820 | + Seq[Token{empty_token_env with beg=i;len=10;next=i+10;token=Interp "</clause>"}; | |
821 | 821 | Token{empty_token_env with beg=i+10;len=10;next=i+20;token=Interp "</sentence>"}; |
822 | 822 | Token{empty_token_env with orth=":";beg=i+20;len=factor-20;next=i+factor;token=Interp ":s"}]; (* speaker *) |
823 | + Token{empty_token_env with orth=":";beg=i;len=factor;next=i+factor;token=Interp ":"}; (* np. w frazie "usługę: wizyta" *) | |
823 | 824 | ],i+factor,l,true |
824 | 825 | (* if is_colon_sentence_end_marker l then |
825 | 826 | Seq[Token{empty_token_env with beg=i;len=10;next=i+10;token=Interp "</clause>"}; |
... | ... |