diff --git a/diagnostics/LCGfields.ml b/diagnostics/LCGfields.ml new file mode 100644 index 0000000..1630bbe --- /dev/null +++ b/diagnostics/LCGfields.ml @@ -0,0 +1,120 @@ +open LCGtypes +open Xstd +open ExecTypes + +let eniam = "eniam" +let conll = "conll" + +module Strings = + struct + type t = string + let compare a b = Pervasives.compare a b + end + +module StrMap = Map.Make(Strings) + +let field_map = StrMap.(empty |> add eniam (ref empty) |> add conll (ref empty)) + +let add_to_field_map str_mode field content = + let f_map = StrMap.find str_mode field_map in + let c_map = if StrMap.mem field !f_map + then StrMap.find field !f_map + else let temp = ref StrMap.empty in + f_map := StrMap.add field temp !f_map; temp in + if StrMap.mem content !c_map + then incr (StrMap.find content !c_map) + else c_map := StrMap.add content (ref 1) !c_map + +let print_field_map () = + StrMap.iter (fun key1 val1 -> + print_endline key1; + StrMap.iter (fun key2 val2 -> + let i = ref 0 in + print_endline ("\t" ^ key2); + StrMap.iter (fun key3 val3 -> + i := !i + !val3; + print_endline ("\t\t" ^ key3 ^ "\t\t" ^ (string_of_int !val3)) + ) !val2; + print_endline ("\tsum: " ^ (string_of_int !i)) + ) !val1 + ) field_map; + print_newline () + + +let field_of_node str_mode n = function + "arole" -> let content = if n.arole = "" then "null" else n.arole in + add_to_field_map str_mode "arole" content; content + | _ -> failwith "field_of_node: ni" + +let field_of_linear_term str_node field = function + Node n -> field_of_node str_node n field + | _ -> failwith "field_of_linear_term: ni" + +let field_of_dependency_tree str_node fields dep_tree = + String.concat "\n" (Xlist.map fields (fun field -> + Array.fold_left (fun acc x -> + acc ^ (field_of_linear_term str_node field x) ^ "\n\t\t" ) "" dep_tree)) + +let field_of_eniam_sentence fields tokens (result : eniam_parse_result) = + match result.status with + Idle -> "Idle" + (* | PreprocessingError -> "PreprocessingError" *) + | LexiconError -> "LexiconError" + | ParseError -> "ParseError" + | ParseTimeout -> "ParseTimeout" + | NotParsed -> "NotParsed" + | ReductionError -> "ReductionError" + | TooManyNodes -> "TooManyNodes" + | NotReduced -> "NotReduced" + | SemError -> "SemError" + (* | NotTranslated -> "NotTranslated" *) + | Parsed -> ignore ("Parsed\n\t\t" ^ (field_of_dependency_tree eniam fields result.dependency_tree)); "Parsed\n" + | _ -> failwith "field_of_eniam_sentence" + +let field_of_conll_sentence fields tokens (result : conll_parse_result) = + match result.status with + Idle -> "Idle" + (* | PreprocessingError -> "PreprocessingError" *) + | LexiconError -> "LexiconError" + | ParseError -> "ParseError" + | ParseTimeout -> "ParseTimeout" + | NotParsed -> "NotParsed" + | ReductionError -> "ReductionError" + | TooManyNodes -> "TooManyNodes" + | NotReduced -> "NotReduced" + | SemError -> "SemError" + (* | NotTranslated -> "NotTranslated" *) + | Parsed -> ignore ("Parsed\n\t\t" ^ (field_of_dependency_tree conll fields result.dependency_tree)); "Parsed\n" + | _ -> failwith "field_of_conll_sentence" + + +let rec field_of_sentence fields tokens = function + RawSentence s -> s + | StructSentence(_,paths,last) -> "StructSentence" + | DepSentence(_,paths) -> "DepSentence" + | ENIAMSentence result -> field_of_eniam_sentence fields tokens result + | CONLLSentence result -> field_of_conll_sentence fields tokens result + | QuotedSentences sentences -> "QuotedSentences" + | AltSentence l -> String.concat "\n\t" (Xlist.map l (fun (m, s) -> + Visualization.string_of_mode m ^ "\t" ^ (field_of_sentence fields tokens s))) + (* | _ -> failwith "field_of_sentence: ni" *) + +let rec field_of_paragraph fields tokens = function + RawParagraph s -> print_endline "no fields detected: only raw paragraph"; s + | StructParagraph sentences -> + String.concat "\n\t" (Xlist.map sentences (fun p -> field_of_sentence fields tokens p.psentence)) + | AltParagraph l -> + String.concat "\n" (Xlist.map (List.filter (fun (m,t) -> m = ENIAM || m = CONLL) l) (fun (m,t) -> + Visualization.string_of_mode m ^ "\n\t" ^ (field_of_paragraph fields tokens t))) + (* field_of_paragraph fields tokens (snd @@ List.find (fun (mode,text) -> mode = ENIAM || mode = CONLL) l) *) + +let rec print_fields_rec fields = function + RawText s -> print_endline "no fields detected: only raw text"; +| StructText(paragraphs,tokens) -> + print_endline (String.concat "\n\n" (Xlist.map paragraphs (field_of_paragraph fields tokens)) ^ "\n") +| AltText l -> + print_fields_rec fields (snd @@ List.find (fun (m,t) -> m = Struct || m = ENIAM || m = CONLL) l) + +let print_fields fields text = + print_fields_rec fields text + (* ; print_field_map () *) diff --git a/diagnostics/treeChange.ml b/diagnostics/treeChange.ml new file mode 100644 index 0000000..a1e6a19 --- /dev/null +++ b/diagnostics/treeChange.ml @@ -0,0 +1,23 @@ +open Xstd +open PreTypes + +let remove_interps paths tokens = + let paths_ls = Array.to_list paths in + Array.iter (fun (id,super,label) -> + if ((ExtArray.get tokens id).orth = "," || + (ExtArray.get tokens id).orth = "." || + (ExtArray.get tokens id).orth = "-") && + not (List.exists (fun (_,super,_) -> super = id) paths_ls) + then paths.(id) <- (0,-1,"")) paths; paths + +let move_comp paths tokens = + let correct_dep (id,super,label) = + let is_comp = function + Lemma(_,"comp",_) -> true + | _ -> false in + if ((ExtArray.get tokens id).orth = "by" || (ExtArray.get tokens id).orth = "że") + && is_comp (ExtArray.get tokens id).token + then (let id_S, super_S, label_S = paths.(super) in + paths.(id) <- (id,super_S,label); + paths.(super) <- (id_S, id, label_S)) in + Array.iter correct_dep paths; paths diff --git a/parser/LCGchart.ml b/parser/LCGchart.ml index 1e828ce..a2d750a 100644 --- a/parser/LCGchart.ml +++ b/parser/LCGchart.ml @@ -137,40 +137,46 @@ let assign_not_parsed left right (t,sem) = t, sem let rec dep_parse_rec references start_time timeout time_fun (DepNode(id,left,funct,right)) = + (* printf "dep_parse_rec 1 id=%d\n%!" id; *) let time = time_fun () in if time -. start_time > timeout then raise (Timeout(time -. start_time)) else let left = Xlist.map left (dep_parse_rec references start_time timeout time_fun) in let right = Xlist.map right (dep_parse_rec references start_time timeout time_fun) in - (* printf "dep_parse_rec id=%d\n" id; *) + (* printf "dep_parse_rec 2 id=%d\n%!" id; *) let funct,left = Xlist.fold left (funct,[]) (fun (funct,left) arg -> (* printf "larg: %s\n" (LCGstringOf.symbol_sem_list arg); printf "funct: %s\n" (LCGstringOf.symbol_sem_list funct); *) match LCGrules.backward_application references arg funct with [] -> (*printf "NOT PARSED\n";*) funct, arg :: left | funct -> merge_sems funct, left) in + (* printf "dep_parse_rec 3 |right|=%d \n%!" (Xlist.size right); *) let funct,right = Xlist.fold right (funct,[]) (fun (funct,right) arg -> (* printf "funct: %s\n" (LCGstringOf.symbol_sem_list funct); - printf "rarg: %s\n" (LCGstringOf.symbol_sem_list arg); *) + printf "rarg: %s\n" (LCGstringOf.symbol_sem_list arg); *) match LCGrules.forward_application references funct arg with [] -> (*printf "NOT PARSED\n";*) funct, arg :: right | funct -> merge_sems funct, right) in - if left = [] && right = [] then funct else + (* printf "dep_parse_rec 4\n%!"; *) + if left = [] && right = [] then funct else ( let xleft = Xlist.rev_map left (fun arg -> Xlist.rev_map arg LCGrules.set_x_type) in let xright = Xlist.rev_map right (fun arg -> Xlist.rev_map arg LCGrules.set_x_type) in + (* printf "dep_parse_rec 5\n%!"; *) let xfunct,xleft = Xlist.fold xleft (funct,[]) (fun (xfunct,left) arg -> (* printf "larg: %s\n" (LCGstringOf.symbol_sem_list arg); printf "funct: %s\n" (LCGstringOf.symbol_sem_list xfunct); *) match LCGrules.backward_application references arg xfunct with [] -> (*printf "NOT PARSED\n";*) xfunct, arg :: left | xfunct -> merge_sems xfunct, left) in + (* printf "dep_parse_rec 6\n%!"; *) let xfunct,xright = Xlist.fold xright (xfunct,[]) (fun (xfunct,right) arg -> (* printf "funct: %s\n" (LCGstringOf.symbol_sem_list xfunct); printf "rarg: %s\n" (LCGstringOf.symbol_sem_list arg); *) match LCGrules.forward_application references xfunct arg with [] -> (*printf "NOT PARSED\n";*) xfunct, arg :: right | xfunct -> merge_sems xfunct, right) in + (* printf "dep_parse_rec 7\n%!"; *) if xleft = [] && xright = [] then xfunct else - raise (NotDepParsed(id,left,funct,right)) + raise (NotDepParsed(id,left,funct,right))) let dep_parse dep_chart references timeout time_fun = (* print_endline "dep_parse"; *) diff --git a/parser/LCGlexicon.ml b/parser/LCGlexicon.ml index 38d0c50..58292a7 100644 --- a/parser/LCGlexicon.ml +++ b/parser/LCGlexicon.ml @@ -1169,6 +1169,7 @@ let rec process_interp (d:PreTypes.token_record) = function (* FIXME: rozpoznawa [LCGrenderer.make_frame_simple quant t d ( batrs)] | _,"xxx",[] -> [] (* FIXME *) | ".","interp",[] -> [LCGrenderer.make_frame_simple [] ["dot"] d (make_node "." "interp" d.weight 0 [])] (* FIXME: to jest potrzebne przy CONLL *) + | lemma,"brev",_ -> [LCGrenderer.make_frame_simple [] ["brev"] d (make_node lemma "brev" d.weight 0 [])] (* FIXME: to jest potrzebne przy CONLL *) | "<conll_root>","interp",[] -> let batrs = (make_node "<conll_root>" "interp" d.weight 0 []) in let schema_list = [[schema_field CLAUSE "Clause" Forward [Phrase IP;Phrase (CP(Int,CompUndef));Phrase (NP(Case "voc"));Phrase (Lex "interj")]]] in diff --git a/parser/LCGrules.ml b/parser/LCGrules.ml index d1b3000..18f2d57 100644 --- a/parser/LCGrules.ml +++ b/parser/LCGrules.ml @@ -135,6 +135,7 @@ let rec imp_selector s dir fv in_sem d = function let rec impset_selector s dir fv in_sem rev = function [],_ -> [] | (d,Maybe t) :: l,i -> + (* print_endline "impset_selector Maybe"; *) (if d = Both || d = dir then let x = get_new_variable () in let y = get_new_variable () in @@ -143,6 +144,7 @@ let rec impset_selector s dir fv in_sem rev = function else []) @ (impset_selector s dir fv in_sem ((d,Maybe t) :: rev) (l,i+1)) | (d,t) :: l,i -> + (* print_endline "impset_selector"; *) (if d = Both || d = dir then let s = if rev = [] && l = [] then s else ImpSet(s,List.rev rev @ l) in [fv,s,t,LambdaRot(i,in_sem)] @@ -231,6 +233,7 @@ and deduce_optarg in_sem t = String.concat " " (Xlist.map l (fun (_,_,sem) -> LCGstringOf.linear_term 0 sem)));*) failwith "deduce_optarg" and deduce_optargs sem l = + (* print_endline "deduce_optargs"; *) let b,sems = Xlist.fold (List.rev l) (true,[]) (fun (b,sems) (_,t) -> if not b then b,[] else let l = deduce_matching empty_fv empty_fv (Dot(*Triple(Dot,Dot,Dot)*)) (One,t) in @@ -240,45 +243,59 @@ and deduce_optargs sem l = [Xlist.fold sems sem (fun sem (_,_,s) -> App(LambdaRot(1,sem),s))] else [] -let make_forward sem l = +let make_forward sem l = (* FIXME: po co jest ta procedura? *) + (* print_endline "make_forward 1"; *) let l,sem,_ = Xlist.fold l ([],sem,1) (fun (l,sem,i) -> function Forward,t -> (Forward,t) :: l,sem,i+1 | Both,t -> (Forward,t) :: l,sem,i+1 | Backward,t -> + (* print_endline "make_forward 2"; *) let res = deduce_matching empty_fv empty_fv Dot (One,t) in + (* Printf.printf "make_forward 3 |res|=%d\n%!" (Xlist.size res); *) if res = [] then raise Not_found else let _,_,res = List.hd res in l, App(LambdaRot(i,sem),res), i) in + (* print_endline "make_forward 3"; *) List.rev l, sem let rec deduce_imp dir afv in_sem = function Tensor _ -> [] | Star _ -> [] | Plus _ -> [] - | WithVar(v,g,e,s) -> deduce_imp dir (add_fv afv v (g,e)) (ProjVar(v,in_sem)) s + | WithVar(v,g,e,s) -> (*print_endline "deduce_imp WithVar";*) deduce_imp dir (add_fv afv v (g,e)) (ProjVar(v,in_sem)) s | Imp(s,d,t) -> + (* print_endline "deduce_imp Imp"; *) (List.flatten (Xlist.map (deduce_optarg in_sem t) (fun sem -> deduce_imp dir afv sem s))) @ (imp_selector s dir afv in_sem d t) | ImpSet(s,l) -> - let l2,in_sem2 = if dir = Backward then l,in_sem else make_forward in_sem l in - (List.flatten (Xlist.map (deduce_optargs in_sem l) (fun sem -> deduce_imp dir afv sem s))) @ - (impset_selector s dir afv in_sem2 [] (l2,1)) + (* print_endline "deduce_imp ImpSet 1"; *) + let (l2,in_sem2),b = + if dir = Backward then (l,in_sem),true + else try make_forward in_sem l,true with Not_found -> ([],Dot),false in + (* print_endline "deduce_imp ImpSet 2"; *) + if b then + (List.flatten (Xlist.map (deduce_optargs in_sem l) (fun sem -> deduce_imp dir afv sem s))) @ + (impset_selector s dir afv in_sem2 [] (l2,1)) + else [] | s -> failwith ("deduce_imp: " ^ LCGstringOf.grammar_symbol 1 s) let rec deduce_app references dir (funct,funct_sem) args = -(* Printf.printf "deduce_app: '%s' [%s]\n%!" (LCGstringOf.grammar_symbol 1 funct) + (* Printf.printf "deduce_app 1: '%s' [%s]\n%!" (LCGstringOf.grammar_symbol 1 funct) (String.concat "; " (Xlist.map args (fun (arg,_) -> "'" ^ LCGstringOf.grammar_symbol 1 arg ^ "'"))); *) - List.flatten (Xlist.map (deduce_imp dir empty_fv funct_sem funct) (fun (fv,psi,phi,funct_sem) -> + let x = List.flatten (Xlist.map (deduce_imp dir empty_fv funct_sem funct) (fun (fv,psi,phi,funct_sem) -> + (* print_endline "deduce_app 2"; *) let l = Xlist.fold args [] (fun l (arg,arg_sem) -> let res = deduce_matching empty_fv fv arg_sem (arg,phi) in -(* Printf.printf "deduce_matching: '%s' '%s' -> %d\n%!" (LCGstringOf.grammar_symbol 1 arg) (LCGstringOf.grammar_symbol 1 phi) (Xlist.size res); *) + (* Printf.printf "deduce_matching: '%s' '%s' -> %d\n%!" (LCGstringOf.grammar_symbol 1 arg) (LCGstringOf.grammar_symbol 1 phi) (Xlist.size res); *) res @ l) in let map = Xlist.fold l StringMap.empty (fun map (afv,bfv,sem) -> if not (is_empty_fv afv) then failwith "deduce_app" else StringMap.add_inc map (string_of_fv bfv) (bfv,[sem]) (fun (fv,sems) -> fv, sem :: sems)) in StringMap.fold map [] (fun l _ (bfv,sems) -> let reference = ExtArray.add references (App(funct_sem,make_variant sems)) in - (fold_fv bfv (psi,Ref reference) (fun (t,sem) v (g,e) -> WithVar(v,g,e,t), VariantVar(v,sem))) :: l))) + (fold_fv bfv (psi,Ref reference) (fun (t,sem) v (g,e) -> WithVar(v,g,e,t), VariantVar(v,sem))) :: l))) in + (* print_endline "deduce_app 3"; *) + x (*let rec forward_application = function (Bracket(lf,false,funct),sem), (Bracket(false,rf,arg),arg_sem) -> Xlist.map (deduce_app Forward (funct,sem) (arg,arg_sem)) (fun (t,sem) -> Bracket(lf,rf,t), LCGreductions.linear_term_beta_reduction2 sem) diff --git a/parser/exec.ml b/parser/exec.ml index 06b46ea..ed1b891 100644 --- a/parser/exec.ml +++ b/parser/exec.ml @@ -200,6 +200,8 @@ let conll_parse_sentence timeout test_only_flag paths tokens = let result = empty_conll_parse_result in let time2 = time_fun () in try + let paths = TreeChange.remove_interps paths tokens in + let paths = TreeChange.move_comp paths tokens in let dep_chart = LCGlexicon.dep_create paths tokens in let dep_chart,references = LCGchart.dep_lazify dep_chart in let result = if test_only_flag then result else {result with dep_chart=dep_chart} in @@ -549,103 +551,6 @@ let process_query pre_in pre_out timeout test_only_flag id full_query max_n = selected_semantic_text=selected_semantic_text} in let result = {result with semantic_time=time4 -. time3} in result) -(** (* let text,msg,pre_time1 = PreProcessing.mail_loop2 query in *) - (* let text = PreTypes.RawText "" in - let msg = "" in - let pre_time1 = 0. in *) - print_endline "process_query 3"; - let text = process_text Struct text in - (* let paths = match paths with - PreTypes.RawText "" -> [],0,0 - | PreTypes.StructText [PreTypes.StructParagraph [{PreTypes.pcontents=PreTypes.StructSentence(paths,last,next_id)}]] -> paths,last,next_id - | _ -> failwith "process_query: pre format" in *) - let paths_array = make_paths_array paths in - let result = if test_only_flag then result else {result with paths=paths_array} in -(* if not test_only_flag then print_endline (paths_to_string_indexed paths); *) - let time2 = time_fun () in - let result = {result with pre_time1=pre_time1; pre_time2=time2 -. time1; - paths_size=let _,_,next_id = paths in next_id-1} in - (*if msg <> "" then*) {result with status=PreprocessingError; msg=msg} (*else*) - (*try - let graph = LCGlexicon.create query paths in - let graph,references,next_reference = LCGchart.lazify graph in - let time3 = time_fun () in - let result = {result with lex_time=time3 -. time2} in - try - let graph,references,next_reference = LCGchart.parse timeout graph references next_reference time_fun in - let time4 = time_fun () in - let result = if test_only_flag then result else {result with graph=graph} in - let result = {result with parse_time=time4 -. time3; graph_size=LCGchart.get_no_entries graph} in - if LCGchart.is_parsed graph then - try - let term = LCGchart.get_parsed_term graph in - let dependency_tree = LCGreductions.reduce term references next_reference in - let time5 = time_fun () in - let result = if test_only_flag then result else {result with dependency_tree=dependency_tree} in - let result = {result with reduction_time=time5 -. time4; dependency_tree_size=Array.length dependency_tree} in - if LCGreductions.is_reduced_dependency_tree dependency_tree then - try - LCGreductions.assign_labels dependency_tree; (* uwaga: niejawna zmiana imperatywna w result *) - LCGreductions.remove_cuts dependency_tree; (* uwaga: niejawna zmiana imperatywna w result *) -(* if Array.length dependency_tree < 10000 then print_xml_dependency_tree "results/trees/" id dependency_tree; *) - let paths_array = extend_paths_array paths_array dependency_tree in - let result = if test_only_flag then result else {result with paths=paths_array} in - let (*dependency_tree2*)(*sem*)disamb = LCGvalence.assign_frames_and_senses paths_array dependency_tree(*disamb*) in (* FIXME: wstawić jako nowy etap i na nową zmienną *) - let disamb(*sem*) = DisambSelPref.fit_sel_prefs DisambSelPref.fit_node1 (*dependency_tree2*)disamb in - let (*sem*)disamb = DisambLemma.disambiguate_nodes (*dependency_tree*)(*sem*)disamb in - let (*sem*)disamb = DisambLemma.remove_unused(*disambiguate_nodes*) (*dependency_tree*)(*sem*)disamb in - let (*sem*)disamb = DisambLemma.remove_unused_choices(*disambiguate_nodes*) (*dependency_tree*)(*sem*)disamb in - let (*disamb*)sem = DisambSelPref.fit_sel_prefs DisambSelPref.fit_node2 (*dependency_tree2*)disamb in - let result = if test_only_flag then result else {result with disamb=disamb} in - let sem = DisambLemma.disambiguate_meanings (*dependency_tree*)sem in - let sem(*disamb*) = DisambLemma.remove_unused_choices(*disambiguate_nodes*) (*dependency_tree*)sem(*disamb*) in - let result = if test_only_flag then result else {result with sem=sem} in - let sem2 = SemGraph.translate paths_array (*disamb*)sem in - let result = if test_only_flag then result else {result with sem2=sem2} in - let sem3(*disamb*) = SemGraph.make_tree(*disambiguate_nodes*) (*dependency_tree*)sem2(*disamb*) in - let sem3(*disamb*) = SemGraph.simplify_tree(*disambiguate_nodes*) (*dependency_tree*)sem3(*disamb*) in -(* let sem3(*disamb*) = SemGraph.manage_quantification(*disambiguate_nodes*) (*dependency_tree*)sem3(*disamb*) in *) - let sem3(*disamb*) = SemGraph.simplify_gender(*disambiguate_nodes*) (*dependency_tree*)sem3(*disamb*) in -(* if Array.length disamb < 10000 then print_xml_dependency_tree "results/trees/" (id ^ "dis") disamb; *) - let result = if test_only_flag then result else {result with sem3=sem3} in - let time6 = time_fun () in - if SemGraph.validate_semantics sem3 then - let trees = SemGraph.draw_trees max_n sem3 in - let trees2 = Xlist.map trees SemMrl.variable_alpha_convertion in - let mrls = Xlist.map trees2 SemMrl.make_mrl in - let mrls = Xlist.map mrls SemMrl.move_requirements in - let mrss = Xlist.map mrls SemMrl.make_mrs_of_mrl in - let mrss = Xlist.map mrss SemMrl.mrs_handle_alpha_convertion in - let fols = Xlist.map mrss (fun mrs -> - let l = SemMrl.foll_of_mrs_greedy mrs in - if l = [] then failwith "empty fol" else - List.hd l) in - let result = if test_only_flag then result else {result with trees=trees; mrls=fols(*mrls*)} in - {result with status=Parsed; sem_time=time6 -. time5} - else {result with status=NotTranslated; sem_time=time6 -. time5} - with e -> - let time6 = time_fun () in - {result with status=SemError; msg=Printexc.to_string e; sem_time=time6 -. time5} - else - {result with status=NotReduced} - with - | SemTooBig -> - let time5 = time_fun () in - {result with status=TooManyNodes; reduction_time=time5 -. time4} - | e -> - let time5 = time_fun () in - {result with status=ReductionError; msg=Printexc.to_string e; reduction_time=time5 -. time4} - else {result with status=NotParsed} - with - Timeout t -> - let time4 = time_fun () in - {result with status=ParseTimeout; msg=Printf.sprintf "%f" t; parse_time=time4 -. time3} - | e -> - let time4 = time_fun () in - {result with status=ParseError; msg=Printexc.to_string e; parse_time=time4 -. time3} - with e -> - let time3 = time_fun () in - {result with status=LexiconError; msg=Printexc.to_string e; lex_time=time3 -. time2}*)**) let print_result file result = Printf.fprintf file "query: %s\n" (extract_query_text result.input_text); diff --git a/parser/makefile b/parser/makefile index b7a95e5..3bd901f 100755 --- a/parser/makefile +++ b/parser/makefile @@ -1,7 +1,7 @@ OCAMLC=ocamlc OCAMLOPT=ocamlopt OCAMLDEP=ocamldep -INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../../../Dropbox/lib/latexvis -I ../../installed/latexvis -I ../lib/xt -I ../../../Dropbox/Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre -I ../corpora +INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../../../Dropbox/lib/latexvis -I ../../installed/latexvis -I ../lib/xt -I ../../../Dropbox/Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre -I ../corpora -I ../diagnostics #INCLUDES=-I +xml-light -I +xlib -I ../pre OCAMLFLAGS=$(INCLUDES) -g OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa latexvis.cmxa #nkjp.cmxa @@ -13,12 +13,12 @@ LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexO DISAMB= disambSelPref.ml disambLemma.ml SEM= semGraph.ml semTypes.ml semStringOf.ml semLatexOf.ml semMmlOf.ml semMrl.ml #SEM= semGraph.ml semTypes.ml semStringOf.ml semMmlOf.ml semMrl.ml -EXEC= execTypes.ml visualization.ml exec.ml +EXEC= execTypes.ml visualization.ml ../diagnostics/treeChange.ml exec.ml ../diagnostics/LCGfields.ml ../diagnostics/compTrees.ml all: -# $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml - $(OCAMLOPT) -o server2 $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml - $(OCAMLOPT) -o parser2.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml + $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml +# $(OCAMLOPT) -o server2 $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml +# $(OCAMLOPT) -o parser2.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml # $(OCAMLOPT) -o eniam.distr $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) overseer.ml # $(OCAMLOPT) -o eniam.worker $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) worker.ml # $(OCAMLOPT) -o parser.api $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) apiInterface.ml diff --git a/parser/pipe.ml b/parser/pipe.ml index dd1a558..dc6a813 100644 --- a/parser/pipe.ml +++ b/parser/pipe.ml @@ -55,76 +55,14 @@ let lcg_process query = Visualization.print_html_text path "selected_semantic_text" result.selected_semantic_text; Visualization.print_main_result_text "aaa/" (path ^ "main/") "xxxx" result.selected_semantic_text; Exec.print_result stdout result; - (*Visualization.print_paths "results/" "paths" result.paths; - Visualization.print_paths_latex "paths" result.paths; - (match result.status with - Parsed -> -(* LCGreductions.print_references "chart" result.term; *) -(* Visualization.print_tree "results/" "tree1" result.paths result.term; *) -(* Visualization.print_tree "results/" "tree2" result.paths result.disamb; *) -(* Visualization.print_graph "results/" "term1" result.term; - Visualization.print_graph "results/" "term2" result.disamb;*) - Visualization.print_graph "results/" "term3" result.sem; - Visualization.print_graph "results/" "term4" result.sem2; - Visualization.print_graph2 "results/" "term5" query result.sem3; -(* Visualization.print_xml_graph "results/" "graph" result.term; *) -(* LatexMain.latex_file_out "results/" "chart" "a0" false (fun file -> - Int.iter 0 (Array.length result.sem - 1) (fun i -> - Printf.fprintf file "%s\n" (LCGchart.latex_of_linear_term 0 result.sem.(i))));*) -(* Printf.fprintf file "$%s$\n\n" (LCGchart.latex_of_linear_term_simple 0 result.sem); *) -(* Printf.fprintf file "$%s$\n" (LCGchart.latex_of_linear_term 0 result.sem)); *) -(* LatexMain.latex_compile_and_clean "results/" "chart" *) - let path = "results/web/" in - ignore(Xlist.fold2 result.trees result.mrls 1 (fun n tree mrl -> - Visualization.print_graph2 path ("tree_" ^ string_of_int n) "" tree; - Visualization.print_xml_tree path ("tree_" ^ string_of_int n) tree; - let mml = SemMmlOf.mml_of_mrl mrl in - Visualization.print_mml path ("formula_" ^ string_of_int n) mml; - n+1)); -(* ignore(Xlist.fold result.trees 1 (fun n tree -> - Visualization.print_graph2 "results/" ("tree_" ^ string_of_int n) query tree; - n+1)); - SemLatexOf.print_mrls_latex "results/" "result" query result.mrls;*) - () - | NotTranslated -> -(* LCGreductions.print_references "chart" result.term; *) -(* Visualization.print_tree "results/" "tree1" result.paths result.term; *) -(* Visualization.print_tree "results/" "tree2" result.paths result.disamb; *) -(* Visualization.print_graph "results/" "term1" result.term; - Visualization.print_graph "results/" "term2" result.disamb;*) - Visualization.print_graph "results/" "term3" result.sem; - Visualization.print_graph "results/" "term4" result.sem2; - Visualization.print_graph2 "results/" "term5" query result.sem3; -(* Visualization.print_xml_graph "results/" "graph" result.term; *) - () - | SemError -> - Visualization.print_graph "results/" "term1" result.term; - Visualization.print_graph "results/" "term2" result.disamb; - Visualization.print_graph "results/" "term3" result.sem; - Visualization.print_graph "results/" "term4" result.sem2; - Visualization.print_graph2 "results/" "term5" query result.sem3; - | NotParsed -> - LatexMain.latex_file_out "results/" "chart" "a1" false (fun file -> - Printf.fprintf file "%s\n" (LCGlatexOf.graph result.graph)); - LatexMain.latex_compile_and_clean "results/" "chart" - | NotReduced -> - LCGlatexOf.print_references "chart" result.term -(* LatexMain.latex_file_out "results/" "chart" "a0" false (fun file -> - Int.iter 0 (Array.length result.sem - 1) (fun i -> - Printf.fprintf file "%s\n" (LCGchart.latex_of_linear_term 0 result.sem.(i)))); -(* Printf.fprintf file "$%s$\n\n" (LCGchart.latex_of_linear_term_simple 0 result.sem); *) -(* Printf.fprintf file "$%s$\n" (LCGchart.latex_of_linear_term 0 result.sem)); *) - LatexMain.latex_compile_and_clean "results/" "chart"*) - | _ -> ());*) - (* Printf.fprintf oc "\n%!"; *) Marshal.to_channel oc (PreTypes.RawText "") []; flush oc; let _ = Unix.shutdown_connection ic in () -let _ = +(*let _ = if Array.length Sys.argv < 2 then print_endline "missing argument" else - lcg_process Sys.argv.(1) + lcg_process Sys.argv.(1)*) (* FIXME: parser dziwnie się zachowuje dla 'ścieżki anomalia.' 'ścieżki anomalia. GG' itp. - nie parsuje '.' a jak sparsuje to nie chce redukować *) @@ -191,7 +129,7 @@ let process_id s = let process_conll_corpus filename = let corpus = File.file_in filename (fun file -> CONLL.match_corpus (CONLL.load_corpus file)) in print_endline "process_conll_corpus"; - let corpus = [List.hd corpus] in + (* let corpus = [List.hd corpus] in *) let ic,oc = Unix.open_connection (get_sock_addr Paths.pre_host Paths.pre_port) in Xlist.iter corpus (fun query -> let id = process_id (get_query_id query) in @@ -201,9 +139,14 @@ let process_conll_corpus filename = Visualization.print_html_text path "input_text" result.input_text; Visualization.print_html_text path "pre_text" result.pre_text; Visualization.print_html_text path "parsed_text" result.parsed_text; + Visualization.print_html_text path "selected_sent_text" result.selected_sent_text; + Visualization.print_html_text path "semantic_text" result.semantic_text; + Visualization.print_html_text path "selected_semantic_text" result.selected_semantic_text; (* printf "input_text:\n%s\n" (Visualization.string_of_text result.input_text); printf "pre_text:\n%s\n" (Visualization.string_of_text result.pre_text); *) (* Exec.print_result stdout result; *) + LCGfields.print_fields ["arole"] result.parsed_text; + (* CompTrees.compare_results result.parsed_text; *) (* Visualization.print_paths "results/" "paths" result.paths; *) ()); Marshal.to_channel oc (PreTypes.RawText "") []; @@ -214,7 +157,7 @@ let process_conll_corpus filename = let _ = (* process_conll_corpus "../../NLP resources/Skladnica-zaleznosciowa-mod_130121.conll"; *) (* process_conll_corpus "../../NLP resources/skladnica_zaleznosciowa.conll"; *) - (* process_conll_corpus "../testy/skladnica-test1.conll"; *) + process_conll_corpus "../testy/skladnica-test1.conll"; () (* TO DO: diff --git a/parser/visualization.ml b/parser/visualization.ml index 28f3ef9..4a3e886 100644 --- a/parser/visualization.ml +++ b/parser/visualization.ml @@ -95,7 +95,7 @@ let string_of_status = function | ExecTypes.ParseTimeout -> "timeout" | ExecTypes.NotParsed -> "not_parsed" | ExecTypes.ReductionError -> "error_reduction" - | ExecTypes.TooManyNodes -> "to_many_nodes" + | ExecTypes.TooManyNodes -> "too_many_nodes" | ExecTypes.NotReduced -> "not_reduced" | ExecTypes.SemError -> "error_sem" | ExecTypes.NotTranslated -> "not_translated" @@ -798,13 +798,13 @@ let html_of_eniam_sentence path tokens (result : eniam_parse_result) = | SemError -> sprintf "error_sem: %s dependency_tree_size=%d\n" result.msg result.dependency_tree_size (* | NotTranslated -> "not_translated: \n" *) | Parsed -> - print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree; - print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree; + (* print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree; *) + (* print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree; *) (* LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree; *) - sprintf "parsed: paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.paths_size result.chart_size result.dependency_tree_size ^ - sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^ - sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix (* ^ - sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix *) + sprintf "parsed: paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.paths_size result.chart_size result.dependency_tree_size (*^ *) + (* sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^ *) + (* sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix ^ *) + (* sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix *) | _ -> failwith "html_of_eniam_sentence" let html_of_conll_sentence path tokens (result : conll_parse_result) = @@ -840,13 +840,13 @@ let html_of_conll_sentence path tokens (result : conll_parse_result) = | SemError -> sprintf "error_sem: %s dependency_tree_size=%d\n" result.msg result.dependency_tree_size (* | NotTranslated -> "not_translated: \n" *) | Parsed -> - print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree; - print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree; - LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree; - sprintf "parsed: paths_size=%d dependency_tree_size=%d\n" result.paths_size result.dependency_tree_size ^ - sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^ - sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix ^ - sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix + (* print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree; *) + (* print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree; *) + (* LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree; *) + sprintf "parsed: paths_size=%d dependency_tree_size=%d\n" result.paths_size result.dependency_tree_size (* ^ *) + (* sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^ *) + (* sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix ^ *) + (* sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix *) | _ -> failwith "html_of_conll_sentence" let html_of_sem_sentence path tokens (result : semantic_processing_result) = @@ -884,7 +884,7 @@ let rec html_of_sentence path tokens = function | QuotedSentences sentences -> String.concat "<BR>\n" (Xlist.map sentences (fun p -> sprintf "pid=%s pbeg=%d plen=%d pnext=%d<BR>%s" p.pid p.pbeg p.plen p.pnext (html_of_sentence path tokens p.psentence))) - | AltSentence l -> + | AltSentence l -> (*print_endline "AltSentence";*) "<table border=1>" ^ String.concat "\n" (Xlist.map l (fun (mode,sentence) -> sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_sentence path tokens sentence))) ^ @@ -892,11 +892,11 @@ let rec html_of_sentence path tokens = function (* | _ -> failwith "html_of_sentence: ni" *) let rec html_of_paragraph path tokens = function - RawParagraph s -> s - | StructParagraph sentences -> + RawParagraph s -> (*print_endline "RawParagraph";*) s + | StructParagraph sentences -> (*print_endline "StructParagraph";*) String.concat "<BR>\n" (Xlist.map sentences (fun p -> sprintf "pid=%s pbeg=%d plen=%d pnext=%d<BR>%s" p.pid p.pbeg p.plen p.pnext (html_of_sentence path tokens p.psentence))) - | AltParagraph l -> + | AltParagraph l -> (*print_endline "AltParagraph";*) "<table border=2>" ^ String.concat "\n" (Xlist.map l (fun (mode,paragraph) -> sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_paragraph path tokens paragraph))) ^