diff --git a/diagnostics/LCGfields.ml b/diagnostics/LCGfields.ml
new file mode 100644
index 0000000..1630bbe
--- /dev/null
+++ b/diagnostics/LCGfields.ml
@@ -0,0 +1,120 @@
+open LCGtypes
+open Xstd
+open ExecTypes
+
+let eniam = "eniam"
+let conll = "conll"
+
+module Strings =
+ struct
+ type t = string
+ let compare a b = Pervasives.compare a b
+ end
+
+module StrMap = Map.Make(Strings)
+
+let field_map = StrMap.(empty |> add eniam (ref empty) |> add conll (ref empty))
+
+let add_to_field_map str_mode field content =
+ let f_map = StrMap.find str_mode field_map in
+ let c_map = if StrMap.mem field !f_map
+ then StrMap.find field !f_map
+ else let temp = ref StrMap.empty in
+ f_map := StrMap.add field temp !f_map; temp in
+ if StrMap.mem content !c_map
+ then incr (StrMap.find content !c_map)
+ else c_map := StrMap.add content (ref 1) !c_map
+
+let print_field_map () =
+ StrMap.iter (fun key1 val1 ->
+ print_endline key1;
+ StrMap.iter (fun key2 val2 ->
+ let i = ref 0 in
+ print_endline ("\t" ^ key2);
+ StrMap.iter (fun key3 val3 ->
+ i := !i + !val3;
+ print_endline ("\t\t" ^ key3 ^ "\t\t" ^ (string_of_int !val3))
+ ) !val2;
+ print_endline ("\tsum: " ^ (string_of_int !i))
+ ) !val1
+ ) field_map;
+ print_newline ()
+
+
+let field_of_node str_mode n = function
+ "arole" -> let content = if n.arole = "" then "null" else n.arole in
+ add_to_field_map str_mode "arole" content; content
+ | _ -> failwith "field_of_node: ni"
+
+let field_of_linear_term str_node field = function
+ Node n -> field_of_node str_node n field
+ | _ -> failwith "field_of_linear_term: ni"
+
+let field_of_dependency_tree str_node fields dep_tree =
+ String.concat "\n" (Xlist.map fields (fun field ->
+ Array.fold_left (fun acc x ->
+ acc ^ (field_of_linear_term str_node field x) ^ "\n\t\t" ) "" dep_tree))
+
+let field_of_eniam_sentence fields tokens (result : eniam_parse_result) =
+ match result.status with
+ Idle -> "Idle"
+ (* | PreprocessingError -> "PreprocessingError" *)
+ | LexiconError -> "LexiconError"
+ | ParseError -> "ParseError"
+ | ParseTimeout -> "ParseTimeout"
+ | NotParsed -> "NotParsed"
+ | ReductionError -> "ReductionError"
+ | TooManyNodes -> "TooManyNodes"
+ | NotReduced -> "NotReduced"
+ | SemError -> "SemError"
+ (* | NotTranslated -> "NotTranslated" *)
+ | Parsed -> ignore ("Parsed\n\t\t" ^ (field_of_dependency_tree eniam fields result.dependency_tree)); "Parsed\n"
+ | _ -> failwith "field_of_eniam_sentence"
+
+let field_of_conll_sentence fields tokens (result : conll_parse_result) =
+ match result.status with
+ Idle -> "Idle"
+ (* | PreprocessingError -> "PreprocessingError" *)
+ | LexiconError -> "LexiconError"
+ | ParseError -> "ParseError"
+ | ParseTimeout -> "ParseTimeout"
+ | NotParsed -> "NotParsed"
+ | ReductionError -> "ReductionError"
+ | TooManyNodes -> "TooManyNodes"
+ | NotReduced -> "NotReduced"
+ | SemError -> "SemError"
+ (* | NotTranslated -> "NotTranslated" *)
+ | Parsed -> ignore ("Parsed\n\t\t" ^ (field_of_dependency_tree conll fields result.dependency_tree)); "Parsed\n"
+ | _ -> failwith "field_of_conll_sentence"
+
+
+let rec field_of_sentence fields tokens = function
+ RawSentence s -> s
+ | StructSentence(_,paths,last) -> "StructSentence"
+ | DepSentence(_,paths) -> "DepSentence"
+ | ENIAMSentence result -> field_of_eniam_sentence fields tokens result
+ | CONLLSentence result -> field_of_conll_sentence fields tokens result
+ | QuotedSentences sentences -> "QuotedSentences"
+ | AltSentence l -> String.concat "\n\t" (Xlist.map l (fun (m, s) ->
+ Visualization.string_of_mode m ^ "\t" ^ (field_of_sentence fields tokens s)))
+ (* | _ -> failwith "field_of_sentence: ni" *)
+
+let rec field_of_paragraph fields tokens = function
+ RawParagraph s -> print_endline "no fields detected: only raw paragraph"; s
+ | StructParagraph sentences ->
+ String.concat "\n\t" (Xlist.map sentences (fun p -> field_of_sentence fields tokens p.psentence))
+ | AltParagraph l ->
+ String.concat "\n" (Xlist.map (List.filter (fun (m,t) -> m = ENIAM || m = CONLL) l) (fun (m,t) ->
+ Visualization.string_of_mode m ^ "\n\t" ^ (field_of_paragraph fields tokens t)))
+ (* field_of_paragraph fields tokens (snd @@ List.find (fun (mode,text) -> mode = ENIAM || mode = CONLL) l) *)
+
+let rec print_fields_rec fields = function
+ RawText s -> print_endline "no fields detected: only raw text";
+| StructText(paragraphs,tokens) ->
+ print_endline (String.concat "\n\n" (Xlist.map paragraphs (field_of_paragraph fields tokens)) ^ "\n")
+| AltText l ->
+ print_fields_rec fields (snd @@ List.find (fun (m,t) -> m = Struct || m = ENIAM || m = CONLL) l)
+
+let print_fields fields text =
+ print_fields_rec fields text
+ (* ; print_field_map () *)
diff --git a/diagnostics/treeChange.ml b/diagnostics/treeChange.ml
new file mode 100644
index 0000000..a1e6a19
--- /dev/null
+++ b/diagnostics/treeChange.ml
@@ -0,0 +1,23 @@
+open Xstd
+open PreTypes
+
+let remove_interps paths tokens =
+ let paths_ls = Array.to_list paths in
+ Array.iter (fun (id,super,label) ->
+ if ((ExtArray.get tokens id).orth = "," ||
+ (ExtArray.get tokens id).orth = "." ||
+ (ExtArray.get tokens id).orth = "-") &&
+ not (List.exists (fun (_,super,_) -> super = id) paths_ls)
+ then paths.(id) <- (0,-1,"")) paths; paths
+
+let move_comp paths tokens =
+ let correct_dep (id,super,label) =
+ let is_comp = function
+ Lemma(_,"comp",_) -> true
+ | _ -> false in
+ if ((ExtArray.get tokens id).orth = "by" || (ExtArray.get tokens id).orth = "że")
+ && is_comp (ExtArray.get tokens id).token
+ then (let id_S, super_S, label_S = paths.(super) in
+ paths.(id) <- (id,super_S,label);
+ paths.(super) <- (id_S, id, label_S)) in
+ Array.iter correct_dep paths; paths
diff --git a/parser/LCGchart.ml b/parser/LCGchart.ml
index 1e828ce..a2d750a 100644
--- a/parser/LCGchart.ml
+++ b/parser/LCGchart.ml
@@ -137,40 +137,46 @@ let assign_not_parsed left right (t,sem) =
t, sem
let rec dep_parse_rec references start_time timeout time_fun (DepNode(id,left,funct,right)) =
+ (* printf "dep_parse_rec 1 id=%d\n%!" id; *)
let time = time_fun () in
if time -. start_time > timeout then raise (Timeout(time -. start_time)) else
let left = Xlist.map left (dep_parse_rec references start_time timeout time_fun) in
let right = Xlist.map right (dep_parse_rec references start_time timeout time_fun) in
- (* printf "dep_parse_rec id=%d\n" id; *)
+ (* printf "dep_parse_rec 2 id=%d\n%!" id; *)
let funct,left = Xlist.fold left (funct,[]) (fun (funct,left) arg ->
(* printf "larg: %s\n" (LCGstringOf.symbol_sem_list arg);
printf "funct: %s\n" (LCGstringOf.symbol_sem_list funct); *)
match LCGrules.backward_application references arg funct with
[] -> (*printf "NOT PARSED\n";*) funct, arg :: left
| funct -> merge_sems funct, left) in
+ (* printf "dep_parse_rec 3 |right|=%d \n%!" (Xlist.size right); *)
let funct,right = Xlist.fold right (funct,[]) (fun (funct,right) arg ->
(* printf "funct: %s\n" (LCGstringOf.symbol_sem_list funct);
- printf "rarg: %s\n" (LCGstringOf.symbol_sem_list arg); *)
+ printf "rarg: %s\n" (LCGstringOf.symbol_sem_list arg); *)
match LCGrules.forward_application references funct arg with
[] -> (*printf "NOT PARSED\n";*) funct, arg :: right
| funct -> merge_sems funct, right) in
- if left = [] && right = [] then funct else
+ (* printf "dep_parse_rec 4\n%!"; *)
+ if left = [] && right = [] then funct else (
let xleft = Xlist.rev_map left (fun arg -> Xlist.rev_map arg LCGrules.set_x_type) in
let xright = Xlist.rev_map right (fun arg -> Xlist.rev_map arg LCGrules.set_x_type) in
+ (* printf "dep_parse_rec 5\n%!"; *)
let xfunct,xleft = Xlist.fold xleft (funct,[]) (fun (xfunct,left) arg ->
(* printf "larg: %s\n" (LCGstringOf.symbol_sem_list arg);
printf "funct: %s\n" (LCGstringOf.symbol_sem_list xfunct); *)
match LCGrules.backward_application references arg xfunct with
[] -> (*printf "NOT PARSED\n";*) xfunct, arg :: left
| xfunct -> merge_sems xfunct, left) in
+ (* printf "dep_parse_rec 6\n%!"; *)
let xfunct,xright = Xlist.fold xright (xfunct,[]) (fun (xfunct,right) arg ->
(* printf "funct: %s\n" (LCGstringOf.symbol_sem_list xfunct);
printf "rarg: %s\n" (LCGstringOf.symbol_sem_list arg); *)
match LCGrules.forward_application references xfunct arg with
[] -> (*printf "NOT PARSED\n";*) xfunct, arg :: right
| xfunct -> merge_sems xfunct, right) in
+ (* printf "dep_parse_rec 7\n%!"; *)
if xleft = [] && xright = [] then xfunct else
- raise (NotDepParsed(id,left,funct,right))
+ raise (NotDepParsed(id,left,funct,right)))
let dep_parse dep_chart references timeout time_fun =
(* print_endline "dep_parse"; *)
diff --git a/parser/LCGlexicon.ml b/parser/LCGlexicon.ml
index 38d0c50..58292a7 100644
--- a/parser/LCGlexicon.ml
+++ b/parser/LCGlexicon.ml
@@ -1169,6 +1169,7 @@ let rec process_interp (d:PreTypes.token_record) = function (* FIXME: rozpoznawa
[LCGrenderer.make_frame_simple quant t d ( batrs)]
| _,"xxx",[] -> [] (* FIXME *)
| ".","interp",[] -> [LCGrenderer.make_frame_simple [] ["dot"] d (make_node "." "interp" d.weight 0 [])] (* FIXME: to jest potrzebne przy CONLL *)
+ | lemma,"brev",_ -> [LCGrenderer.make_frame_simple [] ["brev"] d (make_node lemma "brev" d.weight 0 [])] (* FIXME: to jest potrzebne przy CONLL *)
| "<conll_root>","interp",[] ->
let batrs = (make_node "<conll_root>" "interp" d.weight 0 []) in
let schema_list = [[schema_field CLAUSE "Clause" Forward [Phrase IP;Phrase (CP(Int,CompUndef));Phrase (NP(Case "voc"));Phrase (Lex "interj")]]] in
diff --git a/parser/LCGrules.ml b/parser/LCGrules.ml
index d1b3000..18f2d57 100644
--- a/parser/LCGrules.ml
+++ b/parser/LCGrules.ml
@@ -135,6 +135,7 @@ let rec imp_selector s dir fv in_sem d = function
let rec impset_selector s dir fv in_sem rev = function
[],_ -> []
| (d,Maybe t) :: l,i ->
+ (* print_endline "impset_selector Maybe"; *)
(if d = Both || d = dir then
let x = get_new_variable () in
let y = get_new_variable () in
@@ -143,6 +144,7 @@ let rec impset_selector s dir fv in_sem rev = function
else []) @
(impset_selector s dir fv in_sem ((d,Maybe t) :: rev) (l,i+1))
| (d,t) :: l,i ->
+ (* print_endline "impset_selector"; *)
(if d = Both || d = dir then
let s = if rev = [] && l = [] then s else ImpSet(s,List.rev rev @ l) in
[fv,s,t,LambdaRot(i,in_sem)]
@@ -231,6 +233,7 @@ and deduce_optarg in_sem t =
String.concat " " (Xlist.map l (fun (_,_,sem) -> LCGstringOf.linear_term 0 sem)));*) failwith "deduce_optarg"
and deduce_optargs sem l =
+ (* print_endline "deduce_optargs"; *)
let b,sems = Xlist.fold (List.rev l) (true,[]) (fun (b,sems) (_,t) ->
if not b then b,[] else
let l = deduce_matching empty_fv empty_fv (Dot(*Triple(Dot,Dot,Dot)*)) (One,t) in
@@ -240,45 +243,59 @@ and deduce_optargs sem l =
[Xlist.fold sems sem (fun sem (_,_,s) -> App(LambdaRot(1,sem),s))]
else []
-let make_forward sem l =
+let make_forward sem l = (* FIXME: po co jest ta procedura? *)
+ (* print_endline "make_forward 1"; *)
let l,sem,_ = Xlist.fold l ([],sem,1) (fun (l,sem,i) -> function
Forward,t -> (Forward,t) :: l,sem,i+1
| Both,t -> (Forward,t) :: l,sem,i+1
| Backward,t ->
+ (* print_endline "make_forward 2"; *)
let res = deduce_matching empty_fv empty_fv Dot (One,t) in
+ (* Printf.printf "make_forward 3 |res|=%d\n%!" (Xlist.size res); *)
if res = [] then raise Not_found else
let _,_,res = List.hd res in
l, App(LambdaRot(i,sem),res), i) in
+ (* print_endline "make_forward 3"; *)
List.rev l, sem
let rec deduce_imp dir afv in_sem = function
Tensor _ -> []
| Star _ -> []
| Plus _ -> []
- | WithVar(v,g,e,s) -> deduce_imp dir (add_fv afv v (g,e)) (ProjVar(v,in_sem)) s
+ | WithVar(v,g,e,s) -> (*print_endline "deduce_imp WithVar";*) deduce_imp dir (add_fv afv v (g,e)) (ProjVar(v,in_sem)) s
| Imp(s,d,t) ->
+ (* print_endline "deduce_imp Imp"; *)
(List.flatten (Xlist.map (deduce_optarg in_sem t) (fun sem -> deduce_imp dir afv sem s))) @
(imp_selector s dir afv in_sem d t)
| ImpSet(s,l) ->
- let l2,in_sem2 = if dir = Backward then l,in_sem else make_forward in_sem l in
- (List.flatten (Xlist.map (deduce_optargs in_sem l) (fun sem -> deduce_imp dir afv sem s))) @
- (impset_selector s dir afv in_sem2 [] (l2,1))
+ (* print_endline "deduce_imp ImpSet 1"; *)
+ let (l2,in_sem2),b =
+ if dir = Backward then (l,in_sem),true
+ else try make_forward in_sem l,true with Not_found -> ([],Dot),false in
+ (* print_endline "deduce_imp ImpSet 2"; *)
+ if b then
+ (List.flatten (Xlist.map (deduce_optargs in_sem l) (fun sem -> deduce_imp dir afv sem s))) @
+ (impset_selector s dir afv in_sem2 [] (l2,1))
+ else []
| s -> failwith ("deduce_imp: " ^ LCGstringOf.grammar_symbol 1 s)
let rec deduce_app references dir (funct,funct_sem) args =
-(* Printf.printf "deduce_app: '%s' [%s]\n%!" (LCGstringOf.grammar_symbol 1 funct)
+ (* Printf.printf "deduce_app 1: '%s' [%s]\n%!" (LCGstringOf.grammar_symbol 1 funct)
(String.concat "; " (Xlist.map args (fun (arg,_) -> "'" ^ LCGstringOf.grammar_symbol 1 arg ^ "'"))); *)
- List.flatten (Xlist.map (deduce_imp dir empty_fv funct_sem funct) (fun (fv,psi,phi,funct_sem) ->
+ let x = List.flatten (Xlist.map (deduce_imp dir empty_fv funct_sem funct) (fun (fv,psi,phi,funct_sem) ->
+ (* print_endline "deduce_app 2"; *)
let l = Xlist.fold args [] (fun l (arg,arg_sem) ->
let res = deduce_matching empty_fv fv arg_sem (arg,phi) in
-(* Printf.printf "deduce_matching: '%s' '%s' -> %d\n%!" (LCGstringOf.grammar_symbol 1 arg) (LCGstringOf.grammar_symbol 1 phi) (Xlist.size res); *)
+ (* Printf.printf "deduce_matching: '%s' '%s' -> %d\n%!" (LCGstringOf.grammar_symbol 1 arg) (LCGstringOf.grammar_symbol 1 phi) (Xlist.size res); *)
res @ l) in
let map = Xlist.fold l StringMap.empty (fun map (afv,bfv,sem) ->
if not (is_empty_fv afv) then failwith "deduce_app" else
StringMap.add_inc map (string_of_fv bfv) (bfv,[sem]) (fun (fv,sems) -> fv, sem :: sems)) in
StringMap.fold map [] (fun l _ (bfv,sems) ->
let reference = ExtArray.add references (App(funct_sem,make_variant sems)) in
- (fold_fv bfv (psi,Ref reference) (fun (t,sem) v (g,e) -> WithVar(v,g,e,t), VariantVar(v,sem))) :: l)))
+ (fold_fv bfv (psi,Ref reference) (fun (t,sem) v (g,e) -> WithVar(v,g,e,t), VariantVar(v,sem))) :: l))) in
+ (* print_endline "deduce_app 3"; *)
+ x
(*let rec forward_application = function
(Bracket(lf,false,funct),sem), (Bracket(false,rf,arg),arg_sem) -> Xlist.map (deduce_app Forward (funct,sem) (arg,arg_sem)) (fun (t,sem) -> Bracket(lf,rf,t), LCGreductions.linear_term_beta_reduction2 sem)
diff --git a/parser/exec.ml b/parser/exec.ml
index 06b46ea..ed1b891 100644
--- a/parser/exec.ml
+++ b/parser/exec.ml
@@ -200,6 +200,8 @@ let conll_parse_sentence timeout test_only_flag paths tokens =
let result = empty_conll_parse_result in
let time2 = time_fun () in
try
+ let paths = TreeChange.remove_interps paths tokens in
+ let paths = TreeChange.move_comp paths tokens in
let dep_chart = LCGlexicon.dep_create paths tokens in
let dep_chart,references = LCGchart.dep_lazify dep_chart in
let result = if test_only_flag then result else {result with dep_chart=dep_chart} in
@@ -549,103 +551,6 @@ let process_query pre_in pre_out timeout test_only_flag id full_query max_n =
selected_semantic_text=selected_semantic_text} in
let result = {result with semantic_time=time4 -. time3} in
result)
-(** (* let text,msg,pre_time1 = PreProcessing.mail_loop2 query in *)
- (* let text = PreTypes.RawText "" in
- let msg = "" in
- let pre_time1 = 0. in *)
- print_endline "process_query 3";
- let text = process_text Struct text in
- (* let paths = match paths with
- PreTypes.RawText "" -> [],0,0
- | PreTypes.StructText [PreTypes.StructParagraph [{PreTypes.pcontents=PreTypes.StructSentence(paths,last,next_id)}]] -> paths,last,next_id
- | _ -> failwith "process_query: pre format" in *)
- let paths_array = make_paths_array paths in
- let result = if test_only_flag then result else {result with paths=paths_array} in
-(* if not test_only_flag then print_endline (paths_to_string_indexed paths); *)
- let time2 = time_fun () in
- let result = {result with pre_time1=pre_time1; pre_time2=time2 -. time1;
- paths_size=let _,_,next_id = paths in next_id-1} in
- (*if msg <> "" then*) {result with status=PreprocessingError; msg=msg} (*else*)
- (*try
- let graph = LCGlexicon.create query paths in
- let graph,references,next_reference = LCGchart.lazify graph in
- let time3 = time_fun () in
- let result = {result with lex_time=time3 -. time2} in
- try
- let graph,references,next_reference = LCGchart.parse timeout graph references next_reference time_fun in
- let time4 = time_fun () in
- let result = if test_only_flag then result else {result with graph=graph} in
- let result = {result with parse_time=time4 -. time3; graph_size=LCGchart.get_no_entries graph} in
- if LCGchart.is_parsed graph then
- try
- let term = LCGchart.get_parsed_term graph in
- let dependency_tree = LCGreductions.reduce term references next_reference in
- let time5 = time_fun () in
- let result = if test_only_flag then result else {result with dependency_tree=dependency_tree} in
- let result = {result with reduction_time=time5 -. time4; dependency_tree_size=Array.length dependency_tree} in
- if LCGreductions.is_reduced_dependency_tree dependency_tree then
- try
- LCGreductions.assign_labels dependency_tree; (* uwaga: niejawna zmiana imperatywna w result *)
- LCGreductions.remove_cuts dependency_tree; (* uwaga: niejawna zmiana imperatywna w result *)
-(* if Array.length dependency_tree < 10000 then print_xml_dependency_tree "results/trees/" id dependency_tree; *)
- let paths_array = extend_paths_array paths_array dependency_tree in
- let result = if test_only_flag then result else {result with paths=paths_array} in
- let (*dependency_tree2*)(*sem*)disamb = LCGvalence.assign_frames_and_senses paths_array dependency_tree(*disamb*) in (* FIXME: wstawić jako nowy etap i na nową zmienną *)
- let disamb(*sem*) = DisambSelPref.fit_sel_prefs DisambSelPref.fit_node1 (*dependency_tree2*)disamb in
- let (*sem*)disamb = DisambLemma.disambiguate_nodes (*dependency_tree*)(*sem*)disamb in
- let (*sem*)disamb = DisambLemma.remove_unused(*disambiguate_nodes*) (*dependency_tree*)(*sem*)disamb in
- let (*sem*)disamb = DisambLemma.remove_unused_choices(*disambiguate_nodes*) (*dependency_tree*)(*sem*)disamb in
- let (*disamb*)sem = DisambSelPref.fit_sel_prefs DisambSelPref.fit_node2 (*dependency_tree2*)disamb in
- let result = if test_only_flag then result else {result with disamb=disamb} in
- let sem = DisambLemma.disambiguate_meanings (*dependency_tree*)sem in
- let sem(*disamb*) = DisambLemma.remove_unused_choices(*disambiguate_nodes*) (*dependency_tree*)sem(*disamb*) in
- let result = if test_only_flag then result else {result with sem=sem} in
- let sem2 = SemGraph.translate paths_array (*disamb*)sem in
- let result = if test_only_flag then result else {result with sem2=sem2} in
- let sem3(*disamb*) = SemGraph.make_tree(*disambiguate_nodes*) (*dependency_tree*)sem2(*disamb*) in
- let sem3(*disamb*) = SemGraph.simplify_tree(*disambiguate_nodes*) (*dependency_tree*)sem3(*disamb*) in
-(* let sem3(*disamb*) = SemGraph.manage_quantification(*disambiguate_nodes*) (*dependency_tree*)sem3(*disamb*) in *)
- let sem3(*disamb*) = SemGraph.simplify_gender(*disambiguate_nodes*) (*dependency_tree*)sem3(*disamb*) in
-(* if Array.length disamb < 10000 then print_xml_dependency_tree "results/trees/" (id ^ "dis") disamb; *)
- let result = if test_only_flag then result else {result with sem3=sem3} in
- let time6 = time_fun () in
- if SemGraph.validate_semantics sem3 then
- let trees = SemGraph.draw_trees max_n sem3 in
- let trees2 = Xlist.map trees SemMrl.variable_alpha_convertion in
- let mrls = Xlist.map trees2 SemMrl.make_mrl in
- let mrls = Xlist.map mrls SemMrl.move_requirements in
- let mrss = Xlist.map mrls SemMrl.make_mrs_of_mrl in
- let mrss = Xlist.map mrss SemMrl.mrs_handle_alpha_convertion in
- let fols = Xlist.map mrss (fun mrs ->
- let l = SemMrl.foll_of_mrs_greedy mrs in
- if l = [] then failwith "empty fol" else
- List.hd l) in
- let result = if test_only_flag then result else {result with trees=trees; mrls=fols(*mrls*)} in
- {result with status=Parsed; sem_time=time6 -. time5}
- else {result with status=NotTranslated; sem_time=time6 -. time5}
- with e ->
- let time6 = time_fun () in
- {result with status=SemError; msg=Printexc.to_string e; sem_time=time6 -. time5}
- else
- {result with status=NotReduced}
- with
- | SemTooBig ->
- let time5 = time_fun () in
- {result with status=TooManyNodes; reduction_time=time5 -. time4}
- | e ->
- let time5 = time_fun () in
- {result with status=ReductionError; msg=Printexc.to_string e; reduction_time=time5 -. time4}
- else {result with status=NotParsed}
- with
- Timeout t ->
- let time4 = time_fun () in
- {result with status=ParseTimeout; msg=Printf.sprintf "%f" t; parse_time=time4 -. time3}
- | e ->
- let time4 = time_fun () in
- {result with status=ParseError; msg=Printexc.to_string e; parse_time=time4 -. time3}
- with e ->
- let time3 = time_fun () in
- {result with status=LexiconError; msg=Printexc.to_string e; lex_time=time3 -. time2}*)**)
let print_result file result =
Printf.fprintf file "query: %s\n" (extract_query_text result.input_text);
diff --git a/parser/makefile b/parser/makefile
index b7a95e5..3bd901f 100755
--- a/parser/makefile
+++ b/parser/makefile
@@ -1,7 +1,7 @@
OCAMLC=ocamlc
OCAMLOPT=ocamlopt
OCAMLDEP=ocamldep
-INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../../../Dropbox/lib/latexvis -I ../../installed/latexvis -I ../lib/xt -I ../../../Dropbox/Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre -I ../corpora
+INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../../../Dropbox/lib/latexvis -I ../../installed/latexvis -I ../lib/xt -I ../../../Dropbox/Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre -I ../corpora -I ../diagnostics
#INCLUDES=-I +xml-light -I +xlib -I ../pre
OCAMLFLAGS=$(INCLUDES) -g
OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa latexvis.cmxa #nkjp.cmxa
@@ -13,12 +13,12 @@ LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexO
DISAMB= disambSelPref.ml disambLemma.ml
SEM= semGraph.ml semTypes.ml semStringOf.ml semLatexOf.ml semMmlOf.ml semMrl.ml
#SEM= semGraph.ml semTypes.ml semStringOf.ml semMmlOf.ml semMrl.ml
-EXEC= execTypes.ml visualization.ml exec.ml
+EXEC= execTypes.ml visualization.ml ../diagnostics/treeChange.ml exec.ml ../diagnostics/LCGfields.ml ../diagnostics/compTrees.ml
all:
-# $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml
- $(OCAMLOPT) -o server2 $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml
- $(OCAMLOPT) -o parser2.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml
+ $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml
+# $(OCAMLOPT) -o server2 $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml
+# $(OCAMLOPT) -o parser2.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml
# $(OCAMLOPT) -o eniam.distr $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) overseer.ml
# $(OCAMLOPT) -o eniam.worker $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) worker.ml
# $(OCAMLOPT) -o parser.api $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) apiInterface.ml
diff --git a/parser/pipe.ml b/parser/pipe.ml
index dd1a558..dc6a813 100644
--- a/parser/pipe.ml
+++ b/parser/pipe.ml
@@ -55,76 +55,14 @@ let lcg_process query =
Visualization.print_html_text path "selected_semantic_text" result.selected_semantic_text;
Visualization.print_main_result_text "aaa/" (path ^ "main/") "xxxx" result.selected_semantic_text;
Exec.print_result stdout result;
- (*Visualization.print_paths "results/" "paths" result.paths;
- Visualization.print_paths_latex "paths" result.paths;
- (match result.status with
- Parsed ->
-(* LCGreductions.print_references "chart" result.term; *)
-(* Visualization.print_tree "results/" "tree1" result.paths result.term; *)
-(* Visualization.print_tree "results/" "tree2" result.paths result.disamb; *)
-(* Visualization.print_graph "results/" "term1" result.term;
- Visualization.print_graph "results/" "term2" result.disamb;*)
- Visualization.print_graph "results/" "term3" result.sem;
- Visualization.print_graph "results/" "term4" result.sem2;
- Visualization.print_graph2 "results/" "term5" query result.sem3;
-(* Visualization.print_xml_graph "results/" "graph" result.term; *)
-(* LatexMain.latex_file_out "results/" "chart" "a0" false (fun file ->
- Int.iter 0 (Array.length result.sem - 1) (fun i ->
- Printf.fprintf file "%s\n" (LCGchart.latex_of_linear_term 0 result.sem.(i))));*)
-(* Printf.fprintf file "$%s$\n\n" (LCGchart.latex_of_linear_term_simple 0 result.sem); *)
-(* Printf.fprintf file "$%s$\n" (LCGchart.latex_of_linear_term 0 result.sem)); *)
-(* LatexMain.latex_compile_and_clean "results/" "chart" *)
- let path = "results/web/" in
- ignore(Xlist.fold2 result.trees result.mrls 1 (fun n tree mrl ->
- Visualization.print_graph2 path ("tree_" ^ string_of_int n) "" tree;
- Visualization.print_xml_tree path ("tree_" ^ string_of_int n) tree;
- let mml = SemMmlOf.mml_of_mrl mrl in
- Visualization.print_mml path ("formula_" ^ string_of_int n) mml;
- n+1));
-(* ignore(Xlist.fold result.trees 1 (fun n tree ->
- Visualization.print_graph2 "results/" ("tree_" ^ string_of_int n) query tree;
- n+1));
- SemLatexOf.print_mrls_latex "results/" "result" query result.mrls;*)
- ()
- | NotTranslated ->
-(* LCGreductions.print_references "chart" result.term; *)
-(* Visualization.print_tree "results/" "tree1" result.paths result.term; *)
-(* Visualization.print_tree "results/" "tree2" result.paths result.disamb; *)
-(* Visualization.print_graph "results/" "term1" result.term;
- Visualization.print_graph "results/" "term2" result.disamb;*)
- Visualization.print_graph "results/" "term3" result.sem;
- Visualization.print_graph "results/" "term4" result.sem2;
- Visualization.print_graph2 "results/" "term5" query result.sem3;
-(* Visualization.print_xml_graph "results/" "graph" result.term; *)
- ()
- | SemError ->
- Visualization.print_graph "results/" "term1" result.term;
- Visualization.print_graph "results/" "term2" result.disamb;
- Visualization.print_graph "results/" "term3" result.sem;
- Visualization.print_graph "results/" "term4" result.sem2;
- Visualization.print_graph2 "results/" "term5" query result.sem3;
- | NotParsed ->
- LatexMain.latex_file_out "results/" "chart" "a1" false (fun file ->
- Printf.fprintf file "%s\n" (LCGlatexOf.graph result.graph));
- LatexMain.latex_compile_and_clean "results/" "chart"
- | NotReduced ->
- LCGlatexOf.print_references "chart" result.term
-(* LatexMain.latex_file_out "results/" "chart" "a0" false (fun file ->
- Int.iter 0 (Array.length result.sem - 1) (fun i ->
- Printf.fprintf file "%s\n" (LCGchart.latex_of_linear_term 0 result.sem.(i))));
-(* Printf.fprintf file "$%s$\n\n" (LCGchart.latex_of_linear_term_simple 0 result.sem); *)
-(* Printf.fprintf file "$%s$\n" (LCGchart.latex_of_linear_term 0 result.sem)); *)
- LatexMain.latex_compile_and_clean "results/" "chart"*)
- | _ -> ());*)
- (* Printf.fprintf oc "\n%!"; *)
Marshal.to_channel oc (PreTypes.RawText "") [];
flush oc;
let _ = Unix.shutdown_connection ic in
()
-let _ =
+(*let _ =
if Array.length Sys.argv < 2 then print_endline "missing argument" else
- lcg_process Sys.argv.(1)
+ lcg_process Sys.argv.(1)*)
(* FIXME: parser dziwnie się zachowuje dla 'ścieżki anomalia.' 'ścieżki anomalia. GG' itp. - nie parsuje '.' a jak sparsuje to nie chce redukować *)
@@ -191,7 +129,7 @@ let process_id s =
let process_conll_corpus filename =
let corpus = File.file_in filename (fun file -> CONLL.match_corpus (CONLL.load_corpus file)) in
print_endline "process_conll_corpus";
- let corpus = [List.hd corpus] in
+ (* let corpus = [List.hd corpus] in *)
let ic,oc = Unix.open_connection (get_sock_addr Paths.pre_host Paths.pre_port) in
Xlist.iter corpus (fun query ->
let id = process_id (get_query_id query) in
@@ -201,9 +139,14 @@ let process_conll_corpus filename =
Visualization.print_html_text path "input_text" result.input_text;
Visualization.print_html_text path "pre_text" result.pre_text;
Visualization.print_html_text path "parsed_text" result.parsed_text;
+ Visualization.print_html_text path "selected_sent_text" result.selected_sent_text;
+ Visualization.print_html_text path "semantic_text" result.semantic_text;
+ Visualization.print_html_text path "selected_semantic_text" result.selected_semantic_text;
(* printf "input_text:\n%s\n" (Visualization.string_of_text result.input_text);
printf "pre_text:\n%s\n" (Visualization.string_of_text result.pre_text); *)
(* Exec.print_result stdout result; *)
+ LCGfields.print_fields ["arole"] result.parsed_text;
+ (* CompTrees.compare_results result.parsed_text; *)
(* Visualization.print_paths "results/" "paths" result.paths; *)
());
Marshal.to_channel oc (PreTypes.RawText "") [];
@@ -214,7 +157,7 @@ let process_conll_corpus filename =
let _ =
(* process_conll_corpus "../../NLP resources/Skladnica-zaleznosciowa-mod_130121.conll"; *)
(* process_conll_corpus "../../NLP resources/skladnica_zaleznosciowa.conll"; *)
- (* process_conll_corpus "../testy/skladnica-test1.conll"; *)
+ process_conll_corpus "../testy/skladnica-test1.conll";
()
(* TO DO:
diff --git a/parser/visualization.ml b/parser/visualization.ml
index 28f3ef9..4a3e886 100644
--- a/parser/visualization.ml
+++ b/parser/visualization.ml
@@ -95,7 +95,7 @@ let string_of_status = function
| ExecTypes.ParseTimeout -> "timeout"
| ExecTypes.NotParsed -> "not_parsed"
| ExecTypes.ReductionError -> "error_reduction"
- | ExecTypes.TooManyNodes -> "to_many_nodes"
+ | ExecTypes.TooManyNodes -> "too_many_nodes"
| ExecTypes.NotReduced -> "not_reduced"
| ExecTypes.SemError -> "error_sem"
| ExecTypes.NotTranslated -> "not_translated"
@@ -798,13 +798,13 @@ let html_of_eniam_sentence path tokens (result : eniam_parse_result) =
| SemError -> sprintf "error_sem: %s dependency_tree_size=%d\n" result.msg result.dependency_tree_size
(* | NotTranslated -> "not_translated: \n" *)
| Parsed ->
- print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree;
- print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree;
+ (* print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree; *)
+ (* print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree; *)
(* LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree; *)
- sprintf "parsed: paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.paths_size result.chart_size result.dependency_tree_size ^
- sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^
- sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix (* ^
- sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix *)
+ sprintf "parsed: paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.paths_size result.chart_size result.dependency_tree_size (*^ *)
+ (* sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^ *)
+ (* sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix ^ *)
+ (* sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix *)
| _ -> failwith "html_of_eniam_sentence"
let html_of_conll_sentence path tokens (result : conll_parse_result) =
@@ -840,13 +840,13 @@ let html_of_conll_sentence path tokens (result : conll_parse_result) =
| SemError -> sprintf "error_sem: %s dependency_tree_size=%d\n" result.msg result.dependency_tree_size
(* | NotTranslated -> "not_translated: \n" *)
| Parsed ->
- print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree;
- print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree;
- LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree;
- sprintf "parsed: paths_size=%d dependency_tree_size=%d\n" result.paths_size result.dependency_tree_size ^
- sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^
- sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix ^
- sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix
+ (* print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree; *)
+ (* print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree; *)
+ (* LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree; *)
+ sprintf "parsed: paths_size=%d dependency_tree_size=%d\n" result.paths_size result.dependency_tree_size (* ^ *)
+ (* sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^ *)
+ (* sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix ^ *)
+ (* sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix *)
| _ -> failwith "html_of_conll_sentence"
let html_of_sem_sentence path tokens (result : semantic_processing_result) =
@@ -884,7 +884,7 @@ let rec html_of_sentence path tokens = function
| QuotedSentences sentences ->
String.concat "<BR>\n" (Xlist.map sentences (fun p ->
sprintf "pid=%s pbeg=%d plen=%d pnext=%d<BR>%s" p.pid p.pbeg p.plen p.pnext (html_of_sentence path tokens p.psentence)))
- | AltSentence l ->
+ | AltSentence l -> (*print_endline "AltSentence";*)
"<table border=1>" ^
String.concat "\n" (Xlist.map l (fun (mode,sentence) ->
sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_sentence path tokens sentence))) ^
@@ -892,11 +892,11 @@ let rec html_of_sentence path tokens = function
(* | _ -> failwith "html_of_sentence: ni" *)
let rec html_of_paragraph path tokens = function
- RawParagraph s -> s
- | StructParagraph sentences ->
+ RawParagraph s -> (*print_endline "RawParagraph";*) s
+ | StructParagraph sentences -> (*print_endline "StructParagraph";*)
String.concat "<BR>\n" (Xlist.map sentences (fun p ->
sprintf "pid=%s pbeg=%d plen=%d pnext=%d<BR>%s" p.pid p.pbeg p.plen p.pnext (html_of_sentence path tokens p.psentence)))
- | AltParagraph l ->
+ | AltParagraph l -> (*print_endline "AltParagraph";*)
"<table border=2>" ^
String.concat "\n" (Xlist.map l (fun (mode,paragraph) ->
sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_paragraph path tokens paragraph))) ^