Commit 7989117c2342518994b51ff9a429abb6c1fe2768

Authored by Wojciech Jaworski
2 parents 24f2f00f fff6fc9e

rozwiązanie konfliktów przy merge dep_trees

diagnostics/LCGfields.ml 0 → 100644
  1 +open LCGtypes
  2 +open Xstd
  3 +open ExecTypes
  4 +
  5 +let eniam = "eniam"
  6 +let conll = "conll"
  7 +
  8 +module Strings =
  9 + struct
  10 + type t = string
  11 + let compare a b = Pervasives.compare a b
  12 + end
  13 +
  14 +module StrMap = Map.Make(Strings)
  15 +
  16 +let field_map = StrMap.(empty |> add eniam (ref empty) |> add conll (ref empty))
  17 +
  18 +let add_to_field_map str_mode field content =
  19 + let f_map = StrMap.find str_mode field_map in
  20 + let c_map = if StrMap.mem field !f_map
  21 + then StrMap.find field !f_map
  22 + else let temp = ref StrMap.empty in
  23 + f_map := StrMap.add field temp !f_map; temp in
  24 + if StrMap.mem content !c_map
  25 + then incr (StrMap.find content !c_map)
  26 + else c_map := StrMap.add content (ref 1) !c_map
  27 +
  28 +let print_field_map () =
  29 + StrMap.iter (fun key1 val1 ->
  30 + print_endline key1;
  31 + StrMap.iter (fun key2 val2 ->
  32 + let i = ref 0 in
  33 + print_endline ("\t" ^ key2);
  34 + StrMap.iter (fun key3 val3 ->
  35 + i := !i + !val3;
  36 + print_endline ("\t\t" ^ key3 ^ "\t\t" ^ (string_of_int !val3))
  37 + ) !val2;
  38 + print_endline ("\tsum: " ^ (string_of_int !i))
  39 + ) !val1
  40 + ) field_map;
  41 + print_newline ()
  42 +
  43 +
  44 +let field_of_node str_mode n = function
  45 + "arole" -> let content = if n.arole = "" then "null" else n.arole in
  46 + add_to_field_map str_mode "arole" content; content
  47 + | _ -> failwith "field_of_node: ni"
  48 +
  49 +let field_of_linear_term str_node field = function
  50 + Node n -> field_of_node str_node n field
  51 + | _ -> failwith "field_of_linear_term: ni"
  52 +
  53 +let field_of_dependency_tree str_node fields dep_tree =
  54 + String.concat "\n" (Xlist.map fields (fun field ->
  55 + Array.fold_left (fun acc x ->
  56 + acc ^ (field_of_linear_term str_node field x) ^ "\n\t\t" ) "" dep_tree))
  57 +
  58 +let field_of_eniam_sentence fields tokens (result : eniam_parse_result) =
  59 + match result.status with
  60 + Idle -> "Idle"
  61 + (* | PreprocessingError -> "PreprocessingError" *)
  62 + | LexiconError -> "LexiconError"
  63 + | ParseError -> "ParseError"
  64 + | ParseTimeout -> "ParseTimeout"
  65 + | NotParsed -> "NotParsed"
  66 + | ReductionError -> "ReductionError"
  67 + | TooManyNodes -> "TooManyNodes"
  68 + | NotReduced -> "NotReduced"
  69 + | SemError -> "SemError"
  70 + (* | NotTranslated -> "NotTranslated" *)
  71 + | Parsed -> ignore ("Parsed\n\t\t" ^ (field_of_dependency_tree eniam fields result.dependency_tree)); "Parsed\n"
  72 + | _ -> failwith "field_of_eniam_sentence"
  73 +
  74 +let field_of_conll_sentence fields tokens (result : conll_parse_result) =
  75 + match result.status with
  76 + Idle -> "Idle"
  77 + (* | PreprocessingError -> "PreprocessingError" *)
  78 + | LexiconError -> "LexiconError"
  79 + | ParseError -> "ParseError"
  80 + | ParseTimeout -> "ParseTimeout"
  81 + | NotParsed -> "NotParsed"
  82 + | ReductionError -> "ReductionError"
  83 + | TooManyNodes -> "TooManyNodes"
  84 + | NotReduced -> "NotReduced"
  85 + | SemError -> "SemError"
  86 + (* | NotTranslated -> "NotTranslated" *)
  87 + | Parsed -> ignore ("Parsed\n\t\t" ^ (field_of_dependency_tree conll fields result.dependency_tree)); "Parsed\n"
  88 + | _ -> failwith "field_of_conll_sentence"
  89 +
  90 +
  91 +let rec field_of_sentence fields tokens = function
  92 + RawSentence s -> s
  93 + | StructSentence(_,paths,last) -> "StructSentence"
  94 + | DepSentence(_,paths) -> "DepSentence"
  95 + | ENIAMSentence result -> field_of_eniam_sentence fields tokens result
  96 + | CONLLSentence result -> field_of_conll_sentence fields tokens result
  97 + | QuotedSentences sentences -> "QuotedSentences"
  98 + | AltSentence l -> String.concat "\n\t" (Xlist.map l (fun (m, s) ->
  99 + Visualization.string_of_mode m ^ "\t" ^ (field_of_sentence fields tokens s)))
  100 + (* | _ -> failwith "field_of_sentence: ni" *)
  101 +
  102 +let rec field_of_paragraph fields tokens = function
  103 + RawParagraph s -> print_endline "no fields detected: only raw paragraph"; s
  104 + | StructParagraph sentences ->
  105 + String.concat "\n\t" (Xlist.map sentences (fun p -> field_of_sentence fields tokens p.psentence))
  106 + | AltParagraph l ->
  107 + String.concat "\n" (Xlist.map (List.filter (fun (m,t) -> m = ENIAM || m = CONLL) l) (fun (m,t) ->
  108 + Visualization.string_of_mode m ^ "\n\t" ^ (field_of_paragraph fields tokens t)))
  109 + (* field_of_paragraph fields tokens (snd @@ List.find (fun (mode,text) -> mode = ENIAM || mode = CONLL) l) *)
  110 +
  111 +let rec print_fields_rec fields = function
  112 + RawText s -> print_endline "no fields detected: only raw text";
  113 +| StructText(paragraphs,tokens) ->
  114 + print_endline (String.concat "\n\n" (Xlist.map paragraphs (field_of_paragraph fields tokens)) ^ "\n")
  115 +| AltText l ->
  116 + print_fields_rec fields (snd @@ List.find (fun (m,t) -> m = Struct || m = ENIAM || m = CONLL) l)
  117 +
  118 +let print_fields fields text =
  119 + print_fields_rec fields text
  120 + (* ; print_field_map () *)
... ...
diagnostics/treeChange.ml 0 → 100644
  1 +open Xstd
  2 +open PreTypes
  3 +
  4 +let remove_interps paths tokens =
  5 + let paths_ls = Array.to_list paths in
  6 + Array.iter (fun (id,super,label) ->
  7 + if ((ExtArray.get tokens id).orth = "," ||
  8 + (ExtArray.get tokens id).orth = "." ||
  9 + (ExtArray.get tokens id).orth = "-") &&
  10 + not (List.exists (fun (_,super,_) -> super = id) paths_ls)
  11 + then paths.(id) <- (0,-1,"")) paths; paths
  12 +
  13 +let move_comp paths tokens =
  14 + let correct_dep (id,super,label) =
  15 + let is_comp = function
  16 + Lemma(_,"comp",_) -> true
  17 + | _ -> false in
  18 + if ((ExtArray.get tokens id).orth = "by" || (ExtArray.get tokens id).orth = "że")
  19 + && is_comp (ExtArray.get tokens id).token
  20 + then (let id_S, super_S, label_S = paths.(super) in
  21 + paths.(id) <- (id,super_S,label);
  22 + paths.(super) <- (id_S, id, label_S)) in
  23 + Array.iter correct_dep paths; paths
... ...
parser/LCGchart.ml
... ... @@ -137,40 +137,46 @@ let assign_not_parsed left right (t,sem) =
137 137 t, sem
138 138  
139 139 let rec dep_parse_rec references start_time timeout time_fun (DepNode(id,left,funct,right)) =
  140 + (* printf "dep_parse_rec 1 id=%d\n%!" id; *)
140 141 let time = time_fun () in
141 142 if time -. start_time > timeout then raise (Timeout(time -. start_time)) else
142 143 let left = Xlist.map left (dep_parse_rec references start_time timeout time_fun) in
143 144 let right = Xlist.map right (dep_parse_rec references start_time timeout time_fun) in
144   - (* printf "dep_parse_rec id=%d\n" id; *)
  145 + (* printf "dep_parse_rec 2 id=%d\n%!" id; *)
145 146 let funct,left = Xlist.fold left (funct,[]) (fun (funct,left) arg ->
146 147 (* printf "larg: %s\n" (LCGstringOf.symbol_sem_list arg);
147 148 printf "funct: %s\n" (LCGstringOf.symbol_sem_list funct); *)
148 149 match LCGrules.backward_application references arg funct with
149 150 [] -> (*printf "NOT PARSED\n";*) funct, arg :: left
150 151 | funct -> merge_sems funct, left) in
  152 + (* printf "dep_parse_rec 3 |right|=%d \n%!" (Xlist.size right); *)
151 153 let funct,right = Xlist.fold right (funct,[]) (fun (funct,right) arg ->
152 154 (* printf "funct: %s\n" (LCGstringOf.symbol_sem_list funct);
153   - printf "rarg: %s\n" (LCGstringOf.symbol_sem_list arg); *)
  155 + printf "rarg: %s\n" (LCGstringOf.symbol_sem_list arg); *)
154 156 match LCGrules.forward_application references funct arg with
155 157 [] -> (*printf "NOT PARSED\n";*) funct, arg :: right
156 158 | funct -> merge_sems funct, right) in
157   - if left = [] && right = [] then funct else
  159 + (* printf "dep_parse_rec 4\n%!"; *)
  160 + if left = [] && right = [] then funct else (
158 161 let xleft = Xlist.rev_map left (fun arg -> Xlist.rev_map arg LCGrules.set_x_type) in
159 162 let xright = Xlist.rev_map right (fun arg -> Xlist.rev_map arg LCGrules.set_x_type) in
  163 + (* printf "dep_parse_rec 5\n%!"; *)
160 164 let xfunct,xleft = Xlist.fold xleft (funct,[]) (fun (xfunct,left) arg ->
161 165 (* printf "larg: %s\n" (LCGstringOf.symbol_sem_list arg);
162 166 printf "funct: %s\n" (LCGstringOf.symbol_sem_list xfunct); *)
163 167 match LCGrules.backward_application references arg xfunct with
164 168 [] -> (*printf "NOT PARSED\n";*) xfunct, arg :: left
165 169 | xfunct -> merge_sems xfunct, left) in
  170 + (* printf "dep_parse_rec 6\n%!"; *)
166 171 let xfunct,xright = Xlist.fold xright (xfunct,[]) (fun (xfunct,right) arg ->
167 172 (* printf "funct: %s\n" (LCGstringOf.symbol_sem_list xfunct);
168 173 printf "rarg: %s\n" (LCGstringOf.symbol_sem_list arg); *)
169 174 match LCGrules.forward_application references xfunct arg with
170 175 [] -> (*printf "NOT PARSED\n";*) xfunct, arg :: right
171 176 | xfunct -> merge_sems xfunct, right) in
  177 + (* printf "dep_parse_rec 7\n%!"; *)
172 178 if xleft = [] && xright = [] then xfunct else
173   - raise (NotDepParsed(id,left,funct,right))
  179 + raise (NotDepParsed(id,left,funct,right)))
174 180  
175 181 let dep_parse dep_chart references timeout time_fun =
176 182 (* print_endline "dep_parse"; *)
... ...
parser/LCGlexicon.ml
... ... @@ -1169,6 +1169,7 @@ let rec process_interp (d:PreTypes.token_record) = function (* FIXME: rozpoznawa
1169 1169 [LCGrenderer.make_frame_simple quant t d ( batrs)]
1170 1170 | _,"xxx",[] -> [] (* FIXME *)
1171 1171 | ".","interp",[] -> [LCGrenderer.make_frame_simple [] ["dot"] d (make_node "." "interp" d.weight 0 [])] (* FIXME: to jest potrzebne przy CONLL *)
  1172 + | lemma,"brev",_ -> [LCGrenderer.make_frame_simple [] ["brev"] d (make_node lemma "brev" d.weight 0 [])] (* FIXME: to jest potrzebne przy CONLL *)
1172 1173 | "<conll_root>","interp",[] ->
1173 1174 let batrs = (make_node "<conll_root>" "interp" d.weight 0 []) in
1174 1175 let schema_list = [[schema_field CLAUSE "Clause" Forward [Phrase IP;Phrase (CP(Int,CompUndef));Phrase (NP(Case "voc"));Phrase (Lex "interj")]]] in
... ...
parser/LCGrules.ml
... ... @@ -135,6 +135,7 @@ let rec imp_selector s dir fv in_sem d = function
135 135 let rec impset_selector s dir fv in_sem rev = function
136 136 [],_ -> []
137 137 | (d,Maybe t) :: l,i ->
  138 + (* print_endline "impset_selector Maybe"; *)
138 139 (if d = Both || d = dir then
139 140 let x = get_new_variable () in
140 141 let y = get_new_variable () in
... ... @@ -143,6 +144,7 @@ let rec impset_selector s dir fv in_sem rev = function
143 144 else []) @
144 145 (impset_selector s dir fv in_sem ((d,Maybe t) :: rev) (l,i+1))
145 146 | (d,t) :: l,i ->
  147 + (* print_endline "impset_selector"; *)
146 148 (if d = Both || d = dir then
147 149 let s = if rev = [] && l = [] then s else ImpSet(s,List.rev rev @ l) in
148 150 [fv,s,t,LambdaRot(i,in_sem)]
... ... @@ -231,6 +233,7 @@ and deduce_optarg in_sem t =
231 233 String.concat " " (Xlist.map l (fun (_,_,sem) -> LCGstringOf.linear_term 0 sem)));*) failwith "deduce_optarg"
232 234  
233 235 and deduce_optargs sem l =
  236 + (* print_endline "deduce_optargs"; *)
234 237 let b,sems = Xlist.fold (List.rev l) (true,[]) (fun (b,sems) (_,t) ->
235 238 if not b then b,[] else
236 239 let l = deduce_matching empty_fv empty_fv (Dot(*Triple(Dot,Dot,Dot)*)) (One,t) in
... ... @@ -240,45 +243,59 @@ and deduce_optargs sem l =
240 243 [Xlist.fold sems sem (fun sem (_,_,s) -> App(LambdaRot(1,sem),s))]
241 244 else []
242 245  
243   -let make_forward sem l =
  246 +let make_forward sem l = (* FIXME: po co jest ta procedura? *)
  247 + (* print_endline "make_forward 1"; *)
244 248 let l,sem,_ = Xlist.fold l ([],sem,1) (fun (l,sem,i) -> function
245 249 Forward,t -> (Forward,t) :: l,sem,i+1
246 250 | Both,t -> (Forward,t) :: l,sem,i+1
247 251 | Backward,t ->
  252 + (* print_endline "make_forward 2"; *)
248 253 let res = deduce_matching empty_fv empty_fv Dot (One,t) in
  254 + (* Printf.printf "make_forward 3 |res|=%d\n%!" (Xlist.size res); *)
249 255 if res = [] then raise Not_found else
250 256 let _,_,res = List.hd res in
251 257 l, App(LambdaRot(i,sem),res), i) in
  258 + (* print_endline "make_forward 3"; *)
252 259 List.rev l, sem
253 260  
254 261 let rec deduce_imp dir afv in_sem = function
255 262 Tensor _ -> []
256 263 | Star _ -> []
257 264 | Plus _ -> []
258   - | WithVar(v,g,e,s) -> deduce_imp dir (add_fv afv v (g,e)) (ProjVar(v,in_sem)) s
  265 + | WithVar(v,g,e,s) -> (*print_endline "deduce_imp WithVar";*) deduce_imp dir (add_fv afv v (g,e)) (ProjVar(v,in_sem)) s
259 266 | Imp(s,d,t) ->
  267 + (* print_endline "deduce_imp Imp"; *)
260 268 (List.flatten (Xlist.map (deduce_optarg in_sem t) (fun sem -> deduce_imp dir afv sem s))) @
261 269 (imp_selector s dir afv in_sem d t)
262 270 | ImpSet(s,l) ->
263   - let l2,in_sem2 = if dir = Backward then l,in_sem else make_forward in_sem l in
264   - (List.flatten (Xlist.map (deduce_optargs in_sem l) (fun sem -> deduce_imp dir afv sem s))) @
265   - (impset_selector s dir afv in_sem2 [] (l2,1))
  271 + (* print_endline "deduce_imp ImpSet 1"; *)
  272 + let (l2,in_sem2),b =
  273 + if dir = Backward then (l,in_sem),true
  274 + else try make_forward in_sem l,true with Not_found -> ([],Dot),false in
  275 + (* print_endline "deduce_imp ImpSet 2"; *)
  276 + if b then
  277 + (List.flatten (Xlist.map (deduce_optargs in_sem l) (fun sem -> deduce_imp dir afv sem s))) @
  278 + (impset_selector s dir afv in_sem2 [] (l2,1))
  279 + else []
266 280 | s -> failwith ("deduce_imp: " ^ LCGstringOf.grammar_symbol 1 s)
267 281  
268 282 let rec deduce_app references dir (funct,funct_sem) args =
269   -(* Printf.printf "deduce_app: '%s' [%s]\n%!" (LCGstringOf.grammar_symbol 1 funct)
  283 + (* Printf.printf "deduce_app 1: '%s' [%s]\n%!" (LCGstringOf.grammar_symbol 1 funct)
270 284 (String.concat "; " (Xlist.map args (fun (arg,_) -> "'" ^ LCGstringOf.grammar_symbol 1 arg ^ "'"))); *)
271   - List.flatten (Xlist.map (deduce_imp dir empty_fv funct_sem funct) (fun (fv,psi,phi,funct_sem) ->
  285 + let x = List.flatten (Xlist.map (deduce_imp dir empty_fv funct_sem funct) (fun (fv,psi,phi,funct_sem) ->
  286 + (* print_endline "deduce_app 2"; *)
272 287 let l = Xlist.fold args [] (fun l (arg,arg_sem) ->
273 288 let res = deduce_matching empty_fv fv arg_sem (arg,phi) in
274   -(* Printf.printf "deduce_matching: '%s' '%s' -> %d\n%!" (LCGstringOf.grammar_symbol 1 arg) (LCGstringOf.grammar_symbol 1 phi) (Xlist.size res); *)
  289 + (* Printf.printf "deduce_matching: '%s' '%s' -> %d\n%!" (LCGstringOf.grammar_symbol 1 arg) (LCGstringOf.grammar_symbol 1 phi) (Xlist.size res); *)
275 290 res @ l) in
276 291 let map = Xlist.fold l StringMap.empty (fun map (afv,bfv,sem) ->
277 292 if not (is_empty_fv afv) then failwith "deduce_app" else
278 293 StringMap.add_inc map (string_of_fv bfv) (bfv,[sem]) (fun (fv,sems) -> fv, sem :: sems)) in
279 294 StringMap.fold map [] (fun l _ (bfv,sems) ->
280 295 let reference = ExtArray.add references (App(funct_sem,make_variant sems)) in
281   - (fold_fv bfv (psi,Ref reference) (fun (t,sem) v (g,e) -> WithVar(v,g,e,t), VariantVar(v,sem))) :: l)))
  296 + (fold_fv bfv (psi,Ref reference) (fun (t,sem) v (g,e) -> WithVar(v,g,e,t), VariantVar(v,sem))) :: l))) in
  297 + (* print_endline "deduce_app 3"; *)
  298 + x
282 299  
283 300 (*let rec forward_application = function
284 301 (Bracket(lf,false,funct),sem), (Bracket(false,rf,arg),arg_sem) -> Xlist.map (deduce_app Forward (funct,sem) (arg,arg_sem)) (fun (t,sem) -> Bracket(lf,rf,t), LCGreductions.linear_term_beta_reduction2 sem)
... ...
parser/exec.ml
... ... @@ -200,6 +200,8 @@ let conll_parse_sentence timeout test_only_flag paths tokens =
200 200 let result = empty_conll_parse_result in
201 201 let time2 = time_fun () in
202 202 try
  203 + let paths = TreeChange.remove_interps paths tokens in
  204 + let paths = TreeChange.move_comp paths tokens in
203 205 let dep_chart = LCGlexicon.dep_create paths tokens in
204 206 let dep_chart,references = LCGchart.dep_lazify dep_chart in
205 207 let result = if test_only_flag then result else {result with dep_chart=dep_chart} in
... ... @@ -549,103 +551,6 @@ let process_query pre_in pre_out timeout test_only_flag id full_query max_n =
549 551 selected_semantic_text=selected_semantic_text} in
550 552 let result = {result with semantic_time=time4 -. time3} in
551 553 result)
552   -(** (* let text,msg,pre_time1 = PreProcessing.mail_loop2 query in *)
553   - (* let text = PreTypes.RawText "" in
554   - let msg = "" in
555   - let pre_time1 = 0. in *)
556   - print_endline "process_query 3";
557   - let text = process_text Struct text in
558   - (* let paths = match paths with
559   - PreTypes.RawText "" -> [],0,0
560   - | PreTypes.StructText [PreTypes.StructParagraph [{PreTypes.pcontents=PreTypes.StructSentence(paths,last,next_id)}]] -> paths,last,next_id
561   - | _ -> failwith "process_query: pre format" in *)
562   - let paths_array = make_paths_array paths in
563   - let result = if test_only_flag then result else {result with paths=paths_array} in
564   -(* if not test_only_flag then print_endline (paths_to_string_indexed paths); *)
565   - let time2 = time_fun () in
566   - let result = {result with pre_time1=pre_time1; pre_time2=time2 -. time1;
567   - paths_size=let _,_,next_id = paths in next_id-1} in
568   - (*if msg <> "" then*) {result with status=PreprocessingError; msg=msg} (*else*)
569   - (*try
570   - let graph = LCGlexicon.create query paths in
571   - let graph,references,next_reference = LCGchart.lazify graph in
572   - let time3 = time_fun () in
573   - let result = {result with lex_time=time3 -. time2} in
574   - try
575   - let graph,references,next_reference = LCGchart.parse timeout graph references next_reference time_fun in
576   - let time4 = time_fun () in
577   - let result = if test_only_flag then result else {result with graph=graph} in
578   - let result = {result with parse_time=time4 -. time3; graph_size=LCGchart.get_no_entries graph} in
579   - if LCGchart.is_parsed graph then
580   - try
581   - let term = LCGchart.get_parsed_term graph in
582   - let dependency_tree = LCGreductions.reduce term references next_reference in
583   - let time5 = time_fun () in
584   - let result = if test_only_flag then result else {result with dependency_tree=dependency_tree} in
585   - let result = {result with reduction_time=time5 -. time4; dependency_tree_size=Array.length dependency_tree} in
586   - if LCGreductions.is_reduced_dependency_tree dependency_tree then
587   - try
588   - LCGreductions.assign_labels dependency_tree; (* uwaga: niejawna zmiana imperatywna w result *)
589   - LCGreductions.remove_cuts dependency_tree; (* uwaga: niejawna zmiana imperatywna w result *)
590   -(* if Array.length dependency_tree < 10000 then print_xml_dependency_tree "results/trees/" id dependency_tree; *)
591   - let paths_array = extend_paths_array paths_array dependency_tree in
592   - let result = if test_only_flag then result else {result with paths=paths_array} in
593   - let (*dependency_tree2*)(*sem*)disamb = LCGvalence.assign_frames_and_senses paths_array dependency_tree(*disamb*) in (* FIXME: wstawić jako nowy etap i na nową zmienną *)
594   - let disamb(*sem*) = DisambSelPref.fit_sel_prefs DisambSelPref.fit_node1 (*dependency_tree2*)disamb in
595   - let (*sem*)disamb = DisambLemma.disambiguate_nodes (*dependency_tree*)(*sem*)disamb in
596   - let (*sem*)disamb = DisambLemma.remove_unused(*disambiguate_nodes*) (*dependency_tree*)(*sem*)disamb in
597   - let (*sem*)disamb = DisambLemma.remove_unused_choices(*disambiguate_nodes*) (*dependency_tree*)(*sem*)disamb in
598   - let (*disamb*)sem = DisambSelPref.fit_sel_prefs DisambSelPref.fit_node2 (*dependency_tree2*)disamb in
599   - let result = if test_only_flag then result else {result with disamb=disamb} in
600   - let sem = DisambLemma.disambiguate_meanings (*dependency_tree*)sem in
601   - let sem(*disamb*) = DisambLemma.remove_unused_choices(*disambiguate_nodes*) (*dependency_tree*)sem(*disamb*) in
602   - let result = if test_only_flag then result else {result with sem=sem} in
603   - let sem2 = SemGraph.translate paths_array (*disamb*)sem in
604   - let result = if test_only_flag then result else {result with sem2=sem2} in
605   - let sem3(*disamb*) = SemGraph.make_tree(*disambiguate_nodes*) (*dependency_tree*)sem2(*disamb*) in
606   - let sem3(*disamb*) = SemGraph.simplify_tree(*disambiguate_nodes*) (*dependency_tree*)sem3(*disamb*) in
607   -(* let sem3(*disamb*) = SemGraph.manage_quantification(*disambiguate_nodes*) (*dependency_tree*)sem3(*disamb*) in *)
608   - let sem3(*disamb*) = SemGraph.simplify_gender(*disambiguate_nodes*) (*dependency_tree*)sem3(*disamb*) in
609   -(* if Array.length disamb < 10000 then print_xml_dependency_tree "results/trees/" (id ^ "dis") disamb; *)
610   - let result = if test_only_flag then result else {result with sem3=sem3} in
611   - let time6 = time_fun () in
612   - if SemGraph.validate_semantics sem3 then
613   - let trees = SemGraph.draw_trees max_n sem3 in
614   - let trees2 = Xlist.map trees SemMrl.variable_alpha_convertion in
615   - let mrls = Xlist.map trees2 SemMrl.make_mrl in
616   - let mrls = Xlist.map mrls SemMrl.move_requirements in
617   - let mrss = Xlist.map mrls SemMrl.make_mrs_of_mrl in
618   - let mrss = Xlist.map mrss SemMrl.mrs_handle_alpha_convertion in
619   - let fols = Xlist.map mrss (fun mrs ->
620   - let l = SemMrl.foll_of_mrs_greedy mrs in
621   - if l = [] then failwith "empty fol" else
622   - List.hd l) in
623   - let result = if test_only_flag then result else {result with trees=trees; mrls=fols(*mrls*)} in
624   - {result with status=Parsed; sem_time=time6 -. time5}
625   - else {result with status=NotTranslated; sem_time=time6 -. time5}
626   - with e ->
627   - let time6 = time_fun () in
628   - {result with status=SemError; msg=Printexc.to_string e; sem_time=time6 -. time5}
629   - else
630   - {result with status=NotReduced}
631   - with
632   - | SemTooBig ->
633   - let time5 = time_fun () in
634   - {result with status=TooManyNodes; reduction_time=time5 -. time4}
635   - | e ->
636   - let time5 = time_fun () in
637   - {result with status=ReductionError; msg=Printexc.to_string e; reduction_time=time5 -. time4}
638   - else {result with status=NotParsed}
639   - with
640   - Timeout t ->
641   - let time4 = time_fun () in
642   - {result with status=ParseTimeout; msg=Printf.sprintf "%f" t; parse_time=time4 -. time3}
643   - | e ->
644   - let time4 = time_fun () in
645   - {result with status=ParseError; msg=Printexc.to_string e; parse_time=time4 -. time3}
646   - with e ->
647   - let time3 = time_fun () in
648   - {result with status=LexiconError; msg=Printexc.to_string e; lex_time=time3 -. time2}*)**)
649 554  
650 555 let print_result file result =
651 556 Printf.fprintf file "query: %s\n" (extract_query_text result.input_text);
... ...
parser/makefile
1 1 OCAMLC=ocamlc
2 2 OCAMLOPT=ocamlopt
3 3 OCAMLDEP=ocamldep
4   -INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../../../Dropbox/lib/latexvis -I ../../installed/latexvis -I ../lib/xt -I ../../../Dropbox/Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre -I ../corpora
  4 +INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../../../Dropbox/lib/latexvis -I ../../installed/latexvis -I ../lib/xt -I ../../../Dropbox/Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre -I ../corpora -I ../diagnostics
5 5 #INCLUDES=-I +xml-light -I +xlib -I ../pre
6 6 OCAMLFLAGS=$(INCLUDES) -g
7 7 OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa latexvis.cmxa #nkjp.cmxa
... ... @@ -13,12 +13,12 @@ LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexO
13 13 DISAMB= disambSelPref.ml disambLemma.ml
14 14 SEM= semGraph.ml semTypes.ml semStringOf.ml semLatexOf.ml semMmlOf.ml semMrl.ml
15 15 #SEM= semGraph.ml semTypes.ml semStringOf.ml semMmlOf.ml semMrl.ml
16   -EXEC= execTypes.ml visualization.ml exec.ml
  16 +EXEC= execTypes.ml visualization.ml ../diagnostics/treeChange.ml exec.ml ../diagnostics/LCGfields.ml ../diagnostics/compTrees.ml
17 17  
18 18 all:
19   -# $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml
20   - $(OCAMLOPT) -o server2 $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml
21   - $(OCAMLOPT) -o parser2.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml
  19 + $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml
  20 +# $(OCAMLOPT) -o server2 $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml
  21 +# $(OCAMLOPT) -o parser2.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml
22 22 # $(OCAMLOPT) -o eniam.distr $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) overseer.ml
23 23 # $(OCAMLOPT) -o eniam.worker $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) worker.ml
24 24 # $(OCAMLOPT) -o parser.api $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) apiInterface.ml
... ...
parser/pipe.ml
... ... @@ -55,76 +55,14 @@ let lcg_process query =
55 55 Visualization.print_html_text path "selected_semantic_text" result.selected_semantic_text;
56 56 Visualization.print_main_result_text "aaa/" (path ^ "main/") "xxxx" result.selected_semantic_text;
57 57 Exec.print_result stdout result;
58   - (*Visualization.print_paths "results/" "paths" result.paths;
59   - Visualization.print_paths_latex "paths" result.paths;
60   - (match result.status with
61   - Parsed ->
62   -(* LCGreductions.print_references "chart" result.term; *)
63   -(* Visualization.print_tree "results/" "tree1" result.paths result.term; *)
64   -(* Visualization.print_tree "results/" "tree2" result.paths result.disamb; *)
65   -(* Visualization.print_graph "results/" "term1" result.term;
66   - Visualization.print_graph "results/" "term2" result.disamb;*)
67   - Visualization.print_graph "results/" "term3" result.sem;
68   - Visualization.print_graph "results/" "term4" result.sem2;
69   - Visualization.print_graph2 "results/" "term5" query result.sem3;
70   -(* Visualization.print_xml_graph "results/" "graph" result.term; *)
71   -(* LatexMain.latex_file_out "results/" "chart" "a0" false (fun file ->
72   - Int.iter 0 (Array.length result.sem - 1) (fun i ->
73   - Printf.fprintf file "%s\n" (LCGchart.latex_of_linear_term 0 result.sem.(i))));*)
74   -(* Printf.fprintf file "$%s$\n\n" (LCGchart.latex_of_linear_term_simple 0 result.sem); *)
75   -(* Printf.fprintf file "$%s$\n" (LCGchart.latex_of_linear_term 0 result.sem)); *)
76   -(* LatexMain.latex_compile_and_clean "results/" "chart" *)
77   - let path = "results/web/" in
78   - ignore(Xlist.fold2 result.trees result.mrls 1 (fun n tree mrl ->
79   - Visualization.print_graph2 path ("tree_" ^ string_of_int n) "" tree;
80   - Visualization.print_xml_tree path ("tree_" ^ string_of_int n) tree;
81   - let mml = SemMmlOf.mml_of_mrl mrl in
82   - Visualization.print_mml path ("formula_" ^ string_of_int n) mml;
83   - n+1));
84   -(* ignore(Xlist.fold result.trees 1 (fun n tree ->
85   - Visualization.print_graph2 "results/" ("tree_" ^ string_of_int n) query tree;
86   - n+1));
87   - SemLatexOf.print_mrls_latex "results/" "result" query result.mrls;*)
88   - ()
89   - | NotTranslated ->
90   -(* LCGreductions.print_references "chart" result.term; *)
91   -(* Visualization.print_tree "results/" "tree1" result.paths result.term; *)
92   -(* Visualization.print_tree "results/" "tree2" result.paths result.disamb; *)
93   -(* Visualization.print_graph "results/" "term1" result.term;
94   - Visualization.print_graph "results/" "term2" result.disamb;*)
95   - Visualization.print_graph "results/" "term3" result.sem;
96   - Visualization.print_graph "results/" "term4" result.sem2;
97   - Visualization.print_graph2 "results/" "term5" query result.sem3;
98   -(* Visualization.print_xml_graph "results/" "graph" result.term; *)
99   - ()
100   - | SemError ->
101   - Visualization.print_graph "results/" "term1" result.term;
102   - Visualization.print_graph "results/" "term2" result.disamb;
103   - Visualization.print_graph "results/" "term3" result.sem;
104   - Visualization.print_graph "results/" "term4" result.sem2;
105   - Visualization.print_graph2 "results/" "term5" query result.sem3;
106   - | NotParsed ->
107   - LatexMain.latex_file_out "results/" "chart" "a1" false (fun file ->
108   - Printf.fprintf file "%s\n" (LCGlatexOf.graph result.graph));
109   - LatexMain.latex_compile_and_clean "results/" "chart"
110   - | NotReduced ->
111   - LCGlatexOf.print_references "chart" result.term
112   -(* LatexMain.latex_file_out "results/" "chart" "a0" false (fun file ->
113   - Int.iter 0 (Array.length result.sem - 1) (fun i ->
114   - Printf.fprintf file "%s\n" (LCGchart.latex_of_linear_term 0 result.sem.(i))));
115   -(* Printf.fprintf file "$%s$\n\n" (LCGchart.latex_of_linear_term_simple 0 result.sem); *)
116   -(* Printf.fprintf file "$%s$\n" (LCGchart.latex_of_linear_term 0 result.sem)); *)
117   - LatexMain.latex_compile_and_clean "results/" "chart"*)
118   - | _ -> ());*)
119   - (* Printf.fprintf oc "\n%!"; *)
120 58 Marshal.to_channel oc (PreTypes.RawText "") [];
121 59 flush oc;
122 60 let _ = Unix.shutdown_connection ic in
123 61 ()
124 62  
125   -let _ =
  63 +(*let _ =
126 64 if Array.length Sys.argv < 2 then print_endline "missing argument" else
127   - lcg_process Sys.argv.(1)
  65 + lcg_process Sys.argv.(1)*)
128 66  
129 67  
130 68 (* FIXME: parser dziwnie się zachowuje dla 'ścieżki anomalia.' 'ścieżki anomalia. GG' itp. - nie parsuje '.' a jak sparsuje to nie chce redukować *)
... ... @@ -191,7 +129,7 @@ let process_id s =
191 129 let process_conll_corpus filename =
192 130 let corpus = File.file_in filename (fun file -> CONLL.match_corpus (CONLL.load_corpus file)) in
193 131 print_endline "process_conll_corpus";
194   - let corpus = [List.hd corpus] in
  132 + (* let corpus = [List.hd corpus] in *)
195 133 let ic,oc = Unix.open_connection (get_sock_addr Paths.pre_host Paths.pre_port) in
196 134 Xlist.iter corpus (fun query ->
197 135 let id = process_id (get_query_id query) in
... ... @@ -201,9 +139,14 @@ let process_conll_corpus filename =
201 139 Visualization.print_html_text path "input_text" result.input_text;
202 140 Visualization.print_html_text path "pre_text" result.pre_text;
203 141 Visualization.print_html_text path "parsed_text" result.parsed_text;
  142 + Visualization.print_html_text path "selected_sent_text" result.selected_sent_text;
  143 + Visualization.print_html_text path "semantic_text" result.semantic_text;
  144 + Visualization.print_html_text path "selected_semantic_text" result.selected_semantic_text;
204 145 (* printf "input_text:\n%s\n" (Visualization.string_of_text result.input_text);
205 146 printf "pre_text:\n%s\n" (Visualization.string_of_text result.pre_text); *)
206 147 (* Exec.print_result stdout result; *)
  148 + LCGfields.print_fields ["arole"] result.parsed_text;
  149 + (* CompTrees.compare_results result.parsed_text; *)
207 150 (* Visualization.print_paths "results/" "paths" result.paths; *)
208 151 ());
209 152 Marshal.to_channel oc (PreTypes.RawText "") [];
... ... @@ -214,7 +157,7 @@ let process_conll_corpus filename =
214 157 let _ =
215 158 (* process_conll_corpus "../../NLP resources/Skladnica-zaleznosciowa-mod_130121.conll"; *)
216 159 (* process_conll_corpus "../../NLP resources/skladnica_zaleznosciowa.conll"; *)
217   - (* process_conll_corpus "../testy/skladnica-test1.conll"; *)
  160 + process_conll_corpus "../testy/skladnica-test1.conll";
218 161 ()
219 162  
220 163 (* TO DO:
... ...
parser/visualization.ml
... ... @@ -95,7 +95,7 @@ let string_of_status = function
95 95 | ExecTypes.ParseTimeout -> "timeout"
96 96 | ExecTypes.NotParsed -> "not_parsed"
97 97 | ExecTypes.ReductionError -> "error_reduction"
98   - | ExecTypes.TooManyNodes -> "to_many_nodes"
  98 + | ExecTypes.TooManyNodes -> "too_many_nodes"
99 99 | ExecTypes.NotReduced -> "not_reduced"
100 100 | ExecTypes.SemError -> "error_sem"
101 101 | ExecTypes.NotTranslated -> "not_translated"
... ... @@ -798,13 +798,13 @@ let html_of_eniam_sentence path tokens (result : eniam_parse_result) =
798 798 | SemError -> sprintf "error_sem: %s dependency_tree_size=%d\n" result.msg result.dependency_tree_size
799 799 (* | NotTranslated -> "not_translated: \n" *)
800 800 | Parsed ->
801   - print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree;
802   - print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree;
  801 + (* print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree; *)
  802 + (* print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree; *)
803 803 (* LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree; *)
804   - sprintf "parsed: paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.paths_size result.chart_size result.dependency_tree_size ^
805   - sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^
806   - sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix (* ^
807   - sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix *)
  804 + sprintf "parsed: paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.paths_size result.chart_size result.dependency_tree_size (*^ *)
  805 + (* sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^ *)
  806 + (* sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix ^ *)
  807 + (* sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix *)
808 808 | _ -> failwith "html_of_eniam_sentence"
809 809  
810 810 let html_of_conll_sentence path tokens (result : conll_parse_result) =
... ... @@ -840,13 +840,13 @@ let html_of_conll_sentence path tokens (result : conll_parse_result) =
840 840 | SemError -> sprintf "error_sem: %s dependency_tree_size=%d\n" result.msg result.dependency_tree_size
841 841 (* | NotTranslated -> "not_translated: \n" *)
842 842 | Parsed ->
843   - print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree;
844   - print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree;
845   - LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree;
846   - sprintf "parsed: paths_size=%d dependency_tree_size=%d\n" result.paths_size result.dependency_tree_size ^
847   - sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^
848   - sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix ^
849   - sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix
  843 + (* print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree; *)
  844 + (* print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree; *)
  845 + (* LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree; *)
  846 + sprintf "parsed: paths_size=%d dependency_tree_size=%d\n" result.paths_size result.dependency_tree_size (* ^ *)
  847 + (* sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^ *)
  848 + (* sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix ^ *)
  849 + (* sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix *)
850 850 | _ -> failwith "html_of_conll_sentence"
851 851  
852 852 let html_of_sem_sentence path tokens (result : semantic_processing_result) =
... ... @@ -884,7 +884,7 @@ let rec html_of_sentence path tokens = function
884 884 | QuotedSentences sentences ->
885 885 String.concat "<BR>\n" (Xlist.map sentences (fun p ->
886 886 sprintf "pid=%s pbeg=%d plen=%d pnext=%d<BR>%s" p.pid p.pbeg p.plen p.pnext (html_of_sentence path tokens p.psentence)))
887   - | AltSentence l ->
  887 + | AltSentence l -> (*print_endline "AltSentence";*)
888 888 "<table border=1>" ^
889 889 String.concat "\n" (Xlist.map l (fun (mode,sentence) ->
890 890 sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_sentence path tokens sentence))) ^
... ... @@ -892,11 +892,11 @@ let rec html_of_sentence path tokens = function
892 892 (* | _ -> failwith "html_of_sentence: ni" *)
893 893  
894 894 let rec html_of_paragraph path tokens = function
895   - RawParagraph s -> s
896   - | StructParagraph sentences ->
  895 + RawParagraph s -> (*print_endline "RawParagraph";*) s
  896 + | StructParagraph sentences -> (*print_endline "StructParagraph";*)
897 897 String.concat "<BR>\n" (Xlist.map sentences (fun p ->
898 898 sprintf "pid=%s pbeg=%d plen=%d pnext=%d<BR>%s" p.pid p.pbeg p.plen p.pnext (html_of_sentence path tokens p.psentence)))
899   - | AltParagraph l ->
  899 + | AltParagraph l -> (*print_endline "AltParagraph";*)
900 900 "<table border=2>" ^
901 901 String.concat "\n" (Xlist.map l (fun (mode,paragraph) ->
902 902 sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_paragraph path tokens paragraph))) ^
... ...