Commit 66045a355a6348d65ec1d81fbaf26b619ec763e6
1 parent 76519478
Fixed comma removal in parenthetical insertions + sentences for testing
Showing 5 changed files with 251 additions and 69 deletions
corpora/CONLL.ml
... | ... | @@ -220,13 +220,14 @@ let match_corpus corpus = |
220 | 220 | |
221 | 221 | (******************) |
222 | 222 | |
223 | +exception Comment_line | |
223 | 224 | exception Empty_line |
224 | 225 | exception Empty_sentence |
225 | 226 | exception Id_line of string |
226 | 227 | |
227 | 228 | let load_token in_channel = |
228 | 229 | let fail line = |
229 | - (* failwith ("load_token: " ^ line) *) | |
230 | + print_endline ("load_token: " ^ line); | |
230 | 231 | () in |
231 | 232 | let int_of_super = function |
232 | 233 | "_" -> -1 |
... | ... | @@ -247,7 +248,8 @@ let load_token in_channel = |
247 | 248 | else if Xstring.check_prefix "# trees/" line && Xstring.check_sufix ".xml.tree" line |
248 | 249 | then let id = Xstring.cut_prefix "# trees/" @@ Xstring.cut_sufix ".xml.tree" line in |
249 | 250 | raise (Id_line id) |
250 | - else failwith ("load_token: " ^ line) | |
251 | + else raise Comment_line | |
252 | + (* failwith ("load_token: " ^ line) *) | |
251 | 253 | else |
252 | 254 | match Xstring.split "\t" line with |
253 | 255 | [id; orth; lemma; cat; cat2; interp; super; label; "_"; "_"] -> |
... | ... | @@ -272,6 +274,7 @@ let load_sentence in_channel = |
272 | 274 | if id_a <> conll_id then failwith "load_sentence: different ids" else |
273 | 275 | pom ((id_a,super,label) :: rev_paths) id |
274 | 276 | with Id_line new_id -> pom rev_paths new_id |
277 | + | Comment_line -> pom rev_paths id | |
275 | 278 | | Empty_line -> rev_paths, id |
276 | 279 | | End_of_file -> if rev_paths = [] |
277 | 280 | then raise End_of_file |
... | ... |
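
The CONLL.ml hunks above replace the hard failwith on unrecognized "#" lines with a dedicated Comment_line exception, which load_sentence now catches so that comment lines are skipped instead of aborting the load. A minimal sketch of that pattern, with simplified line handling in place of ENIAM's actual token parsing:

    exception Comment_line
    exception Empty_line

    (* Classify a raw input line: blanks and comments are signalled by
       exceptions, anything else is treated as a token line. *)
    let classify_line line =
      if line = "" then raise Empty_line
      else if line.[0] = '#' then raise Comment_line
      else line

    (* Read token lines until a blank line or end of file; comment lines are
       skipped without disturbing the accumulated sentence. *)
    let load_sentence in_channel =
      let rec loop acc =
        match classify_line (input_line in_channel) with
        | token -> loop (token :: acc)
        | exception Comment_line -> loop acc
        | exception Empty_line -> List.rev acc
        | exception End_of_file -> List.rev acc
      in
      loop []
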
corpora/CONLL_adapter.ml
... | ... | @@ -42,6 +42,34 @@ let if_interps interps token = |
42 | 42 | ) interp in |
43 | 43 | Xlist.fold interps true (fun acc (nr,value) -> acc && (if_interp nr value)) |
44 | 44 | |
45 | +let change_dep paths i (id,super,label) = | |
46 | + let id_S, super_S, label_S = paths.(super) in | |
47 | + paths.(i) <- (id,super_S,label); | |
48 | + paths.(super) <- (id_S, id, label_S) | |
49 | + | |
50 | +let correct_injection paths tokens = Array.iteri (fun i (id,super,label) -> | |
51 | +  if label = "punct" then (* must be the first token with this parent *) | 
52 | + let j = Int.fold (i+1) (Array.length paths - 1) 0 (fun acc n -> | |
53 | + let i2,s2,l2 = paths.(n) in | |
54 | + if super = s2 | |
55 | + then if l2 = "punct" | |
56 | + then n | |
57 | + else 0 | |
58 | + else acc | |
59 | + ) in | |
60 | + let k = Int.fold_down (i-1) 1 i (fun acc n -> | |
61 | + let i2,s2,l2 = paths.(n) in | |
62 | + if super = s2 | |
63 | + then 0 | |
64 | + else acc | |
65 | + ) in | |
66 | + if k == i && j <> 0 && i < super && super < j | |
67 | + then | |
68 | + (paths.(i) <- (0,-1,""); | |
69 | + paths.(j) <- (0,-1,"")) | |
70 | + ) paths; | |
71 | + paths | |
72 | + | |
45 | 73 | let correct_coordination1 paths tokens = |
46 | 74 | let paths_ls = List.mapi (fun i (id,super,label) -> |
47 | 75 | (i,id,super,label)) (Array.to_list paths) in |
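
The new correct_injection pass above targets parenthetical insertions delimited by a pair of commas attached to the same head, which is the comma handling named in the commit message. A reduced sketch of the idea on a bare (id, super, label) array; unlike the committed function, it does not also require the opening comma to be the head's first dependent:

    (* Detach both commas of a ", insertion ," pair: a punct dependent before
       the head and a later punct dependent of the same head, with the head
       strictly between them. Detaching overwrites the entry with (0, -1, ""). *)
    let drop_insertion_commas paths =
      Array.iteri (fun i (_, super, label) ->
        if label = "punct" then begin
          let closing = ref 0 in
          Array.iteri (fun j (_, s, l) ->
            if j > i && s = super && l = "punct" && !closing = 0 then closing := j)
            paths;
          if !closing <> 0 && i < super && super < !closing then begin
            paths.(i) <- (0, -1, "");
            paths.(!closing) <- (0, -1, "")
          end
        end) paths;
      paths
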
... | ... | @@ -136,15 +164,15 @@ let correct_coordination2 paths tokens = |
136 | 164 | let paths_ls () = List.mapi (fun i (id,super,label) -> |
137 | 165 | (i,id,super,label)) (Array.to_list paths_c) in |
138 | 166 | |
139 | - (* let ps a sons = | |
167 | + let ps a sons = | |
140 | 168 | print_endline a; |
141 | 169 | List.iter (fun (i,_,_,_) -> print_endline (ExtArray.get tokens i).orth) sons; |
142 | - print_endline "" in *) | |
170 | + print_endline "" in | |
143 | 171 | |
144 | 172 | let rec correct_rec (i,id,super,label) sons = |
145 | 173 | let left_s, right_s = List.partition (fun (a,b,c,d) -> a < i) sons in |
146 | - (* ps "left:" (List.rev left_s); | |
147 | - ps "right:" right_s; *) | |
174 | + ps "left:" (List.rev left_s); | |
175 | + ps "right:" right_s; | |
148 | 176 | find_father i (List.rev left_s); |
149 | 177 | find_father i right_s |
150 | 178 | |
... | ... | @@ -154,23 +182,35 @@ let correct_coordination2 paths tokens = |
154 | 182 | paths_c.(i) <- (id,i0,label); |
155 | 183 | if not (if_cat ["conj"] (ExtArray.get tokens i).token || |
156 | 184 | (ExtArray.get tokens i).orth = ",") |
157 | - then failwith "find_father"; | |
185 | + then failwith "find_father1"; | |
158 | 186 | correct_rec (i,id,super,label) (if a < i |
159 | 187 | then (a,b,c,d) :: t |
160 | 188 | else List.rev @@ (a,b,c,d) :: t) |
161 | - | _ -> failwith "find_father" in | |
189 | + | [] -> failwith "find_father2" in | |
162 | 190 | |
163 | 191 | let check_previous_for_interp i = |
164 | 192 | if i >= 0 && (ExtArray.get tokens i).orth = "," && |
165 | 193 | not (List.exists (fun (_,super,_) -> super = i) (Array.to_list paths_c)) |
166 | 194 | then paths_c.(i) <- (0,-1,"") in |
167 | 195 | |
196 | + let filter_comp_construction sons = | |
197 | + let rec pom acc = function | |
198 | + (i1,id1,super1,label1) :: (i2,id2,super2,label2) :: t -> | |
199 | + if if_cat ["interp"] (ExtArray.get tokens i1).token && | |
200 | + if_cat ["comp"] (ExtArray.get tokens i2).token | |
201 | + then pom acc t | |
202 | + else pom ((i1,id1,super1,label1) :: acc) ((i2,id2,super2,label2) :: t) | |
203 | + | h :: t -> pom (h :: acc) t | |
204 | + | [] -> List.rev acc in | |
205 | + pom [] sons in | |
206 | + | |
168 | 207 | Array.iteri (fun i (id,super,label) -> |
169 | 208 | if if_cat ["conj"] (ExtArray.get tokens i).token || |
170 | 209 | (ExtArray.get tokens i).orth = "," |
171 | 210 | then |
172 | 211 | (check_previous_for_interp (i-1); |
173 | 212 | let sons = List.filter (fun (_,_,super,_) -> super = i) (paths_ls ()) in |
213 | + (* let sons = filter_comp_construction sons in *) | |
174 | 214 | if (List.length sons > 2) |
175 | 215 | then correct_rec (i,id,super,label) sons)) paths_c; |
176 | 216 | paths_c |
... | ... | @@ -206,15 +246,16 @@ done; *) |
206 | 246 | |
207 | 247 | let brev i id super label = |
208 | 248 | let if_the_last_dot () = |
209 | - let (id_dot, s_dot, l_dot) = List.find (fun (i2,s,l) -> | |
210 | - s = i && ((ExtArray.get tokens i2).orth = "." || (ExtArray.get tokens i2).orth = "...")) (Array.to_list paths) in | |
211 | - Array.fold_left (fun acc (i2,s,l) -> | |
212 | - acc && (ExtArray.get tokens i2).beg <= (ExtArray.get tokens id_dot).beg) true paths in | |
249 | + try | |
250 | + let (id_dot, s_dot, l_dot) = List.find (fun (i2,s,l) -> | |
251 | + s = i && ((ExtArray.get tokens i2).orth = "." || (ExtArray.get tokens i2).orth = "...")) (Array.to_list paths) in | |
252 | + Array.fold_left (fun acc (i2,s,l) -> | |
253 | + acc && (ExtArray.get tokens i2).beg <= (ExtArray.get tokens id_dot).beg) true paths | |
254 | + with Not_found -> true in | |
213 | 255 | |
214 | 256 | let dot = if if_interps [0,"npun"] (ExtArray.get tokens id).token || if_the_last_dot () |
215 | 257 | then "" |
216 | 258 | else "." in |
217 | - | |
218 | 259 | let n_orth = (ExtArray.get tokens id).orth ^ dot in |
219 | 260 | paths.(i) <- (find_token n_orth,super,label) in |
220 | 261 | |
... | ... | @@ -317,6 +358,16 @@ let correct_interp_with_father_0 paths tokens = |
317 | 358 | then paths.(i1) <- (id1,0,label1)) paths) paths; |
318 | 359 | paths |
319 | 360 | |
361 | +let corect_complm paths tokens = | |
362 | + Array.iteri (fun i (id,super,label) -> | |
363 | + if label = "complm" && super > 0 | |
364 | + then | |
365 | + let i2,s2,l2 = paths.(super) in | |
366 | + if if_cat ["conj"] (ExtArray.get tokens i2).token | |
367 | + then change_dep paths i (id,super,label) | |
368 | + ) paths; | |
369 | + paths | |
370 | + | |
320 | 371 | let remove_interps interp paths tokens = |
321 | 372 | let paths_ls = Array.to_list paths in |
322 | 373 | Array.iteri (fun i (id,super,label) -> |
... | ... | @@ -339,10 +390,6 @@ let correct_passive_voice paths tokens = |
339 | 390 | paths |
340 | 391 | |
341 | 392 | let swap_dep paths tokens = |
342 | - let change_dep i (id,super,label) = | |
343 | - let id_S, super_S, label_S = paths.(super) in | |
344 | - paths.(i) <- (id,super_S,label); | |
345 | - paths.(super) <- (id_S, id, label_S) in | |
346 | 393 | let rec correct_dep i (id,super,label) = |
347 | 394 | let adv_relators = ["kto";"co";"ile";"czyj";"jaki";"który"; |
348 | 395 | "jak";"skąd";"dokąd";"gdzie";"którędy";"kiedy";"odkąd";"dlaczego";"czemu";"gdy"] in |
... | ... | @@ -356,7 +403,7 @@ let swap_dep paths tokens = |
356 | 403 | (if_lemma adv_relators (ExtArray.get tokens id).token && |
357 | 404 | if_cat ["fin"; "praet"; "winien"; "pred"; "imps"; "ppas"; "subst"] (ExtArray.get tokens super).token) |
358 | 405 | then |
359 | - change_dep i (id,super,label); | |
406 | + change_dep paths i (id,super,label); | |
360 | 407 | if (if_lemma adv_relators (ExtArray.get tokens id).token && |
361 | 408 | if_cat ["subst"; "pred"] (ExtArray.get tokens super).token) |
362 | 409 | then correct_dep i paths.(i) in |
... | ... | @@ -367,7 +414,11 @@ let swap_dep paths tokens = |
367 | 414 | passive-voice coordination is not handled yet - both auxiliary verbs and participles |
368 | 415 | coordination of dependents of subordinating conjunctions is not handled yet *) |
369 | 416 | |
370 | -let convert_dep_tree id first_try paths tokens = | |
417 | +let convert_dep_tree path first_try paths tokens = | |
418 | + File.file_out (path ^ "/pre_text_unmodified_" ^ (string_of_bool first_try) ^ ".html") (fun file -> | |
419 | + Printf.fprintf file "%s\n" ENIAMvisualization.html_header; | |
420 | + Printf.fprintf file "%s\n" (ENIAMvisualization.html_of_dep_sentence tokens paths); | |
421 | + Printf.fprintf file "%s\n" ENIAMvisualization.html_trailer); | |
371 | 422 | let paths = Array.copy paths in |
372 | 423 | let paths = |
373 | 424 | if first_try |
... | ... | @@ -375,16 +426,27 @@ let convert_dep_tree id first_try paths tokens = |
375 | 426 | let pom = replace_tokens paths tokens in |
376 | 427 | let pom = (remove_interps ".") pom tokens in |
377 | 428 | let pom = replace_hyphens pom tokens in |
429 | + let pom = correct_injection pom tokens in | |
378 | 430 | let pom = correct_coordination1 pom tokens in |
379 | 431 | let pom = correct_interp_with_father_0 pom tokens in |
380 | - let pom = correct_coordination2 pom tokens in | |
381 | - let pom = remove_interps "," pom tokens in | |
432 | + (* File.file_out (path ^ "/pre_text_modified_" ^ (string_of_bool first_try) ^ ".html") (fun file -> | |
433 | + Printf.fprintf file "%s\n" ENIAMvisualization.html_header; | |
434 | + Printf.fprintf file "%s\n" (ENIAMvisualization.html_of_dep_sentence tokens paths); | |
435 | + Printf.fprintf file "%s\n" ENIAMvisualization.html_trailer); *) | |
436 | + let pom = try corect_complm pom tokens with | e -> print_endline (Printexc.to_string e); pom in | |
437 | + let pom = try | |
438 | + let pom2 = correct_coordination2 pom tokens in | |
439 | + remove_interps "," pom2 tokens | |
440 | + with | |
441 | + | _ -> (let pom2 = remove_interps "," pom tokens in | |
442 | + correct_coordination2 pom2 tokens) in | |
382 | 443 | let pom = correct_passive_voice pom tokens in |
383 | 444 | praet_qub_aglt pom tokens |
384 | 445 | else |
385 | - swap_dep paths tokens in | |
386 | - (* File.file_out ("results/" ^ id ^ "/pre_text_modified_" ^ (string_of_bool first_try) ^ ".html") (fun file -> | |
387 | - Printf.fprintf file "%s\n" Visualization.html_header; | |
388 | - Printf.fprintf file "%s\n" (Visualization.html_of_dep_sentence tokens paths); | |
389 | - Printf.fprintf file "%s\n" Visualization.html_trailer); *) | |
446 | + paths in | |
447 | + (* swap_dep paths tokens in *) | |
448 | + File.file_out (path ^ "/pre_text_modified_" ^ (string_of_bool first_try) ^ ".html") (fun file -> | |
449 | + Printf.fprintf file "%s\n" ENIAMvisualization.html_header; | |
450 | + Printf.fprintf file "%s\n" (ENIAMvisualization.html_of_dep_sentence tokens paths); | |
451 | + Printf.fprintf file "%s\n" ENIAMvisualization.html_trailer); | |
390 | 452 | paths |
... | ... |
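
In convert_dep_tree the order of correct_coordination2 and the comma-removing remove_interps "," is now wrapped in a try/with: the original order is attempted first, and if it raises, the two passes are applied in the opposite order on the untouched input. The shape of that fallback, with hypothetical pass_a/pass_b parameters standing in for the ENIAM functions:

    (* Run pass_a then pass_b; if that combination raises, fall back to
       pass_b followed by pass_a on the original input. *)
    let with_fallback pass_a pass_b paths =
      try
        let paths' = pass_a paths in
        pass_b paths'
      with _ ->
        let paths' = pass_b paths in
        pass_a paths'
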
corpora/makefile
... | ... | @@ -3,7 +3,7 @@ OCAMLOPT=ocamlopt |
3 | 3 | OCAMLDEP=ocamldep |
4 | 4 | INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam |
5 | 5 | OCAMLFLAGS=$(INCLUDES) -g |
6 | -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-integration.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa eniam-lexSemantics.cmxa | |
6 | +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-integration.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa eniam-lexSemantics.cmxa eniam-exec.cmxa | |
7 | 7 | INSTALLDIR=`ocamlc -where`/eniam |
8 | 8 | |
9 | 9 | SOURCES= types.ml CONLL.ml CONLL_adapter.ml resources.ml conllParser.ml interpsInCorpus.ml generate.ml |
... | ... |
corpora/test_conll.ml
... | ... | @@ -48,7 +48,7 @@ let clarify_categories senses token = |
48 | 48 | | ENIAMtokenizerTypes.Interp lemma -> ENIAMcategoriesPL.clarify_categories false senses (lemma,"interp",[]) |
49 | 49 | | _ -> [] |
50 | 50 | |
51 | -let create_chart tokens lex_sems paths last = | |
51 | +(* let create_chart tokens lex_sems paths last = | |
52 | 52 | ENIAM_LCGrenderer.reset_variable_numbers (); |
53 | 53 | let chart = ENIAM_LCGchart.make last in |
54 | 54 | let chart = Xlist.fold paths chart (fun chart (id,lnode,rnode) -> |
... | ... | @@ -59,7 +59,7 @@ let create_chart tokens lex_sems paths last = |
59 | 59 | let cats = clarify_categories ["X"] t in |
60 | 60 | let l = ENIAM_LCGlexicon.create_entries rules id t.ENIAMtokenizerTypes.orth cats s.ENIAMlexSemanticsTypes.schemata in |
61 | 61 | ENIAM_LCGchart.add_inc_list chart lnode rnode l 0) in |
62 | - chart | |
62 | + chart *) | |
63 | 63 | |
64 | 64 | let rec split_sons left id right = function |
65 | 65 | [] -> List.rev (List.sort compare left), List.sort compare right |
... | ... | @@ -85,7 +85,7 @@ let create_dep_chart tokens lex_sems paths = |
85 | 85 | ENIAM_LCGrenderer.reset_variable_names (); |
86 | 86 | ENIAM_LCGrenderer.add_variable_numbers (); |
87 | 87 | let cats = clarify_categories ["X"] t in |
88 | - let l = ENIAM_LCGlexicon.create_entries dep_rules id t.ENIAMtokenizerTypes.orth cats s.ENIAMlexSemanticsTypes.schemata in | |
88 | + let l = ENIAM_LCGlexicon.create_entries dep_rules id t.ENIAMtokenizerTypes.orth cats s.ENIAMlexSemanticsTypes.schemata s.ENIAMlexSemanticsTypes.lex_entries in | |
89 | 89 | IntMap.add nodes i l) in |
90 | 90 | (* print_endline "create_dep_chart 3"; *) |
91 | 91 | let x = dep_create_rec nodes sons 0 in |
... | ... | @@ -93,7 +93,7 @@ let create_dep_chart tokens lex_sems paths = |
93 | 93 | x |
94 | 94 | |
95 | 95 | |
96 | -let test_example path id tokens lex_sems paths last = | |
96 | +(* let test_example path id tokens lex_sems paths last = | |
97 | 97 | ENIAM_LCGreductions.reset_variant_label (); |
98 | 98 | let chart = create_chart tokens lex_sems paths last in |
99 | 99 | ENIAM_LCGlatexOf.print_chart path (id^"1_chart") "a1" chart; |
... | ... | @@ -119,43 +119,45 @@ let test_example path id tokens lex_sems paths last = |
119 | 119 | ENIAM_LCGgraphOf.print_simplified_dependency_tree path (id^"6_simple_dependency_tree") dependency_tree; |
120 | 120 | ()) |
121 | 121 | else print_endline "not reduced") |
122 | - else print_endline "not parsed" | |
122 | + else print_endline "not parsed" *) | |
123 | 123 | |
124 | -let test_dep_example path id tokens lex_sems paths = | |
124 | +let rec test_dep_example path id tokens lex_sems first_try paths = | |
125 | + let paths = CONLL_adapter.convert_dep_tree path first_try paths tokens in | |
125 | 126 | try |
126 | - ENIAM_LCGreductions.reset_variant_label (); | |
127 | - print_endline "test_dep_example 1"; | |
128 | - let paths = CONLL_adapter.convert_dep_tree id (*first_try*) true paths tokens in | |
129 | - print_endline "test_dep_example 2"; | |
130 | - (* ENIAMsubsyntaxHTMLof.print_dep_sentence path (id^"1_paths") tokens paths; *) | |
131 | - let chart = create_dep_chart tokens lex_sems paths in | |
132 | - (* ENIAM_LCGlatexOf.print_dep_chart path (id^"1_chart") "a1" chart; *) | |
133 | - let chart,references = ENIAM_LCGchart.dep_lazify chart in | |
134 | - (* ENIAM_LCGlatexOf.print_dep_chart path (id^"2_chart") "a4" chart; *) | |
135 | - (* ENIAM_LCGlatexOf.print_references path (id^"2_references") "a4" references; *) | |
136 | -    let chart = ENIAM_LCGchart.dep_parse chart references 30. Sys.time in (* note: implicit in-place modification of references *) | |
137 | - (* ENIAM_LCGlatexOf.print_chart path (id^"3_chart") "a4" chart; *) | |
138 | - (* ENIAM_LCGlatexOf.print_references path (id^"3_references") "a4" references; *) | |
139 | - if ENIAM_LCGchart.is_dep_parsed chart then ( | |
140 | - let term = ENIAM_LCGchart.get_dep_parsed_term chart in | |
141 | - (* Xlatex.latex_file_out path (id^"4_term") "a4" false (fun file -> | |
142 | - Printf.fprintf file "\\[%s\\]\n" (ENIAM_LCGlatexOf.linear_term 0 term)); | |
143 | - Xlatex.latex_compile_and_clean path (id^"4_term"); *) | |
144 | - let dependency_tree = ENIAM_LCGreductions.reduce term references in | |
145 | - (* ENIAM_LCGlatexOf.print_dependency_tree path (id^"4_dependency_tree") "a0" dependency_tree; *) | |
146 | - if ENIAM_LCGreductions.is_reduced_dependency_tree dependency_tree then ( | |
147 | -      ENIAM_LCGreductions.assign_labels dependency_tree; (* note: implicit in-place modification of dependency_tree *) | |
148 | - (* ENIAM_LCGlatexOf.print_dependency_tree path (id^"5_dependency_tree") "a4" dependency_tree; *) | |
149 | -      ENIAM_LCGreductions.remove_cuts dependency_tree; (* note: implicit in-place modification of dependency_tree *) | |
150 | - (* ENIAM_LCGlatexOf.print_dependency_tree path (id^"6_dependency_tree") "a4" dependency_tree; *) | |
151 | - (* ENIAM_LCGgraphOf.print_dependency_tree path (id^"6_dependency_tree") dependency_tree; *) | |
152 | - (* ENIAM_LCGgraphOf.print_simplified_dependency_tree path (id^"6_simple_dependency_tree") dependency_tree; *) | |
153 | - ()) | |
154 | - else print_endline "not reduced") | |
155 | - else print_endline "not parsed" | |
127 | + ENIAM_LCGreductions.reset_variant_label (); | |
128 | + print_endline "test_dep_example 1"; | |
129 | + print_endline "test_dep_example 2"; | |
130 | + (* ENIAMsubsyntaxHTMLof.print_dep_sentence path (id^"1_paths") tokens paths; *) | |
131 | + let chart = create_dep_chart tokens lex_sems paths in | |
132 | + (* ENIAM_LCGlatexOf.print_dep_chart path (id^"1_chart") "a1" chart; *) | |
133 | + let chart,references = ENIAM_LCGchart.dep_lazify chart in | |
134 | + (* ENIAM_LCGlatexOf.print_dep_chart path (id^"2_chart") "a4" chart; *) | |
135 | + (* ENIAM_LCGlatexOf.print_references path (id^"2_references") "a4" references; *) | |
136 | +      let chart = ENIAM_LCGchart.dep_parse chart references 30. Sys.time in (* note: implicit in-place modification of references *) | |
137 | + (* ENIAM_LCGlatexOf.print_chart path (id^"3_chart") "a4" chart; *) | |
138 | + (* ENIAM_LCGlatexOf.print_references path (id^"3_references") "a4" references; *) | |
139 | + if ENIAM_LCGchart.is_dep_parsed chart then ( | |
140 | + let term = ENIAM_LCGchart.get_dep_parsed_term chart in | |
141 | + (* Xlatex.latex_file_out path (id^"4_term") "a4" false (fun file -> | |
142 | + Printf.fprintf file "\\[%s\\]\n" (ENIAM_LCGlatexOf.linear_term 0 term)); | |
143 | + Xlatex.latex_compile_and_clean path (id^"4_term"); *) | |
144 | + let dependency_tree = ENIAM_LCGreductions.reduce term references in | |
145 | + (* ENIAM_LCGlatexOf.print_dependency_tree path (id^"4_dependency_tree") "a0" dependency_tree; *) | |
146 | + if ENIAM_LCGreductions.is_reduced_dependency_tree dependency_tree then ( | |
147 | +          ENIAM_LCGreductions.assign_labels dependency_tree; (* note: implicit in-place modification of dependency_tree *) | |
148 | + (* ENIAM_LCGlatexOf.print_dependency_tree path (id^"5_dependency_tree") "a4" dependency_tree; *) | |
149 | +          ENIAM_LCGreductions.remove_cuts dependency_tree; (* note: implicit in-place modification of dependency_tree *) | |
150 | + (* ENIAM_LCGlatexOf.print_dependency_tree path (id^"6_dependency_tree") "a4" dependency_tree; *) | |
151 | + (* ENIAM_LCGgraphOf.print_dependency_tree path (id^"6_dependency_tree") dependency_tree; *) | |
152 | + (* ENIAM_LCGgraphOf.print_simplified_dependency_tree path (id^"6_simple_dependency_tree") dependency_tree; *) | |
153 | + ()) | |
154 | + else print_endline "not reduced") | |
155 | + else print_endline "not parsed" | |
156 | 156 | with NotDepParsed(id_ndp,left,l,right) -> ( |
157 | - print_endline "not parsed 2"; | |
158 | - ENIAM_LCGlatexOf.print_not_parsed_dep_chart path (id^"3_not_parsed_chart") "a2" (id_ndp,left,l,right)) | |
157 | + if (first_try) | |
158 | + then test_dep_example path id tokens lex_sems false paths | |
159 | + else (print_endline "not parsed 2"; | |
160 | + ENIAM_LCGlatexOf.print_not_parsed_dep_chart path (id^"3_not_parsed_chart") "a2" (id_ndp,left,l,right))) | |
159 | 161 | |
160 | 162 | let rec parse_sentence name id tokens lex_sems = function |
161 | 163 | RawSentence s -> id |
... | ... | @@ -163,7 +165,7 @@ let rec parse_sentence name id tokens lex_sems = function |
163 | 165 | (* test_example ("results/" ^ name^"/") (string_of_int id ^ "_") tokens lex_sems paths last; *) |
164 | 166 | id + 1 |
165 | 167 | | DepSentence(paths) -> |
166 | - test_dep_example ("results/" ^ name ^ "/") (string_of_int id ^ "_") tokens lex_sems paths; | |
168 | + test_dep_example ("results/" ^ name ^ "/") (string_of_int id ^ "_") tokens lex_sems true paths; | |
167 | 169 | id + 1 |
168 | 170 | | QuotedSentences sentences -> |
169 | 171 | Xlist.fold sentences id (fun id p -> |
... | ... | @@ -212,8 +214,8 @@ let process_id s = |
212 | 214 | else failwith ("process_id: " ^ s) |
213 | 215 | |
214 | 216 | let process_conll_corpus filename = |
215 | - let corpus = File.file_in filename (fun file -> CONLL.match_corpus (ENIAM_CONLL.load_corpus file)) in | |
216 | - print_endline "process_conll_corpus"; | |
217 | + let corpus = File.file_in filename (fun file -> CONLL.match_corpus (CONLL.load_corpus file)) in | |
218 | + print_endline "process_conll_corpus"; | |
217 | 219 | (* let corpus = [List.hd corpus] in *) |
218 | 220 | Xlist.iter corpus (fun query -> try |
219 | 221 | let id = process_id (get_query_id query) in |
... | ... | @@ -244,5 +246,5 @@ let _ = |
244 | 246 | (* LCGfields.reset (); *) |
245 | 247 | (* process_conll_corpus "../../NLP resources/skladnica_zaleznosciowa.conll"; *) |
246 | 248 | (* process_conll_corpus "../testy/skladnica-test1.conll"; *) |
247 | - process_conll_corpus "../testy/skladnica-test1-Failure.conll"; | |
249 | + process_conll_corpus "../testy/skladnica-test1-Find_father.conll"; | |
248 | 250 | (* LCGfields.print_results () *) |
... | ... |
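
test_dep_example is now recursive over a first_try flag: the corrected dependency tree is parsed first and, when dep parsing raises NotDepParsed, the function calls itself once more with first_try = false before giving up. A generic sketch of that single-retry pattern with hypothetical convert/parse parameters (the committed code re-enters test_dep_example itself and lets convert_dep_tree decide what the second attempt looks like):

    exception Not_parsed

    (* Parse the converted tree; on failure retry exactly once with the
       first_try = false conversion, then report the failure. *)
    let rec parse_with_retry ~first_try convert parse paths =
      let converted = convert ~first_try paths in
      try parse converted
      with Not_parsed ->
        if first_try
        then parse_with_retry ~first_try:false convert parse paths
        else print_endline "not parsed"
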
testy/skladnica-test1-Not_parsed.conll
0 → 100644
1 | +1 Cmentarz cmentarz subst subst sg|nom|m3 2 subj _ _ | |
2 | +2 jest być fin fin sg|ter|imperf 0 pred _ _ | |
3 | +3 taki taki adj adj sg|nom|m3|pos 4 adjunct _ _ | |
4 | +4 pusty pusty adj adj sg|nom|m3|pos 2 pd _ _ | |
5 | +5 ! ! interp interp _ 2 punct _ _ | |
6 | + | |
7 | +1 Mówi mówić fin fin sg|ter|imperf 0 pred _ _ | |
8 | +2 się się qub qub _ 1 refl _ _ | |
9 | +3 przecież przecież qub qub _ 1 adjunct _ _ | |
10 | +4 , , interp interp _ 7 punct _ _ | |
11 | +5 że że comp comp _ 7 complm _ _ | |
12 | +6 broń broń subst subst sg|nom|f 7 subj _ _ | |
13 | +7 była być praet praet sg|f|imperf 1 comp_fin _ _ | |
14 | +8 w w prep prep loc|nwok 7 adjunct _ _ | |
15 | +9 szkole szkoła subst subst sg|loc|f 8 comp _ _ | |
16 | +10 schowana schować ppas ppas sg|nom|f|perf|aff 7 pd _ _ | |
17 | +11 jeszcze jeszcze qub qub _ 12 adjunct _ _ | |
18 | +12 latem lato subst subst sg|inst|n 7 adjunct _ _ | |
19 | +13 w w prep prep loc|nwok 12 adjunct _ _ | |
20 | +14 czasie czas subst subst sg|loc|m3 13 mwe _ _ | |
21 | +15 remontu remont subst subst sg|gen|m3 14 comp _ _ | |
22 | +16 . . interp interp _ 1 punct _ _ | |
23 | + | |
24 | +1 Bo bo comp comp _ 9 adjunct _ _ | |
25 | +2 jak jak adv adv _ 9 adjunct _ _ | |
26 | +3 ona on ppron3 ppron3 sg|nom|f|ter|akc|npraep 9 subj _ _ | |
27 | +4 , , interp interp _ 3 punct _ _ | |
28 | +5 chora chory adj adj sg|nom|f|pos 3 adjunct _ _ | |
29 | +6 na na prep prep acc 5 adjunct _ _ | |
30 | +7 cukrzycę cukrzyca subst subst sg|acc|f 6 comp _ _ | |
31 | +8 , , interp interp _ 3 punct _ _ | |
32 | +9 przeżyła przeżyć praet praet sg|f|perf 0 pred _ _ | |
33 | +10 trzy trzy num num pl|acc|m3|congr 9 obj _ _ | |
34 | +11 dni dzień subst subst pl|acc|m3 10 comp _ _ | |
35 | +12 bez bez prep prep gen|nwok 9 comp _ _ | |
36 | +13 wody woda subst subst sg|gen|f 14 conjunct _ _ | |
37 | +14 i i conj conj _ 12 comp _ _ | |
38 | +15 jedzenia jedzenie subst subst sg|gen|n 14 conjunct _ _ | |
39 | +16 ? ? interp interp _ 9 punct _ _ | |
40 | + | |
41 | +1 Jednak jednak qub qub _ 9 adjunct _ _ | |
42 | +2 już już qub qub _ 3 adjunct _ _ | |
43 | +3 wkrótce wkrótce adv adv _ 9 adjunct _ _ | |
44 | +4 Nizioł Nizioł subst subst sg|nom|m1 5 conjunct _ _ | |
45 | +5 i i conj conj _ 9 subj _ _ | |
46 | +6 Wapiński Wapiński subst subst sg|nom|m1 5 conjunct _ _ | |
47 | +7 ze z prep prep inst|wok 9 adjunct _ _ | |
48 | +8 zdumieniem zdumienie subst subst sg|inst|n 7 comp _ _ | |
49 | +9 odkryli odkryć praet praet pl|m1|perf 0 pred _ _ | |
50 | +10 , , interp interp _ 14 punct _ _ | |
51 | +11 że że comp comp _ 14 complm _ _ | |
52 | +12 Łapiński Łapiński subst subst sg|nom|m1 14 subj _ _ | |
53 | +13 nie nie qub qub _ 14 neg _ _ | |
54 | +14 dotrzymuje dotrzymywać fin fin sg|ter|imperf 9 comp_fin _ _ | |
55 | +15 wcześniej wcześnie adv adv com 16 adjunct _ _ | |
56 | +16 danego dać ppas ppas sg|gen|n|perf|aff 17 adjunct _ _ | |
57 | +17 słowa słowo subst subst sg|gen|n 14 obj _ _ | |
58 | +18 . . interp interp _ 9 punct _ _ | |
59 | + | |
60 | +1 A a qub qub _ 8 adjunct _ _ | |
61 | +2 pan pan subst subst sg|nom|m1 8 subj _ _ | |
62 | +3 nigdy nigdy adv adv _ 8 adjunct _ _ | |
63 | +4 się się qub qub _ 8 refl _ _ | |
64 | +5 z z prep prep inst|nwok 8 comp _ _ | |
65 | +6 nimi on ppron3 ppron3 pl|inst|m1|ter|akc|praep 5 comp _ _ | |
66 | +7 nie nie qub qub _ 8 neg _ _ | |
67 | +8 zetknął zetknąć praet praet sg|m1|perf 0 pred _ _ | |
68 | +9 ? ? interp interp _ 8 punct _ _ | |
69 | + | |
70 | +1 Załapać załapać inf inf perf 3 comp_inf _ _ | |
71 | +2 się się qub qub _ 1 refl _ _ | |
72 | +3 trzeba trzeba pred pred _ 0 pred _ _ | |
73 | +4 teraz teraz adv adv _ 3 adjunct _ _ | |
74 | +5 , , interp interp _ 3 punct _ _ | |
75 | +6 bo bo comp comp _ 3 adjunct _ _ | |
76 | +7 potem potem adv adv _ 8 adjunct _ _ | |
77 | +8 będzie być bedzie bedzie sg|ter|imperf 6 comp_fin _ _ | |
78 | +9 trudniej trudno adv adv com 8 pd _ _ | |
79 | +10 . . interp interp _ 3 punct _ _ | |
80 | + | |
81 | +1 Medykamenty medykament subst subst pl|nom|m3 4 subj _ _ | |
82 | +2 współczesne współczesny adj adj pl|nom|m3|pos 1 adjunct _ _ | |
83 | +3 dostępne dostępny adj adj pl|nom|m3|pos 4 pd _ _ | |
84 | +4 są być fin fin pl|ter|imperf 0 pred _ _ | |
85 | +5 na na prep prep loc 4 adjunct _ _ | |
86 | +6 czarnym czarny adj adj sg|loc|m3|pos 7 adjunct _ _ | |
87 | +7 rynku rynek subst subst sg|loc|m3 5 comp _ _ | |
88 | +8 . . interp interp _ 4 punct _ _ | |
89 | + | |
90 | +1 To to subst subst sg|nom|n 3 subj _ _ | |
91 | +2 samo sam adj adj sg|nom|n|pos 1 adjunct _ _ | |
92 | +3 dotyczy dotyczyć fin fin sg|ter|imperf 5 conjunct _ _ | |
93 | +4 leczenia leczenie subst subst sg|gen|n 3 obj_th _ _ | |
94 | +5 , , interp interp _ 0 coord_punct _ _ | |
95 | +6 służba służba subst subst sg|nom|f 9 subj _ _ | |
96 | +7 zdrowia zdrowie subst subst sg|gen|n 6 adjunct _ _ | |
97 | +8 praktycznie praktycznie adv adv pos 9 adjunct _ _ | |
98 | +9 przestała przestać praet praet sg|f|perf 5 conjunct _ _ | |
99 | +10 istnieć istnieć inf inf imperf 9 comp_inf _ _ | |
100 | +11 . . interp interp _ 5 punct _ _ | |
101 | + | |
102 | +1 Zwykły zwykły adj adj sg|nom|m1|pos 2 adjunct _ _ | |
103 | +2 mieszkaniec mieszkaniec subst subst sg|nom|m1 4 subj _ _ | |
104 | +3 kraju kraj subst subst sg|gen|m3 2 adjunct _ _ | |
105 | +4 ma mieć fin fin sg|ter|imperf 0 pred _ _ | |
106 | +5 się się qub qub _ 6 refl _ _ | |
107 | +6 leczyć leczyć inf inf imperf 4 comp_inf _ _ | |
108 | +7 ziołami ziele subst subst pl|inst|n 6 obj_th _ _ | |
109 | +8 , , interp interp _ 10 punct _ _ | |
110 | +9 które który adj adj pl|acc|n|pos 10 obj _ _ | |
111 | +10 zaleca zalecać fin fin sg|ter|imperf 7 adjunct _ _ | |
112 | +11 tradycyjna tradycyjny adj adj sg|nom|f|pos 12 adjunct _ _ | |
113 | +12 medycyna medycyna subst subst sg|nom|f 10 subj _ _ | |
114 | +13 koreańska koreański adj adj sg|nom|f|pos 12 adjunct _ _ | |
115 | +14 . . interp interp _ 4 punct _ _ | |
... | ... |