Commit 66045a355a6348d65ec1d81fbaf26b619ec763e6

Authored by Daniel Oklesiński
1 parent 76519478

poprawione usuwanie przecinków we wtrąceniach + zdania do testowania

corpora/CONLL.ml
... ... @@ -220,13 +220,14 @@ let match_corpus corpus =
220 220  
221 221 (******************)
222 222  
  223 +exception Comment_line (* raised by load_token for a line that fails the "# trees/" ... ".xml.tree" check in that branch (presumably some other '#' comment line; the outer condition is not visible here); load_sentence catches it and continues with its state unchanged *)
223 224 exception Empty_line (* caught in load_sentence, where it ends the sentence being read; the raise site is not visible in this excerpt *)
224 225 exception Empty_sentence (* neither raised nor caught in the visible hunks *)
225 226 exception Id_line of string (* carries the id that load_token cuts out of a "# trees/<id>.xml.tree" line; load_sentence catches it and switches to the new id *)
226 227  
227 228 let load_token in_channel =
228 229 let fail line =
229   - (* failwith ("load_token: " ^ line) *)
  230 + print_endline ("load_token: " ^ line);
230 231 () in
231 232 let int_of_super = function
232 233 "_" -> -1
... ... @@ -247,7 +248,8 @@ let load_token in_channel =
247 248 else if Xstring.check_prefix "# trees/" line && Xstring.check_sufix ".xml.tree" line
248 249 then let id = Xstring.cut_prefix "# trees/" @@ Xstring.cut_sufix ".xml.tree" line in
249 250 raise (Id_line id)
250   - else failwith ("load_token: " ^ line)
  251 + else raise Comment_line
  252 + (* failwith ("load_token: " ^ line) *)
251 253 else
252 254 match Xstring.split "\t" line with
253 255 [id; orth; lemma; cat; cat2; interp; super; label; "_"; "_"] ->
... ... @@ -272,6 +274,7 @@ let load_sentence in_channel =
272 274 if id_a <> conll_id then failwith "load_sentence: different ids" else
273 275 pom ((id_a,super,label) :: rev_paths) id
274 276 with Id_line new_id -> pom rev_paths new_id
  277 + | Comment_line -> pom rev_paths id
275 278 | Empty_line -> rev_paths, id
276 279 | End_of_file -> if rev_paths = []
277 280 then raise End_of_file
... ...
corpora/CONLL_adapter.ml
... ... @@ -42,6 +42,34 @@ let if_interps interps token =
42 42 ) interp in
43 43 Xlist.fold interps true (fun acc (nr,value) -> acc && (if_interp nr value))
44 44  
  (* [change_dep paths i (id, super, label)] reverses the edge between entry [i]
     and its head entry [super], mutating [paths] in place:
     - entry [i] keeps its id and label but takes over the head's own head;
     - the head entry keeps its id and label and gets [id] as its new head.
     Returns unit. Array indexing raises Invalid_argument when [i] or [super]
     is outside [paths] (e.g. super = -1).
     NOTE(review): the head's new parent is written as [id] (first tuple
     component), not the array index [i], while heads are used as indices into
     [paths] (see [paths.(super)]). This is only right if id = i for the entry;
     confirm against callers. *)
  45 +let change_dep paths i (id,super,label) =
  46 + let id_S, super_S, label_S = paths.(super) in (* current triple of the head entry *)
  47 + paths.(i) <- (id,super_S,label); (* entry i is re-attached to its former grandparent *)
  48 + paths.(super) <- (id_S, id, label_S) (* former head now hangs under id *)
  49 +
  (* [correct_injection paths tokens] walks [paths] and, for each entry labelled
     "punct" that passes the checks below, overwrites both that entry and a later
     "punct" entry with the same head with the triple (0, -1, "").
     Going by the commit message this targets the pair of commas around a
     parenthetical insertion; presumably (0, -1, "") marks a removed token. Confirm.
     [paths] is mutated in place and the same array is returned.
     [tokens] is accepted but not used in this body. *)
  50 +let correct_injection paths tokens = Array.iteri (fun i (id,super,label) ->
  51 + if label = "punct" then (* must be the first token with this father *)
  52 + let j = Int.fold (i+1) (Array.length paths - 1) 0 (fun acc n -> (* assumes Int.fold visits n = i+1 .. last index in ascending order (helper not shown) - TODO confirm *)
  53 + let i2,s2,l2 = paths.(n) in (* i2 is unused *)
  54 + if super = s2
  55 + then if l2 = "punct"
  56 + then n (* a later sibling that is "punct": remember its index *)
  57 + else 0 (* a later sibling that is not "punct": reset, so only the last sibling decides *)
  58 + else acc
  59 + ) in (* j = index of the last later sibling if it is "punct", otherwise 0 *)
  60 + let k = Int.fold_down (i-1) 1 i (fun acc n -> (* assumes Int.fold_down visits n = i-1 down to 1 (helper not shown) - TODO confirm; index 0 is not inspected *)
  61 + let i2,s2,l2 = paths.(n) in (* i2 and l2 are unused *)
  62 + if super = s2
  63 + then 0 (* an earlier entry shares this head, so entry i is not the first one *)
  64 + else acc
  65 + ) in (* k stays equal to i only when no inspected earlier entry has the same head *)
  66 + if k == i && j <> 0 && i < super && super < j (* == is physical equality; on ints it gives the same result as =. The head index must lie strictly between the two "punct" entries *)
  67 + then
  68 + (paths.(i) <- (0,-1,""); (* blank the opening "punct" entry *)
  69 + paths.(j) <- (0,-1,"")) (* blank the closing "punct" entry; its label is now "", so the outer scan skips it when it reaches j *)
  70 + ) paths;
  71 + paths
  72 +
45 73 let correct_coordination1 paths tokens =
46 74 let paths_ls = List.mapi (fun i (id,super,label) ->
47 75 (i,id,super,label)) (Array.to_list paths) in
... ... @@ -136,15 +164,15 @@ let correct_coordination2 paths tokens =
136 164 let paths_ls () = List.mapi (fun i (id,super,label) ->
137 165 (i,id,super,label)) (Array.to_list paths_c) in
138 166  
139   - (* let ps a sons =
  167 + let ps a sons =
140 168 print_endline a;
141 169 List.iter (fun (i,_,_,_) -> print_endline (ExtArray.get tokens i).orth) sons;
142   - print_endline "" in *)
  170 + print_endline "" in
143 171  
144 172 let rec correct_rec (i,id,super,label) sons =
145 173 let left_s, right_s = List.partition (fun (a,b,c,d) -> a < i) sons in
146   - (* ps "left:" (List.rev left_s);
147   - ps "right:" right_s; *)
  174 + ps "left:" (List.rev left_s);
  175 + ps "right:" right_s;
148 176 find_father i (List.rev left_s);
149 177 find_father i right_s
150 178  
... ... @@ -154,23 +182,35 @@ let correct_coordination2 paths tokens =
154 182 paths_c.(i) <- (id,i0,label);
155 183 if not (if_cat ["conj"] (ExtArray.get tokens i).token ||
156 184 (ExtArray.get tokens i).orth = ",")
157   - then failwith "find_father";
  185 + then failwith "find_father1";
158 186 correct_rec (i,id,super,label) (if a < i
159 187 then (a,b,c,d) :: t
160 188 else List.rev @@ (a,b,c,d) :: t)
161   - | _ -> failwith "find_father" in
  189 + | [] -> failwith "find_father2" in
162 190  
163 191 let check_previous_for_interp i =
164 192 if i >= 0 && (ExtArray.get tokens i).orth = "," &&
165 193 not (List.exists (fun (_,super,_) -> super = i) (Array.to_list paths_c))
166 194 then paths_c.(i) <- (0,-1,"") in
167 195  
  196 + let filter_comp_construction sons =
  197 + let rec pom acc = function
  198 + (i1,id1,super1,label1) :: (i2,id2,super2,label2) :: t ->
  199 + if if_cat ["interp"] (ExtArray.get tokens i1).token &&
  200 + if_cat ["comp"] (ExtArray.get tokens i2).token
  201 + then pom acc t
  202 + else pom ((i1,id1,super1,label1) :: acc) ((i2,id2,super2,label2) :: t)
  203 + | h :: t -> pom (h :: acc) t
  204 + | [] -> List.rev acc in
  205 + pom [] sons in
  206 +
168 207 Array.iteri (fun i (id,super,label) ->
169 208 if if_cat ["conj"] (ExtArray.get tokens i).token ||
170 209 (ExtArray.get tokens i).orth = ","
171 210 then
172 211 (check_previous_for_interp (i-1);
173 212 let sons = List.filter (fun (_,_,super,_) -> super = i) (paths_ls ()) in
  213 + (* let sons = filter_comp_construction sons in *)
174 214 if (List.length sons > 2)
175 215 then correct_rec (i,id,super,label) sons)) paths_c;
176 216 paths_c
... ... @@ -206,15 +246,16 @@ done; *)
206 246  
207 247 let brev i id super label =
208 248 let if_the_last_dot () =
209   - let (id_dot, s_dot, l_dot) = List.find (fun (i2,s,l) ->
210   - s = i && ((ExtArray.get tokens i2).orth = "." || (ExtArray.get tokens i2).orth = "...")) (Array.to_list paths) in
211   - Array.fold_left (fun acc (i2,s,l) ->
212   - acc && (ExtArray.get tokens i2).beg <= (ExtArray.get tokens id_dot).beg) true paths in
  249 + try
  250 + let (id_dot, s_dot, l_dot) = List.find (fun (i2,s,l) ->
  251 + s = i && ((ExtArray.get tokens i2).orth = "." || (ExtArray.get tokens i2).orth = "...")) (Array.to_list paths) in
  252 + Array.fold_left (fun acc (i2,s,l) ->
  253 + acc && (ExtArray.get tokens i2).beg <= (ExtArray.get tokens id_dot).beg) true paths
  254 + with Not_found -> true in
213 255  
214 256 let dot = if if_interps [0,"npun"] (ExtArray.get tokens id).token || if_the_last_dot ()
215 257 then ""
216 258 else "." in
217   -
218 259 let n_orth = (ExtArray.get tokens id).orth ^ dot in
219 260 paths.(i) <- (find_token n_orth,super,label) in
220 261  
... ... @@ -317,6 +358,16 @@ let correct_interp_with_father_0 paths tokens =
317 358 then paths.(i1) <- (id1,0,label1)) paths) paths;
318 359 paths
319 360  
  (* [corect_complm paths tokens] visits every entry of [paths]; for an entry
     labelled "complm" whose head index is > 0, it looks up the head entry and,
     when [if_cat ["conj"]] holds for the head's token, calls [change_dep] on
     the entry, which re-links it and its head in place.
     Returns the same, mutated, [paths] array.
     The name is spelled "corect" in the source; it is left as is because
     callers refer to it by this name. *)
  361 +let corect_complm paths tokens =
  362 + Array.iteri (fun i (id,super,label) ->
  363 + if label = "complm" && super > 0 (* super > 0 also keeps paths.(super) away from index -1 and from entry 0 *)
  364 + then
  365 + let i2,s2,l2 = paths.(super) in (* head entry; only its id i2 is used, s2 and l2 are unused *)
  366 + if if_cat ["conj"] (ExtArray.get tokens i2).token (* the head's token is looked up in tokens by the head entry's id *)
  367 + then change_dep paths i (id,super,label)
  368 + ) paths;
  369 + paths
  370 +
320 371 let remove_interps interp paths tokens =
321 372 let paths_ls = Array.to_list paths in
322 373 Array.iteri (fun i (id,super,label) ->
... ... @@ -339,10 +390,6 @@ let correct_passive_voice paths tokens =
339 390 paths
340 391  
341 392 let swap_dep paths tokens =
342   - let change_dep i (id,super,label) =
343   - let id_S, super_S, label_S = paths.(super) in
344   - paths.(i) <- (id,super_S,label);
345   - paths.(super) <- (id_S, id, label_S) in
346 393 let rec correct_dep i (id,super,label) =
347 394 let adv_relators = ["kto";"co";"ile";"czyj";"jaki";"który";
348 395 "jak";"skąd";"dokąd";"gdzie";"którędy";"kiedy";"odkąd";"dlaczego";"czemu";"gdy"] in
... ... @@ -356,7 +403,7 @@ let swap_dep paths tokens =
356 403 (if_lemma adv_relators (ExtArray.get tokens id).token &&
357 404 if_cat ["fin"; "praet"; "winien"; "pred"; "imps"; "ppas"; "subst"] (ExtArray.get tokens super).token)
358 405 then
359   - change_dep i (id,super,label);
  406 + change_dep paths i (id,super,label);
360 407 if (if_lemma adv_relators (ExtArray.get tokens id).token &&
361 408 if_cat ["subst"; "pred"] (ExtArray.get tokens super).token)
362 409 then correct_dep i paths.(i) in
... ... @@ -367,7 +414,11 @@ let swap_dep paths tokens =
367 414 nieobsługiwana na razie koordynacja strony biernej - zarówno czasowniki posiłkowe, jak i imiesłowy
368 415 nieobsługiwana na razie koordynacja podrzędników spójników podrzędnych *)
369 416  
370   -let convert_dep_tree id first_try paths tokens =
  417 +let convert_dep_tree path first_try paths tokens =
  418 + File.file_out (path ^ "/pre_text_unmodified_" ^ (string_of_bool first_try) ^ ".html") (fun file ->
  419 + Printf.fprintf file "%s\n" ENIAMvisualization.html_header;
  420 + Printf.fprintf file "%s\n" (ENIAMvisualization.html_of_dep_sentence tokens paths);
  421 + Printf.fprintf file "%s\n" ENIAMvisualization.html_trailer);
371 422 let paths = Array.copy paths in
372 423 let paths =
373 424 if first_try
... ... @@ -375,16 +426,27 @@ let convert_dep_tree id first_try paths tokens =
375 426 let pom = replace_tokens paths tokens in
376 427 let pom = (remove_interps ".") pom tokens in
377 428 let pom = replace_hyphens pom tokens in
  429 + let pom = correct_injection pom tokens in
378 430 let pom = correct_coordination1 pom tokens in
379 431 let pom = correct_interp_with_father_0 pom tokens in
380   - let pom = correct_coordination2 pom tokens in
381   - let pom = remove_interps "," pom tokens in
  432 + (* File.file_out (path ^ "/pre_text_modified_" ^ (string_of_bool first_try) ^ ".html") (fun file ->
  433 + Printf.fprintf file "%s\n" ENIAMvisualization.html_header;
  434 + Printf.fprintf file "%s\n" (ENIAMvisualization.html_of_dep_sentence tokens paths);
  435 + Printf.fprintf file "%s\n" ENIAMvisualization.html_trailer); *)
  436 + let pom = try corect_complm pom tokens with | e -> print_endline (Printexc.to_string e); pom in
  437 + let pom = try
  438 + let pom2 = correct_coordination2 pom tokens in
  439 + remove_interps "," pom2 tokens
  440 + with
  441 + | _ -> (let pom2 = remove_interps "," pom tokens in
  442 + correct_coordination2 pom2 tokens) in
382 443 let pom = correct_passive_voice pom tokens in
383 444 praet_qub_aglt pom tokens
384 445 else
385   - swap_dep paths tokens in
386   - (* File.file_out ("results/" ^ id ^ "/pre_text_modified_" ^ (string_of_bool first_try) ^ ".html") (fun file ->
387   - Printf.fprintf file "%s\n" Visualization.html_header;
388   - Printf.fprintf file "%s\n" (Visualization.html_of_dep_sentence tokens paths);
389   - Printf.fprintf file "%s\n" Visualization.html_trailer); *)
  446 + paths in
  447 + (* swap_dep paths tokens in *)
  448 + File.file_out (path ^ "/pre_text_modified_" ^ (string_of_bool first_try) ^ ".html") (fun file ->
  449 + Printf.fprintf file "%s\n" ENIAMvisualization.html_header;
  450 + Printf.fprintf file "%s\n" (ENIAMvisualization.html_of_dep_sentence tokens paths);
  451 + Printf.fprintf file "%s\n" ENIAMvisualization.html_trailer);
390 452 paths
... ...
corpora/makefile
... ... @@ -3,7 +3,7 @@ OCAMLOPT=ocamlopt
3 3 OCAMLDEP=ocamldep
4 4 INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam
5 5 OCAMLFLAGS=$(INCLUDES) -g
6   -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-integration.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa eniam-lexSemantics.cmxa
  6 +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-integration.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa eniam-lexSemantics.cmxa eniam-exec.cmxa
7 7 INSTALLDIR=`ocamlc -where`/eniam
8 8  
9 9 SOURCES= types.ml CONLL.ml CONLL_adapter.ml resources.ml conllParser.ml interpsInCorpus.ml generate.ml
... ...
corpora/test_conll.ml
... ... @@ -48,7 +48,7 @@ let clarify_categories senses token =
48 48 | ENIAMtokenizerTypes.Interp lemma -> ENIAMcategoriesPL.clarify_categories false senses (lemma,"interp",[])
49 49 | _ -> []
50 50  
51   -let create_chart tokens lex_sems paths last =
  51 +(* let create_chart tokens lex_sems paths last =
52 52 ENIAM_LCGrenderer.reset_variable_numbers ();
53 53 let chart = ENIAM_LCGchart.make last in
54 54 let chart = Xlist.fold paths chart (fun chart (id,lnode,rnode) ->
... ... @@ -59,7 +59,7 @@ let create_chart tokens lex_sems paths last =
59 59 let cats = clarify_categories ["X"] t in
60 60 let l = ENIAM_LCGlexicon.create_entries rules id t.ENIAMtokenizerTypes.orth cats s.ENIAMlexSemanticsTypes.schemata in
61 61 ENIAM_LCGchart.add_inc_list chart lnode rnode l 0) in
62   - chart
  62 + chart *)
63 63  
64 64 let rec split_sons left id right = function
65 65 [] -> List.rev (List.sort compare left), List.sort compare right
... ... @@ -85,7 +85,7 @@ let create_dep_chart tokens lex_sems paths =
85 85 ENIAM_LCGrenderer.reset_variable_names ();
86 86 ENIAM_LCGrenderer.add_variable_numbers ();
87 87 let cats = clarify_categories ["X"] t in
88   - let l = ENIAM_LCGlexicon.create_entries dep_rules id t.ENIAMtokenizerTypes.orth cats s.ENIAMlexSemanticsTypes.schemata in
  88 + let l = ENIAM_LCGlexicon.create_entries dep_rules id t.ENIAMtokenizerTypes.orth cats s.ENIAMlexSemanticsTypes.schemata s.ENIAMlexSemanticsTypes.lex_entries in
89 89 IntMap.add nodes i l) in
90 90 (* print_endline "create_dep_chart 3"; *)
91 91 let x = dep_create_rec nodes sons 0 in
... ... @@ -93,7 +93,7 @@ let create_dep_chart tokens lex_sems paths =
93 93 x
94 94  
95 95  
96   -let test_example path id tokens lex_sems paths last =
  96 +(* let test_example path id tokens lex_sems paths last =
97 97 ENIAM_LCGreductions.reset_variant_label ();
98 98 let chart = create_chart tokens lex_sems paths last in
99 99 ENIAM_LCGlatexOf.print_chart path (id^"1_chart") "a1" chart;
... ... @@ -119,43 +119,45 @@ let test_example path id tokens lex_sems paths last =
119 119 ENIAM_LCGgraphOf.print_simplified_dependency_tree path (id^"6_simple_dependency_tree") dependency_tree;
120 120 ())
121 121 else print_endline "not reduced")
122   - else print_endline "not parsed"
  122 + else print_endline "not parsed" *)
123 123  
124   -let test_dep_example path id tokens lex_sems paths =
  124 +let rec test_dep_example path id tokens lex_sems first_try paths =
  125 + let paths = CONLL_adapter.convert_dep_tree path first_try paths tokens in
125 126 try
126   - ENIAM_LCGreductions.reset_variant_label ();
127   - print_endline "test_dep_example 1";
128   - let paths = CONLL_adapter.convert_dep_tree id (*first_try*) true paths tokens in
129   - print_endline "test_dep_example 2";
130   - (* ENIAMsubsyntaxHTMLof.print_dep_sentence path (id^"1_paths") tokens paths; *)
131   - let chart = create_dep_chart tokens lex_sems paths in
132   - (* ENIAM_LCGlatexOf.print_dep_chart path (id^"1_chart") "a1" chart; *)
133   - let chart,references = ENIAM_LCGchart.dep_lazify chart in
134   - (* ENIAM_LCGlatexOf.print_dep_chart path (id^"2_chart") "a4" chart; *)
135   - (* ENIAM_LCGlatexOf.print_references path (id^"2_references") "a4" references; *)
136   - let chart = ENIAM_LCGchart.dep_parse chart references 30. Sys.time in (* uwaga: niejawna zmiana imperatywna w references *)
137   - (* ENIAM_LCGlatexOf.print_chart path (id^"3_chart") "a4" chart; *)
138   - (* ENIAM_LCGlatexOf.print_references path (id^"3_references") "a4" references; *)
139   - if ENIAM_LCGchart.is_dep_parsed chart then (
140   - let term = ENIAM_LCGchart.get_dep_parsed_term chart in
141   - (* Xlatex.latex_file_out path (id^"4_term") "a4" false (fun file ->
142   - Printf.fprintf file "\\[%s\\]\n" (ENIAM_LCGlatexOf.linear_term 0 term));
143   - Xlatex.latex_compile_and_clean path (id^"4_term"); *)
144   - let dependency_tree = ENIAM_LCGreductions.reduce term references in
145   - (* ENIAM_LCGlatexOf.print_dependency_tree path (id^"4_dependency_tree") "a0" dependency_tree; *)
146   - if ENIAM_LCGreductions.is_reduced_dependency_tree dependency_tree then (
147   - ENIAM_LCGreductions.assign_labels dependency_tree; (* uwaga: niejawna zmiana imperatywna w dependency_tree *)
148   - (* ENIAM_LCGlatexOf.print_dependency_tree path (id^"5_dependency_tree") "a4" dependency_tree; *)
149   - ENIAM_LCGreductions.remove_cuts dependency_tree; (* uwaga: niejawna zmiana imperatywna w dependency_tree *)
150   - (* ENIAM_LCGlatexOf.print_dependency_tree path (id^"6_dependency_tree") "a4" dependency_tree; *)
151   - (* ENIAM_LCGgraphOf.print_dependency_tree path (id^"6_dependency_tree") dependency_tree; *)
152   - (* ENIAM_LCGgraphOf.print_simplified_dependency_tree path (id^"6_simple_dependency_tree") dependency_tree; *)
153   - ())
154   - else print_endline "not reduced")
155   - else print_endline "not parsed"
  127 + ENIAM_LCGreductions.reset_variant_label ();
  128 + print_endline "test_dep_example 1";
  129 + print_endline "test_dep_example 2";
  130 + (* ENIAMsubsyntaxHTMLof.print_dep_sentence path (id^"1_paths") tokens paths; *)
  131 + let chart = create_dep_chart tokens lex_sems paths in
  132 + (* ENIAM_LCGlatexOf.print_dep_chart path (id^"1_chart") "a1" chart; *)
  133 + let chart,references = ENIAM_LCGchart.dep_lazify chart in
  134 + (* ENIAM_LCGlatexOf.print_dep_chart path (id^"2_chart") "a4" chart; *)
  135 + (* ENIAM_LCGlatexOf.print_references path (id^"2_references") "a4" references; *)
  136 + let chart = ENIAM_LCGchart.dep_parse chart references 30. Sys.time in (* uwaga: niejawna zmiana imperatywna w references *)
  137 + (* ENIAM_LCGlatexOf.print_chart path (id^"3_chart") "a4" chart; *)
  138 + (* ENIAM_LCGlatexOf.print_references path (id^"3_references") "a4" references; *)
  139 + if ENIAM_LCGchart.is_dep_parsed chart then (
  140 + let term = ENIAM_LCGchart.get_dep_parsed_term chart in
  141 + (* Xlatex.latex_file_out path (id^"4_term") "a4" false (fun file ->
  142 + Printf.fprintf file "\\[%s\\]\n" (ENIAM_LCGlatexOf.linear_term 0 term));
  143 + Xlatex.latex_compile_and_clean path (id^"4_term"); *)
  144 + let dependency_tree = ENIAM_LCGreductions.reduce term references in
  145 + (* ENIAM_LCGlatexOf.print_dependency_tree path (id^"4_dependency_tree") "a0" dependency_tree; *)
  146 + if ENIAM_LCGreductions.is_reduced_dependency_tree dependency_tree then (
  147 + ENIAM_LCGreductions.assign_labels dependency_tree; (* uwaga: niejawna zmiana imperatywna w dependency_tree *)
  148 + (* ENIAM_LCGlatexOf.print_dependency_tree path (id^"5_dependency_tree") "a4" dependency_tree; *)
  149 + ENIAM_LCGreductions.remove_cuts dependency_tree; (* uwaga: niejawna zmiana imperatywna w dependency_tree *)
  150 + (* ENIAM_LCGlatexOf.print_dependency_tree path (id^"6_dependency_tree") "a4" dependency_tree; *)
  151 + (* ENIAM_LCGgraphOf.print_dependency_tree path (id^"6_dependency_tree") dependency_tree; *)
  152 + (* ENIAM_LCGgraphOf.print_simplified_dependency_tree path (id^"6_simple_dependency_tree") dependency_tree; *)
  153 + ())
  154 + else print_endline "not reduced")
  155 + else print_endline "not parsed"
156 156 with NotDepParsed(id_ndp,left,l,right) -> (
157   - print_endline "not parsed 2";
158   - ENIAM_LCGlatexOf.print_not_parsed_dep_chart path (id^"3_not_parsed_chart") "a2" (id_ndp,left,l,right))
  157 + if (first_try)
  158 + then test_dep_example path id tokens lex_sems false paths
  159 + else (print_endline "not parsed 2";
  160 + ENIAM_LCGlatexOf.print_not_parsed_dep_chart path (id^"3_not_parsed_chart") "a2" (id_ndp,left,l,right)))
159 161  
160 162 let rec parse_sentence name id tokens lex_sems = function
161 163 RawSentence s -> id
... ... @@ -163,7 +165,7 @@ let rec parse_sentence name id tokens lex_sems = function
163 165 (* test_example ("results/" ^ name^"/") (string_of_int id ^ "_") tokens lex_sems paths last; *)
164 166 id + 1
165 167 | DepSentence(paths) ->
166   - test_dep_example ("results/" ^ name ^ "/") (string_of_int id ^ "_") tokens lex_sems paths;
  168 + test_dep_example ("results/" ^ name ^ "/") (string_of_int id ^ "_") tokens lex_sems true paths;
167 169 id + 1
168 170 | QuotedSentences sentences ->
169 171 Xlist.fold sentences id (fun id p ->
... ... @@ -212,8 +214,8 @@ let process_id s =
212 214 else failwith ("process_id: " ^ s)
213 215  
214 216 let process_conll_corpus filename =
215   - let corpus = File.file_in filename (fun file -> CONLL.match_corpus (ENIAM_CONLL.load_corpus file)) in
216   - print_endline "process_conll_corpus";
  217 + let corpus = File.file_in filename (fun file -> CONLL.match_corpus (CONLL.load_corpus file)) in
  218 + print_endline "process_conll_corpus";
217 219 (* let corpus = [List.hd corpus] in *)
218 220 Xlist.iter corpus (fun query -> try
219 221 let id = process_id (get_query_id query) in
... ... @@ -244,5 +246,5 @@ let _ =
244 246 (* LCGfields.reset (); *)
245 247 (* process_conll_corpus "../../NLP resources/skladnica_zaleznosciowa.conll"; *)
246 248 (* process_conll_corpus "../testy/skladnica-test1.conll"; *)
247   - process_conll_corpus "../testy/skladnica-test1-Failure.conll";
  249 + process_conll_corpus "../testy/skladnica-test1-Find_father.conll";
248 250 (* LCGfields.print_results () *)
... ...
testy/skladnica-test1-Not_parsed.conll 0 → 100644
  1 +1 Cmentarz cmentarz subst subst sg|nom|m3 2 subj _ _
  2 +2 jest być fin fin sg|ter|imperf 0 pred _ _
  3 +3 taki taki adj adj sg|nom|m3|pos 4 adjunct _ _
  4 +4 pusty pusty adj adj sg|nom|m3|pos 2 pd _ _
  5 +5 ! ! interp interp _ 2 punct _ _
  6 +
  7 +1 Mówi mówić fin fin sg|ter|imperf 0 pred _ _
  8 +2 się się qub qub _ 1 refl _ _
  9 +3 przecież przecież qub qub _ 1 adjunct _ _
  10 +4 , , interp interp _ 7 punct _ _
  11 +5 że że comp comp _ 7 complm _ _
  12 +6 broń broń subst subst sg|nom|f 7 subj _ _
  13 +7 była być praet praet sg|f|imperf 1 comp_fin _ _
  14 +8 w w prep prep loc|nwok 7 adjunct _ _
  15 +9 szkole szkoła subst subst sg|loc|f 8 comp _ _
  16 +10 schowana schować ppas ppas sg|nom|f|perf|aff 7 pd _ _
  17 +11 jeszcze jeszcze qub qub _ 12 adjunct _ _
  18 +12 latem lato subst subst sg|inst|n 7 adjunct _ _
  19 +13 w w prep prep loc|nwok 12 adjunct _ _
  20 +14 czasie czas subst subst sg|loc|m3 13 mwe _ _
  21 +15 remontu remont subst subst sg|gen|m3 14 comp _ _
  22 +16 . . interp interp _ 1 punct _ _
  23 +
  24 +1 Bo bo comp comp _ 9 adjunct _ _
  25 +2 jak jak adv adv _ 9 adjunct _ _
  26 +3 ona on ppron3 ppron3 sg|nom|f|ter|akc|npraep 9 subj _ _
  27 +4 , , interp interp _ 3 punct _ _
  28 +5 chora chory adj adj sg|nom|f|pos 3 adjunct _ _
  29 +6 na na prep prep acc 5 adjunct _ _
  30 +7 cukrzycę cukrzyca subst subst sg|acc|f 6 comp _ _
  31 +8 , , interp interp _ 3 punct _ _
  32 +9 przeżyła przeżyć praet praet sg|f|perf 0 pred _ _
  33 +10 trzy trzy num num pl|acc|m3|congr 9 obj _ _
  34 +11 dni dzień subst subst pl|acc|m3 10 comp _ _
  35 +12 bez bez prep prep gen|nwok 9 comp _ _
  36 +13 wody woda subst subst sg|gen|f 14 conjunct _ _
  37 +14 i i conj conj _ 12 comp _ _
  38 +15 jedzenia jedzenie subst subst sg|gen|n 14 conjunct _ _
  39 +16 ? ? interp interp _ 9 punct _ _
  40 +
  41 +1 Jednak jednak qub qub _ 9 adjunct _ _
  42 +2 już już qub qub _ 3 adjunct _ _
  43 +3 wkrótce wkrótce adv adv _ 9 adjunct _ _
  44 +4 Nizioł Nizioł subst subst sg|nom|m1 5 conjunct _ _
  45 +5 i i conj conj _ 9 subj _ _
  46 +6 Wapiński Wapiński subst subst sg|nom|m1 5 conjunct _ _
  47 +7 ze z prep prep inst|wok 9 adjunct _ _
  48 +8 zdumieniem zdumienie subst subst sg|inst|n 7 comp _ _
  49 +9 odkryli odkryć praet praet pl|m1|perf 0 pred _ _
  50 +10 , , interp interp _ 14 punct _ _
  51 +11 że że comp comp _ 14 complm _ _
  52 +12 Łapiński Łapiński subst subst sg|nom|m1 14 subj _ _
  53 +13 nie nie qub qub _ 14 neg _ _
  54 +14 dotrzymuje dotrzymywać fin fin sg|ter|imperf 9 comp_fin _ _
  55 +15 wcześniej wcześnie adv adv com 16 adjunct _ _
  56 +16 danego dać ppas ppas sg|gen|n|perf|aff 17 adjunct _ _
  57 +17 słowa słowo subst subst sg|gen|n 14 obj _ _
  58 +18 . . interp interp _ 9 punct _ _
  59 +
  60 +1 A a qub qub _ 8 adjunct _ _
  61 +2 pan pan subst subst sg|nom|m1 8 subj _ _
  62 +3 nigdy nigdy adv adv _ 8 adjunct _ _
  63 +4 się się qub qub _ 8 refl _ _
  64 +5 z z prep prep inst|nwok 8 comp _ _
  65 +6 nimi on ppron3 ppron3 pl|inst|m1|ter|akc|praep 5 comp _ _
  66 +7 nie nie qub qub _ 8 neg _ _
  67 +8 zetknął zetknąć praet praet sg|m1|perf 0 pred _ _
  68 +9 ? ? interp interp _ 8 punct _ _
  69 +
  70 +1 Załapać załapać inf inf perf 3 comp_inf _ _
  71 +2 się się qub qub _ 1 refl _ _
  72 +3 trzeba trzeba pred pred _ 0 pred _ _
  73 +4 teraz teraz adv adv _ 3 adjunct _ _
  74 +5 , , interp interp _ 3 punct _ _
  75 +6 bo bo comp comp _ 3 adjunct _ _
  76 +7 potem potem adv adv _ 8 adjunct _ _
  77 +8 będzie być bedzie bedzie sg|ter|imperf 6 comp_fin _ _
  78 +9 trudniej trudno adv adv com 8 pd _ _
  79 +10 . . interp interp _ 3 punct _ _
  80 +
  81 +1 Medykamenty medykament subst subst pl|nom|m3 4 subj _ _
  82 +2 współczesne współczesny adj adj pl|nom|m3|pos 1 adjunct _ _
  83 +3 dostępne dostępny adj adj pl|nom|m3|pos 4 pd _ _
  84 +4 są być fin fin pl|ter|imperf 0 pred _ _
  85 +5 na na prep prep loc 4 adjunct _ _
  86 +6 czarnym czarny adj adj sg|loc|m3|pos 7 adjunct _ _
  87 +7 rynku rynek subst subst sg|loc|m3 5 comp _ _
  88 +8 . . interp interp _ 4 punct _ _
  89 +
  90 +1 To to subst subst sg|nom|n 3 subj _ _
  91 +2 samo sam adj adj sg|nom|n|pos 1 adjunct _ _
  92 +3 dotyczy dotyczyć fin fin sg|ter|imperf 5 conjunct _ _
  93 +4 leczenia leczenie subst subst sg|gen|n 3 obj_th _ _
  94 +5 , , interp interp _ 0 coord_punct _ _
  95 +6 służba służba subst subst sg|nom|f 9 subj _ _
  96 +7 zdrowia zdrowie subst subst sg|gen|n 6 adjunct _ _
  97 +8 praktycznie praktycznie adv adv pos 9 adjunct _ _
  98 +9 przestała przestać praet praet sg|f|perf 5 conjunct _ _
  99 +10 istnieć istnieć inf inf imperf 9 comp_inf _ _
  100 +11 . . interp interp _ 5 punct _ _
  101 +
  102 +1 Zwykły zwykły adj adj sg|nom|m1|pos 2 adjunct _ _
  103 +2 mieszkaniec mieszkaniec subst subst sg|nom|m1 4 subj _ _
  104 +3 kraju kraj subst subst sg|gen|m3 2 adjunct _ _
  105 +4 ma mieć fin fin sg|ter|imperf 0 pred _ _
  106 +5 się się qub qub _ 6 refl _ _
  107 +6 leczyć leczyć inf inf imperf 4 comp_inf _ _
  108 +7 ziołami ziele subst subst pl|inst|n 6 obj_th _ _
  109 +8 , , interp interp _ 10 punct _ _
  110 +9 które który adj adj pl|acc|n|pos 10 obj _ _
  111 +10 zaleca zalecać fin fin sg|ter|imperf 7 adjunct _ _
  112 +11 tradycyjna tradycyjny adj adj sg|nom|f|pos 12 adjunct _ _
  113 +12 medycyna medycyna subst subst sg|nom|f 10 subj _ _
  114 +13 koreańska koreański adj adj sg|nom|f|pos 12 adjunct _ _
  115 +14 . . interp interp _ 4 punct _ _
... ...