Commit 0b0d4af3a9070341c24d391ddcd082f4cf5e15c9
Merge branch 'dep_trees' into integration
Showing 13 changed files with 586 additions and 662 deletions
LCGlexicon/resources/lexicon-pl.dic
@@ -7,7 +7,7 @@ | @@ -7,7 +7,7 @@ | ||
7 | month-lex month-interval year-interval roman roman-interval | 7 | month-lex month-interval year-interval roman roman-interval |
8 | hour-minute-interval hour-interval obj-id match-result | 8 | hour-minute-interval hour-interval obj-id match-result |
9 | url email day-month day year date hour hour-minute | 9 | url email day-month day year date hour hour-minute |
10 | - się nie by s <root> or or2 <colon> <speaker> <speaker-end> <squery> | 10 | + się nie by s <root> <conll_root> or or2 <colon> <speaker> <speaker-end> <squery> |
11 | 11 | ||
12 | @WEIGHTS | 12 | @WEIGHTS |
13 | symbol_weight=1 | 13 | symbol_weight=1 |
@@ -272,6 +272,8 @@ pos=unk: np*number*case*gender*person; | @@ -272,6 +272,8 @@ pos=unk: np*number*case*gender*person; | ||
272 | # [LCGrenderer.make_frame false tokens lex_sems [] schema_list ["<conll_root>"] d batrs] | 272 | # [LCGrenderer.make_frame false tokens lex_sems [] schema_list ["<conll_root>"] d batrs] |
273 | # | lemma,c,l -> failwith ("process_interp: " ^ lemma ^ ":" ^ c ^ ":" ^ (String.concat ":" (Xlist.map l (String.concat ".")))) in | 273 | # | lemma,c,l -> failwith ("process_interp: " ^ lemma ^ ":" ^ c ^ ":" ^ (String.concat ":" (Xlist.map l (String.concat ".")))) in |
274 | 274 | ||
275 | +lemma=<conll_root>,pos=interp: <conll_root>/(ip*T*T*T+cp*int*T+np*sg*voc*T*T+interj); | ||
276 | + | ||
275 | pos=sinterj: BRACKET interj; | 277 | pos=sinterj: BRACKET interj; |
276 | 278 | ||
277 | lemma=</sentence>,pos=interp: BRACKET s\?(ip*T*T*T+cp*int*T+np*sg*voc*T*T+interj); | 279 | lemma=</sentence>,pos=interp: BRACKET s\?(ip*T*T*T+cp*int*T+np*sg*voc*T*T+interj); |
LCGparser/ENIAM_LCGrules.ml
@@ -446,8 +446,8 @@ let backward_cross_composition references args functs = | @@ -446,8 +446,8 @@ let backward_cross_composition references args functs = | ||
446 | let rules = [ | 446 | let rules = [ |
447 | backward_application; | 447 | backward_application; |
448 | forward_application; | 448 | forward_application; |
449 | - backward_cross_composition; | ||
450 | - forward_cross_composition; | 449 | + (* backward_cross_composition; *) |
450 | + (* forward_cross_composition; *) | ||
451 | ] | 451 | ] |
452 | 452 | ||
453 | let rec flatten_functor2 l seml = function | 453 | let rec flatten_functor2 l seml = function |
corpora/CONLL.ml
@@ -3,133 +3,55 @@ open ENIAMsubsyntaxTypes | @@ -3,133 +3,55 @@ open ENIAMsubsyntaxTypes | ||
3 | open ENIAMtokenizerTypes | 3 | open ENIAMtokenizerTypes |
4 | 4 | ||
5 | let alternative_string f mode alts = if List.exists (fun (m,_) -> mode = m) alts | 5 | let alternative_string f mode alts = if List.exists (fun (m,_) -> mode = m) alts |
6 | - then f (snd @@ List.find (fun (m,_) -> m = mode) alts) | ||
7 | - else f (snd @@ List.find (fun (m,_) -> m = Struct) alts) | ||
8 | - | ||
9 | -let string_of_token mode token conll_id super label = | ||
10 | - let decompose_lemma = function | ||
11 | - | Lemma(a,b,c) -> a,b,if c = [[]] | ||
12 | - then "_" | ||
13 | - else String.concat "][" @@ Xlist.map c (fun x -> | ||
14 | - String.concat "|" @@ Xlist.map x ( fun y -> | ||
15 | - String.concat "." y)) | ||
16 | - | t -> failwith ("string_of_token: not Lemma") in | ||
17 | - match mode with | ||
18 | - | Raw -> token.orth | ||
19 | - | Struct -> failwith ("function string_of_token for mode Struct is not defined") | ||
20 | - | CONLL -> let lemma,cat,interp = decompose_lemma token.token in | ||
21 | - String.concat "\t" [string_of_int conll_id; | ||
22 | - token.orth; lemma; cat; cat; interp; "_"; "_"; | ||
23 | - string_of_int token.beg; string_of_int token.len] | ||
24 | - | Mate -> let lemma,cat,interp = decompose_lemma token.token in | ||
25 | - String.concat "\t" [string_of_int conll_id; | ||
26 | - token.orth; lemma; lemma; cat; cat; interp; interp; "_"; "_"; "_"; "_"; "_"; "_"] | ||
27 | - | _ -> failwith "string_of_token: ni" | ||
28 | - | ||
29 | -let string_of_paths mode tokens paths = | ||
30 | - let l = Int.fold 1 (Array.length paths - 1) [] (fun l conll_id -> | ||
31 | - let id,super,label = paths.(conll_id) in | ||
32 | - (string_of_token mode (ExtArray.get tokens id) conll_id super label) :: l) in | ||
33 | - String.concat "\n" (List.rev l) ^ "\n\n" | ||
34 | - | ||
35 | -let rec string_of_sentence mode tokens = function | ||
36 | - RawSentence s -> if mode = Raw then s else "" | ||
37 | - | StructSentence (tokens, _) -> failwith ("string_of_sentence: StructSentence") (*String.concat "\n" @@ Xlist.map tokens (fun x -> string_of_token mode x)*) | ||
38 | - | DepSentence (paths) -> string_of_paths mode tokens paths | ||
39 | - | QuotedSentences _ -> failwith ("string_of_sentence: QuotedSentences") | ||
40 | - | AltSentence alts -> alternative_string (string_of_sentence mode tokens) mode alts | ||
41 | - | ||
42 | -let string_of_p_record mode tokens p_record = | ||
43 | - (if p_record.id = "" then "" else p_record.id ^ "\n") ^ | ||
44 | - string_of_sentence mode tokens p_record.sentence | ||
45 | - | ||
46 | -(*let rec string_of_paragraph mode tokens = function | ||
47 | - RawParagraph s -> if mode = Raw then s else "" | ||
48 | - | StructParagraph p_records -> String.concat "\n\n" @@ Xlist.map p_records (string_of_p_record mode tokens) | ||
49 | - | AltParagraph alts -> alternative_string (string_of_paragraph mode) mode alts | ||
50 | - | ||
51 | -let rec string_of_text mode tokens = function | ||
52 | - RawText s -> if mode = Raw then s else "" | ||
53 | - | StructText (paragraphs,_) -> String.concat "\n\n" @@ Xlist.map paragraphs (string_of_paragraph mode tokens) | ||
54 | - | AltText alts -> alternative_string (string_of_text mode) mode alts*) | ||
55 | - | ||
56 | - | ||
57 | -(******************) | ||
58 | -(*** | ||
59 | -let establish_next tokens paths = | ||
60 | - let n = ExtArray.size tokens in | ||
61 | - Int.iter 1 (n - 2) (fun i -> | ||
62 | - let f = ExtArray.get tokens i in | ||
63 | - let s = ExtArray.get tokens (i+1) in | ||
64 | - ExtArray.set tokens i {f with next = s.beg}); | ||
65 | - let last = ExtArray.get tokens (n-1) in | ||
66 | - ExtArray.set tokens (n-1) {last with next = last.beg + last.len} | ||
67 | - | ||
68 | - | ||
69 | - (*let rec pom res = function | ||
70 | - h :: t -> let next = if res = [] | ||
71 | - then h.beg+h.len | ||
72 | - else (List.hd res).beg in | ||
73 | - pom ({h with next = next} :: res) t | ||
74 | - | [] -> res in | ||
75 | - pom [] rev_tokens*) | ||
76 | - | ||
77 | -let rec establish_for_token i text tokens = function | ||
78 | - (id,_,_) :: t as l-> | ||
79 | - let h = ExtArray.get tokens id in | ||
80 | - if Xstring.check_prefix " " text | ||
81 | - then establish_for_token (i+100) (Xstring.cut_prefix " " text) tokens l | ||
82 | - else if Xstring.check_prefix h.orth text | ||
83 | - then | ||
84 | - let n = (List.length @@ Xunicode.utf8_chars_of_utf8_string h.orth) * 100 in | ||
85 | - let n_h = {h with beg = i ; len = n} in | ||
86 | - ExtArray.set tokens id n_h; | ||
87 | - establish_for_token (i+n) (Xstring.cut_prefix h.orth text) tokens t | ||
88 | - else failwith ("establish_for_token :" ^ h.orth ^ " " ^ text) | ||
89 | - | [] -> 100, i | ||
90 | - | ||
91 | -let rec establish_lengths text paths tokens = | ||
92 | - let pbeg, plen = establish_for_token 100 text tokens (List.tl (Array.to_list paths)) in | ||
93 | - establish_next tokens paths; | ||
94 | - pbeg, plen-100 | ||
95 | - | ||
96 | -(******************) | ||
97 | - | ||
98 | -exception ErrorInfoFile of string | ||
99 | - | ||
100 | -let info_file = "../corpora/info_sentences.txt" | ||
101 | - | ||
102 | -let info = Xstring.split "\n\n" @@ File.load_file_gen info_file | ||
103 | - | ||
104 | -let add_to_map map info_str = | ||
105 | - match Xstring.split "\n" info_str with | ||
106 | - [id; text; info_token] -> StringMap.add map info_token (id, text) | ||
107 | - | _ -> raise (ErrorInfoFile info_str) | ||
108 | - | ||
109 | -let info_map = | ||
110 | - Xlist.fold info StringMap.empty add_to_map | ||
111 | - | ||
112 | -let match_sentence (p_record,tokens) = | ||
113 | - let rec info_token s = match s with | ||
114 | - RawSentence text -> failwith ("match_sentence: " ^ text) | ||
115 | - | StructSentence (tokens, n) -> failwith ("match_sentence: StructSentence") (*String.concat " " @@ List.map (fun x -> x.orth) tokens*) | ||
116 | - | DepSentence (paths) -> String.concat " " @@ List.map (fun (id,_,_) -> (ExtArray.get tokens id).orth) (List.tl (Array.to_list paths)), paths | ||
117 | - | QuotedSentences _ -> failwith ("match_sentence: QuotedSentences") | ||
118 | - | AltSentence alts -> failwith ("match_sentence: AltSentence") | ||
119 | - (*if List.exists (fun (mode, s) -> mode = CONLL) alts | ||
120 | - then info_token (snd (List.find (fun (mode, s) -> mode = CONLL) alts)) | ||
121 | - else failwith ("match_sentence: no CONLL mode in AltSentence")*) in | ||
122 | - let info_token, paths = info_token p_record.psentence in | ||
123 | - try | ||
124 | - let id, text = StringMap.find info_map info_token in | ||
125 | - let beg, len = establish_lengths text paths tokens (* -1, -1, p_record.psentence *) in | ||
126 | - AltText[Raw,RawText text;CONLL,StructText([StructParagraph[{pid = id; pbeg = beg; plen = len; pnext = beg+len; pfile_prefix=""; | ||
127 | - psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence paths]}]],tokens)] | ||
128 | -(* {s_id = id; s_text = text; s_tokens = sentence.s_tokens} *) | ||
129 | - with _ -> AltText[CONLL,StructText([StructParagraph[p_record]],tokens)] | ||
130 | - | ||
131 | -let match_corpus corpus = | ||
132 | - Xlist.map corpus match_sentence***) | 6 | + then f (snd @@ List.find (fun (m,_) -> m = mode) alts) |
7 | + else f (snd @@ List.find (fun (m,_) -> m = Struct) alts) | ||
8 | + | ||
9 | +let string_of_token mode token conll_id super label = | ||
10 | + let decompose_lemma = function | ||
11 | + | Lemma(a,b,c) -> a,b,if c = [[]] | ||
12 | + then "_" | ||
13 | + else String.concat "][" @@ Xlist.map c (fun x -> | ||
14 | + String.concat "|" @@ Xlist.map x ( fun y -> | ||
15 | + String.concat "." y)) | ||
16 | + | t -> failwith ("string_of_token: not Lemma") in | ||
17 | + match mode with | ||
18 | + | Raw -> token.orth | ||
19 | + | Struct -> failwith ("function string_of_token for mode Struct is not defined") | ||
20 | + | CONLL -> let lemma,cat,interp = decompose_lemma token.token in | ||
21 | + String.concat "\t" [string_of_int conll_id; | ||
22 | + token.orth; lemma; cat; cat; interp; "_"; "_"; | ||
23 | + string_of_int token.beg; string_of_int token.len] | ||
24 | + | Mate -> let lemma,cat,interp = decompose_lemma token.token in | ||
25 | + String.concat "\t" [string_of_int conll_id; | ||
26 | + token.orth; lemma; lemma; cat; cat; interp; interp; "_"; "_"; "_"; "_"; "_"; "_"] | ||
27 | + | _ -> failwith "string_of_token: ni" | ||
28 | + | ||
29 | +let string_of_paths mode tokens paths = | ||
30 | + let l = Int.fold 1 (Array.length paths - 1) [] (fun l conll_id -> | ||
31 | + let id,super,label = paths.(conll_id) in | ||
32 | + (string_of_token mode (ExtArray.get tokens id) conll_id super label) :: l) in | ||
33 | + String.concat "\n" (List.rev l) ^ "\n\n" | ||
34 | + | ||
35 | +let rec string_of_sentence mode tokens = function | ||
36 | + RawSentence s -> if mode = Raw then s else "" | ||
37 | + | StructSentence (tokens, _) -> failwith ("string_of_sentence: StructSentence") (*String.concat "\n" @@ Xlist.map tokens (fun x -> string_of_token mode x)*) | ||
38 | + | DepSentence (paths) -> string_of_paths mode tokens paths | ||
39 | + | QuotedSentences _ -> failwith ("string_of_sentence: QuotedSentences") | ||
40 | + | AltSentence alts -> alternative_string (string_of_sentence mode tokens) mode alts | ||
41 | + | ||
42 | +let string_of_p_record mode tokens p_record = | ||
43 | + (if p_record.id = "" then "" else p_record.id ^ "\n") ^ | ||
44 | + string_of_sentence mode tokens p_record.sentence | ||
45 | + | ||
46 | +(*let rec string_of_paragraph mode tokens = function | ||
47 | + RawParagraph s -> if mode = Raw then s else "" | ||
48 | + | StructParagraph p_records -> String.concat "\n\n" @@ Xlist.map p_records (string_of_p_record mode tokens) | ||
49 | + | AltParagraph alts -> alternative_string (string_of_paragraph mode) mode alts | ||
50 | + | ||
51 | +let rec string_of_text mode tokens = function | ||
52 | + RawText s -> if mode = Raw then s else "" | ||
53 | + | StructText (paragraphs,_) -> String.concat "\n\n" @@ Xlist.map paragraphs (string_of_paragraph mode tokens) | ||
54 | + | AltText alts -> alternative_string (string_of_text mode) mode alts*) | ||
133 | 55 | ||
134 | (******************) | 56 | (******************) |
135 | 57 | ||
@@ -207,15 +129,6 @@ let establish_next tokens paths = | @@ -207,15 +129,6 @@ let establish_next tokens paths = | ||
207 | let last = ExtArray.get tokens (n-1) in | 129 | let last = ExtArray.get tokens (n-1) in |
208 | ExtArray.set tokens (n-1) {last with next = last.beg + last.len} | 130 | ExtArray.set tokens (n-1) {last with next = last.beg + last.len} |
209 | 131 | ||
210 | - | ||
211 | - (*let rec pom res = function | ||
212 | - h :: t -> let next = if res = [] | ||
213 | - then h.beg+h.len | ||
214 | - else (List.hd res).beg in | ||
215 | - pom ({h with next = next} :: res) t | ||
216 | - | [] -> res in | ||
217 | - pom [] rev_tokens*) | ||
218 | - | ||
219 | let rec establish_for_token i text tokens = function | 132 | let rec establish_for_token i text tokens = function |
220 | (id,_,_) :: t as l-> | 133 | (id,_,_) :: t as l-> |
221 | let h = ExtArray.get tokens id in | 134 | let h = ExtArray.get tokens id in |
@@ -245,15 +158,15 @@ exception ErrorInfoFile of string | @@ -245,15 +158,15 @@ exception ErrorInfoFile of string | ||
245 | 158 | ||
246 | let info_file = "../corpora/info_sentences2.txt" | 159 | let info_file = "../corpora/info_sentences2.txt" |
247 | 160 | ||
248 | -let info = Xstring.split "\n\n" @@ File.load_file_gen info_file | 161 | +let info () = Xstring.split "\n\n" @@ File.load_file_gen info_file |
249 | 162 | ||
250 | let add_to_map map info_str = | 163 | let add_to_map map info_str = |
251 | match Xstring.split "\n" info_str with | 164 | match Xstring.split "\n" info_str with |
252 | [id; text; info_token] -> StringMap.add map info_token (id, text) | 165 | [id; text; info_token] -> StringMap.add map info_token (id, text) |
253 | | _ -> raise (ErrorInfoFile info_str) | 166 | | _ -> raise (ErrorInfoFile info_str) |
254 | 167 | ||
255 | -let info_map = | ||
256 | - Xlist.fold (List.tl info) StringMap.empty add_to_map | 168 | +let info_map () = |
169 | + Xlist.fold (List.tl (info ())) StringMap.empty add_to_map | ||
257 | 170 | ||
258 | let match_sentence (p_record,tokens) = | 171 | let match_sentence (p_record,tokens) = |
259 | let rec info_token s = match s with | 172 | let rec info_token s = match s with |
@@ -268,7 +181,7 @@ let match_sentence (p_record,tokens) = | @@ -268,7 +181,7 @@ let match_sentence (p_record,tokens) = | ||
268 | let info_token, paths = info_token p_record.sentence in | 181 | let info_token, paths = info_token p_record.sentence in |
269 | (* try *) | 182 | (* try *) |
270 | let id, text = try | 183 | let id, text = try |
271 | - StringMap.find info_map info_token | 184 | + StringMap.find (info_map ()) info_token |
272 | with | 185 | with |
273 | | _ -> p_record.id, get_text tokens in | 186 | | _ -> p_record.id, get_text tokens in |
274 | let beg, len = establish_lengths text paths tokens (* -1, -1, p_record.psentence *) in | 187 | let beg, len = establish_lengths text paths tokens (* -1, -1, p_record.psentence *) in |
@@ -282,7 +195,7 @@ let match_corpus corpus = | @@ -282,7 +195,7 @@ let match_corpus corpus = | ||
282 | [] -> [] | 195 | [] -> [] |
283 | | a::l -> try | 196 | | a::l -> try |
284 | let r = f a in r :: pom f l | 197 | let r = f a in r :: pom f l |
285 | - with e -> (*print_endline (Printexc.to_string e);*) pom f l in | 198 | + with e -> pom f l in |
286 | pom match_sentence corpus | 199 | pom match_sentence corpus |
287 | 200 | ||
288 | (******************) | 201 | (******************) |
@@ -304,7 +217,6 @@ let load_token in_channel = | @@ -304,7 +217,6 @@ let load_token in_channel = | ||
304 | else [Xlist.map (Xstring.split_delim "|" interp) (fun tag -> [tag])] in | 217 | else [Xlist.map (Xstring.split_delim "|" interp) (fun tag -> [tag])] in |
305 | {empty_token_env with orth = orth; token = Lemma(lemma,cat,interp);}, int_of_string id, int_of_super super, label in | 218 | {empty_token_env with orth = orth; token = Lemma(lemma,cat,interp);}, int_of_string id, int_of_super super, label in |
306 | let line = input_line in_channel in | 219 | let line = input_line in_channel in |
307 | - (* print_endline ("load_token: " ^ line); *) | ||
308 | if line = "" | 220 | if line = "" |
309 | then raise Empty_line | 221 | then raise Empty_line |
310 | else if line.[0] = '#' | 222 | else if line.[0] = '#' |
@@ -329,30 +241,19 @@ let load_token in_channel = | @@ -329,30 +241,19 @@ let load_token in_channel = | ||
329 | let label = Xstring.cut_sufix "_" label_err in | 241 | let label = Xstring.cut_sufix "_" label_err in |
330 | n_token id orth lemma cat interp super label) | 242 | n_token id orth lemma cat interp super label) |
331 | | _ -> failwith ("load_token: " ^ line) | 243 | | _ -> failwith ("load_token: " ^ line) |
332 | -(* {c_id = List.nth pom 1; | ||
333 | - c_lemma = List.nth pom 2; | ||
334 | - c_cat = List.nth pom 3; | ||
335 | - c_interp = (let interp = List.nth pom 5 in | ||
336 | - if interp = "_" | ||
337 | - then [] | ||
338 | - else Str.split (Str.regexp "|") interp); | ||
339 | - c_super = -1; c_label = ""; c_beg = -1; c_len = -1} *) | ||
340 | 244 | ||
341 | let load_sentence in_channel = | 245 | let load_sentence in_channel = |
342 | let tokens = ExtArray.make 100 empty_token_env in | 246 | let tokens = ExtArray.make 100 empty_token_env in |
343 | let _ = ExtArray.add tokens {empty_token_env with token = Interp "<conll_root>"} in | 247 | let _ = ExtArray.add tokens {empty_token_env with token = Interp "<conll_root>"} in |
344 | let rec pom rev_paths id = | 248 | let rec pom rev_paths id = |
345 | - (* print_endline "pom 1"; *) | ||
346 | try | 249 | try |
347 | - (* print_endline "pom 2"; *) | ||
348 | let token, conll_id, super, label = load_token in_channel in | 250 | let token, conll_id, super, label = load_token in_channel in |
349 | let id_a = ExtArray.add tokens token in | 251 | let id_a = ExtArray.add tokens token in |
350 | if id_a <> conll_id then failwith "load_sentence: different ids" else | 252 | if id_a <> conll_id then failwith "load_sentence: different ids" else |
351 | - (* print_endline "pom 3"; *) | ||
352 | pom ((id_a,super,label) :: rev_paths) id | 253 | pom ((id_a,super,label) :: rev_paths) id |
353 | - with Id_line new_id -> (*print_endline "pom 4";*)pom rev_paths new_id | ||
354 | - | Empty_line -> (*print_endline "pom 5";*)rev_paths, id | ||
355 | - | End_of_file -> (*print_endline "pom 6";*)if rev_paths = [] | 254 | + with Id_line new_id -> pom rev_paths new_id |
255 | + | Empty_line -> rev_paths, id | ||
256 | + | End_of_file -> if rev_paths = [] | ||
356 | then raise End_of_file | 257 | then raise End_of_file |
357 | else rev_paths, id in | 258 | else rev_paths, id in |
358 | let rev_paths, id = pom [] "" in | 259 | let rev_paths, id = pom [] "" in |
@@ -366,4 +267,4 @@ let load_corpus in_channel = | @@ -366,4 +267,4 @@ let load_corpus in_channel = | ||
366 | pom ((conll_sentence, tokens) :: res) | 267 | pom ((conll_sentence, tokens) :: res) |
367 | with End_of_file -> res | 268 | with End_of_file -> res |
368 | | e -> prerr_endline (Printexc.to_string e); res in | 269 | | e -> prerr_endline (Printexc.to_string e); res in |
369 | - (* match_corpus @@ *) List.rev @@ pom [] | 270 | + List.rev @@ pom [] |
corpora/CONLL_adapter.ml
1 | +open Xstd | ||
2 | +open ENIAMsubsyntaxTypes | ||
3 | +open ENIAMtokenizerTypes | ||
1 | 4 | ||
2 | -let convert_dep_tree id first_try paths tokens lex_sems = | ||
3 | - let do_if cond f paths = if cond then f paths tokens else paths in | 5 | +let if_lemma lemmas = function |
6 | + Lemma(l,_,_) -> List.exists (fun x -> x = l) lemmas | ||
7 | + | _ -> false | ||
8 | + | ||
9 | +let if_cat cats = function | ||
10 | + Lemma(_,cat,_) -> List.exists (fun x -> x = cat) cats | ||
11 | + | _ -> false | ||
12 | + | ||
13 | +let if_interps interps token = | ||
14 | + let interp = match token with | ||
15 | + Lemma(_,_,i) -> i | ||
16 | + | _ -> [[[]]] in | ||
17 | + let if_interp nr value = | ||
18 | + List.exists (fun x -> | ||
19 | + try | ||
20 | + List.exists (fun y -> | ||
21 | + y = value) (List.nth x nr) | ||
22 | + with _ -> false | ||
23 | + ) interp in | ||
24 | + Xlist.fold interps true (fun acc (nr,value) -> acc && (if_interp nr value)) | ||
25 | + | ||
26 | +let correct_coordination1 paths tokens = | ||
27 | + let paths_ls = List.mapi (fun i (id,super,label) -> | ||
28 | + (i,id,super,label)) (Array.to_list paths) in | ||
29 | + | ||
30 | + let l = [("subst:nom",0),(["fin";"praet"],0); | ||
31 | + ("subst:acc",0),(["inf"],0); | ||
32 | + ("ppron3:nom",0),(["fin";"praet"],0); | ||
33 | + ("ppron3:acc",0),(["fin";"praet"],0); | ||
34 | + ("adv",0),(["fin";"praet"],0); | ||
35 | + ("adv",0),(["inf"],0); | ||
36 | + ("adv",0),(["adj"],0); | ||
37 | + ("prep",0),(["fin";"praet"],0); | ||
38 | + ("prep",0),(["inf"],0); | ||
39 | + ("prep",0),(["ppas"],0); | ||
40 | + ("prep",0),(["subst"],0); | ||
41 | + ("prep:gen",0),(["subst:gen"],0); | ||
42 | + ("adj:nom",0),(["fin";"praet"],0); | ||
43 | + ("adj:nom",0),(["subst:nom"],0); | ||
44 | + ("adj:gen",0),(["subst:gen"],0); | ||
45 | + ("adj:dat",0),(["subst:dat"],0); | ||
46 | + ("adj:acc",0),(["subst:acc"],0); | ||
47 | + ("adj:inst",0),(["subst:inst"],0); | ||
48 | + ("adj:loc",0),(["subst:loc"],0); | ||
49 | + ("subst:gen",0),(["subst:nom"],0); | ||
50 | + (* ("subst:gen",0),(["subst:gen"],0); *) | ||
51 | + ("subst:gen",0),(["subst:dat"],0); | ||
52 | + ("subst:gen",0),(["subst:acc"],0); | ||
53 | + ("subst:gen",0),(["subst:inst"],0); | ||
54 | + ("subst:gen",0),(["subst:loc"],0); | ||
55 | + ("ppron3:gen",0),(["subst:nom"],0); | ||
56 | + ("ppron3:gen",0),(["subst:dat"],0); | ||
57 | + ("ppron3:gen",0),(["subst:acc"],0); | ||
58 | + ("ppron3:gen",0),(["subst:inst"],0); | ||
59 | + ("ppron3:gen",0),(["subst:loc"],0); | ||
60 | + ("qub",0),(["fin";"praet"],0); | ||
61 | + ("qub",0),(["subst"],0); | ||
62 | + ("qub",0),(["adj"],0); | ||
63 | + ("pact",0),(["subst"],0); | ||
64 | + ("ppas",0),(["subst"],0) | ||
65 | + ] in | ||
66 | + | ||
67 | + let find_dependents sons = | ||
68 | + | ||
69 | + let is (i,id,super,label) pattern = match Xstring.split ":" pattern with | ||
70 | + ["prep";case] -> if_cat ["prep"] (ExtArray.get tokens id).token && | ||
71 | + if_interps [0,case] (ExtArray.get tokens id).token | ||
72 | + | [cat;case] -> if_cat [cat] (ExtArray.get tokens id).token && | ||
73 | + if_interps [1,case] (ExtArray.get tokens id).token | ||
74 | + | [cat] -> if_cat [cat] (ExtArray.get tokens id).token | ||
75 | + | _ -> failwith "is (in correct_coordination1)" in | ||
76 | + | ||
77 | + let incr_representative acc son = Xlist.map acc (fun ((one,a),(rest,b)) -> | ||
78 | + if is son one | ||
79 | + then (one,a + 1), (rest,b) | ||
80 | + else if List.exists (is son) rest | ||
81 | + then (one,a), (rest,b + 1) | ||
82 | + else (one,a), (rest,b)) in | ||
83 | + | ||
84 | + let get_from sons pattern = List.find (fun x -> is x pattern) sons in | ||
85 | + | ||
86 | + let l = Xlist.fold sons l incr_representative in | ||
87 | + let results = List.filter (fun ((_,a),(_,b)) -> a = 1 && b > 1) l in | ||
88 | + Xlist.map results (fun result -> | ||
89 | + get_from sons @@ fst @@ fst result, | ||
90 | + List.filter (fun son -> | ||
91 | + List.exists (fun one -> is son one) (fst (snd result))) sons) in | ||
92 | + | ||
93 | + let establish_neighbour super ((i_d,id_d,super_d,label_d),sons) = | ||
94 | + let not_between (i_s,_,_,_) = | ||
95 | + (super < i_d && super < i_s) || | ||
96 | + (super > i_d && super > i_s) in | ||
97 | + let (i_n,id_n,super_n,label_n) = List.find (fun son -> | ||
98 | + not_between son) sons in | ||
99 | + paths.(i_d) <- (id_d, i_n, label_d) in | ||
100 | + | ||
101 | + let examine_coords (i,id,super,label) sons = | ||
102 | + try | ||
103 | + let dependents = find_dependents sons in | ||
104 | + Xlist.iter dependents (establish_neighbour super) | ||
105 | + with | ||
106 | + | _ -> () in | ||
107 | + | ||
108 | + Array.iteri (fun i (id,super,label) -> | ||
109 | + if if_cat ["conj"] (ExtArray.get tokens id).token | ||
110 | + then (let sons = List.filter (fun (_,_,super,_) -> super = i) paths_ls in | ||
111 | + if (List.length sons > 2) | ||
112 | + then examine_coords (i,id,super,label) sons)) paths; | ||
113 | + paths | ||
114 | + | ||
115 | +let correct_coordination2 paths tokens = | ||
116 | + let paths_c = Array.copy paths in | ||
117 | + let paths_ls () = List.mapi (fun i (id,super,label) -> | ||
118 | + (i,id,super,label)) (Array.to_list paths_c) in | ||
119 | + | ||
120 | + (* let ps a sons = | ||
121 | + print_endline a; | ||
122 | + List.iter (fun (i,_,_,_) -> print_endline (ExtArray.get tokens i).orth) sons; | ||
123 | + print_endline "" in *) | ||
124 | + | ||
125 | + let rec correct_rec (i,id,super,label) sons = | ||
126 | + let left_s, right_s = List.partition (fun (a,b,c,d) -> a < i) sons in | ||
127 | + (* ps "left:" (List.rev left_s); | ||
128 | + ps "right:" right_s; *) | ||
129 | + find_father i (List.rev left_s); | ||
130 | + find_father i right_s | ||
131 | + | ||
132 | + and find_father i0 = function | ||
133 | + [(i,id,super,label)] -> paths_c.(i) <- (id,i0,label) | ||
134 | + | (a,b,c,d) :: (i,id,super,label) :: t -> | ||
135 | + paths_c.(i) <- (id,i0,label); | ||
136 | + if not (if_cat ["conj"] (ExtArray.get tokens i).token || | ||
137 | + (ExtArray.get tokens i).orth = ",") | ||
138 | + then failwith "find_father"; | ||
139 | + correct_rec (i,id,super,label) (if a < i | ||
140 | + then (a,b,c,d) :: t | ||
141 | + else List.rev @@ (a,b,c,d) :: t) | ||
142 | + | _ -> failwith "find_father" in | ||
143 | + | ||
144 | + let check_previous_for_interp i = | ||
145 | + if i >= 0 && (ExtArray.get tokens i).orth = "," && | ||
146 | + not (List.exists (fun (_,super,_) -> super = i) (Array.to_list paths_c)) | ||
147 | + then paths_c.(i) <- (0,-1,"") in | ||
148 | + | ||
149 | + Array.iteri (fun i (id,super,label) -> | ||
150 | + if if_cat ["conj"] (ExtArray.get tokens i).token || | ||
151 | + (ExtArray.get tokens i).orth = "," | ||
152 | + then | ||
153 | + (check_previous_for_interp (i-1); | ||
154 | + let sons = List.filter (fun (_,_,super,_) -> super = i) (paths_ls ()) in | ||
155 | + if (List.length sons > 2) | ||
156 | + then correct_rec (i,id,super,label) sons)) paths_c; | ||
157 | + paths_c | ||
158 | + | ||
159 | +let praet_qub_aglt paths tokens = | ||
160 | + Array.iteri (fun i (id,super,label) -> | ||
161 | + if super >= 0 then | ||
162 | + (let id_s, super_s, label_s = paths.(super) in | ||
163 | + if if_cat ["aglt"] (ExtArray.get tokens id).token && | ||
164 | + (ExtArray.get tokens id_s).orth = "by" | ||
165 | + then let id_gf,super_gf,label_gf = paths.(super_s) in | ||
166 | + if if_cat ["praet"] (ExtArray.get tokens id_gf).token | ||
167 | + then paths.(i) <- (id,super_s,label))) paths; | ||
168 | + paths | ||
169 | + | ||
170 | +let replace_tokens paths tokens = | ||
171 | +(* for i = 0 to ExtArray.size tokens - 1 do | ||
172 | + print_endline (string_of_int i ^ ": "^ (ExtArray.get tokens i).orth) | ||
173 | +done; *) | ||
174 | + let find_token orth = Int.fold 0 (ExtArray.size tokens - 1) 0 (fun acc i -> | ||
175 | + if (ExtArray.get tokens i).orth = orth then i else acc) in | ||
176 | + | ||
177 | + let multidot i id0 super0 label0 = | ||
178 | + let id1, super1, label1 = paths.(super0) in | ||
179 | + if super1 >= 0 then | ||
180 | + let id2, super2, label2 = paths.(super1) in | ||
181 | + if (ExtArray.get tokens id1).orth = "." && | ||
182 | + (ExtArray.get tokens id2).orth = "." | ||
183 | + then | ||
184 | + (paths.(super1) <- (find_token "..." ,super2, label2); | ||
185 | + paths.(super0) <- (0,-1,""); | ||
186 | + paths.(i) <- (0,-1,"")) in | ||
187 | + | ||
188 | + let brev i id super label = | ||
189 | + let if_the_last_dot () = | ||
190 | + let (id_dot, s_dot, l_dot) = List.find (fun (i2,s,l) -> | ||
191 | + s = i && ((ExtArray.get tokens i2).orth = "." || (ExtArray.get tokens i2).orth = "...")) (Array.to_list paths) in | ||
192 | + Array.fold_left (fun acc (i2,s,l) -> | ||
193 | + acc && (ExtArray.get tokens i2).beg <= (ExtArray.get tokens id_dot).beg) true paths in | ||
194 | + | ||
195 | + let dot = if if_interps [0,"npun"] (ExtArray.get tokens id).token || if_the_last_dot () | ||
196 | + then "" | ||
197 | + else "." in | ||
198 | + | ||
199 | + let n_orth = (ExtArray.get tokens id).orth ^ dot in | ||
200 | + paths.(i) <- (find_token n_orth,super,label) in | ||
201 | + | ||
202 | + Array.iteri (fun i (id,super,label) -> | ||
203 | + if (ExtArray.get tokens id).orth = "." | ||
204 | + then multidot i id super label; | ||
205 | + if if_cat ["brev"] (ExtArray.get tokens id).token | ||
206 | + then brev i id super label) | ||
207 | + paths; | ||
208 | + paths | ||
209 | + | ||
210 | +let replace_hyphens paths tokens = | ||
211 | + let ref_paths = ref paths in | ||
212 | + let find_token token = Int.fold 0 (ExtArray.size tokens - 1) 0 (fun acc i -> | ||
213 | + if (ExtArray.get tokens i).token = token then i else acc) in | ||
214 | + let find_specific_token token beg next = Int.fold 0 (ExtArray.size tokens - 1) 0 (fun acc i -> | ||
215 | + if (ExtArray.get tokens i).token = token && | ||
216 | + beg <= (ExtArray.get tokens i).beg && | ||
217 | + (ExtArray.get tokens i).next <= next | ||
218 | + then i else acc) in | ||
219 | + | ||
220 | + let correct_last sons_of_zero = (* TODO: synowie zamiast syna *) | ||
221 | + let i1,s1,l1 = !ref_paths.(Array.length !ref_paths - 1) in | ||
222 | + if (ExtArray.get tokens i1).orth = "." | ||
223 | + then | ||
224 | + !ref_paths.(Array.length !ref_paths - 1) <- (find_token (Interp "</sentence>"),1,l1) | ||
225 | + else | ||
226 | + (ref_paths := Array.append !ref_paths [| (find_token (Interp "</sentence>"),1,"-") |]; | ||
227 | + !ref_paths.(Array.length !ref_paths - 2) <- (i1,Array.length !ref_paths - 1,l1)); | ||
228 | + Xlist.iter sons_of_zero (fun son_of_zero -> | ||
229 | + let i2,s2,l2 = !ref_paths.(son_of_zero) in | ||
230 | + !ref_paths.(son_of_zero) <- (i2,Array.length !ref_paths - 1,l2)) in | ||
231 | + | ||
232 | + let one_hyphen sons_of_zero = | ||
233 | + let i2,s2,l2 = !ref_paths.(1) in | ||
234 | + Xlist.iter sons_of_zero (fun son_of_zero -> | ||
235 | + let i1,s1,l1 = !ref_paths.(son_of_zero) in | ||
236 | + !ref_paths.(son_of_zero) <- (i1,1,l1)); | ||
237 | + !ref_paths.(1) <- (find_token (Interp "<or-sentence>"),0,l2); | ||
238 | + correct_last sons_of_zero in | ||
239 | + | ||
240 | + let two_hyphens first second son parent = | ||
241 | + let i1,s1,l1 = !ref_paths.(first) in | ||
242 | + let i2,s2,l2 = !ref_paths.(second) in | ||
243 | + let beg, next = (ExtArray.get tokens i2).beg, (ExtArray.get tokens i2).next in | ||
244 | + let i3,s3,l3 = !ref_paths.(son) in | ||
245 | + let i4,s4,l4 = !ref_paths.(parent) in | ||
246 | + ref_paths := Array.append !ref_paths [| (find_token (Interp "</sentence>"),first,"-") |]; | ||
247 | + !ref_paths.(first) <- (find_token (Interp "<or-sentence>"),0,l1); | ||
248 | + !ref_paths.(second) <- (find_specific_token (Interp "</or-sentence>") beg next,first,l2); | ||
249 | + !ref_paths.(son) <- (i3,second,l3); | ||
250 | + !ref_paths.(parent) <- (i4,first,l4) in | ||
251 | + | ||
252 | + let rec is_dep_correct a b out zero res i (id,super,label) = (* out = how many words in (a,b) have parent outside [a,b]*) | ||
253 | + (* print_endline ((string_of_int a) ^ " " ^ (string_of_int b) ^ " " ^ (string_of_int out) ^ " " ^ (string_of_int zero) ^ " " ^ (string_of_int i)); *) | ||
254 | + if out > 1 || zero > 1 || (* zero = how many words (not interps) have parent 0 *) | ||
255 | + (a < i && i < b && super < a && label <> "interp") || | ||
256 | + (a < super && super < b && (i < a || b < i)) | ||
257 | + then false, res | ||
258 | + else | ||
259 | + if i+1 = Array.length !ref_paths | ||
260 | + then out = 1 && zero = 1, res | ||
261 | + else | ||
262 | + if a < i && i < b && b < super | ||
263 | + then is_dep_correct a b (out+1) zero (i,super) (i+1) !ref_paths.(i+1) | ||
264 | + else | ||
265 | + if super = 0 && not (if_cat ["interp"] (ExtArray.get tokens id).token) | ||
266 | + then is_dep_correct a b out (zero+1) res (i+1) !ref_paths.(i+1) | ||
267 | + else is_dep_correct a b out zero res (i+1) !ref_paths.(i+1) in | ||
268 | + | ||
269 | + let hyphens = snd @@ Array.fold_left (fun (i,acc) (id,super,label) -> | ||
270 | + if (ExtArray.get tokens id).orth = "-" | ||
271 | + then i+1, i :: acc | ||
272 | + else i+1, acc) (0,[]) !ref_paths in | ||
273 | + | ||
274 | + let sons_of_zero = snd @@ Array.fold_left (fun (i,acc) (id,super,label) -> | ||
275 | + if super = 0 && not (if_cat ["interp"] (ExtArray.get tokens id).token) | ||
276 | + then i+1, i :: acc | ||
277 | + else i+1, acc) (0,[]) !ref_paths in | ||
278 | + | ||
279 | + (if List.length sons_of_zero = 1 | ||
280 | + then | ||
281 | + if List.length hyphens = 1 && hyphens = [1] | ||
282 | + then one_hyphen sons_of_zero | ||
283 | + else | ||
284 | + if List.length hyphens = 2 | ||
285 | + then let a, b = List.nth hyphens 1, List.nth hyphens 0 in | ||
286 | + let is_good, (son,parent) = is_dep_correct a b 0 0 (0,0) 1 !ref_paths.(1) in | ||
287 | + if a = 1 && is_good | ||
288 | + then two_hyphens a b son parent); | ||
289 | + !ref_paths | ||
290 | + | ||
let correct_interp_with_father_0 paths tokens =
  (* Flatten comma attachments: whenever a comma token either already hangs
     off the root (parent index 0) or carries an <or-sentence>/</or-sentence>
     interp marker, every node depending on that comma is re-attached
     directly to the root.  [paths] is mutated in place and also returned,
     matching the style of the neighbouring passes. *)
  let comma_near_root (tok_id, parent, _) =
    let tok = ExtArray.get tokens tok_id in
    (parent = 0
     || tok.token = Interp "<or-sentence>"
     || tok.token = Interp "</or-sentence>")
    && tok.orth = "," in
  Array.iteri
    (fun comma_pos entry ->
      if comma_near_root entry then
        Array.iteri
          (fun child_pos (child_id, child_parent, child_label) ->
            if child_parent = comma_pos then
              paths.(child_pos) <- (child_id, 0, child_label))
          paths)
    paths;
  paths
300 | + | ||
let remove_interps interp paths tokens =
  (* Drop every leaf token whose orth equals [interp]: its entry becomes the
     dummy (0,-1,"") so later passes can recognise it (parent = -1).
     Tokens that still have dependents are kept, to avoid orphaning them.
     [paths] is mutated in place and also returned. *)
  let n = Array.length paths in
  (* Mark which positions are referenced as a parent.  The marks are taken
     from the supers as they stand on entry — the same snapshot semantics as
     the previous List.exists over Array.to_list, but in O(n) instead of
     O(n^2). *)
  let has_child = Array.make n false in
  Array.iter
    (fun (_, super, _) ->
      if super >= 0 && super < n then has_child.(super) <- true)
    paths;
  Array.iteri
    (fun i (id, _, _) ->
      if (ExtArray.get tokens id).orth = interp && not has_child.(i)
      then paths.(i) <- (0, -1, ""))
    paths;
  paths
308 | + | ||
let correct_passive_voice paths tokens =
  (* Passive-voice repair: when a past-tense auxiliary (cat praet) is
     attached under a passive participle (cat ppas), invert the edge — the
     auxiliary takes over the participle's parent, the participle becomes a
     dependent of the auxiliary, and the participle's remaining children are
     re-pointed to the auxiliary.  [paths] is mutated in place and returned. *)
  Array.iteri (fun i (id,super,label) ->
    (* Skip entries already removed by remove_interps (parent = -1). *)
    if super >= 0 then
      (let id_s, super_s, label_s = paths.(super) in
      if (if_cat ["praet"] (ExtArray.get tokens id).token &&
        if_cat ["ppas"] (ExtArray.get tokens id_s).token)
      then (paths.(i) <- (id,super_s,label);    (* auxiliary inherits grandparent *)
        paths.(super) <- (id_s,i,label_s);      (* participle now depends on i *)
        (* Re-attach the participle's other children to i.  This loop reads
           the array after the two writes above, so paths.(super) itself
           (already re-parented to i) is not matched again. *)
        Array.iteri (fun i_p (id_p,super_p,label_p) ->
          if super_p = super
          then paths.(i_p) <- (id_p,i,label_p)) paths))) paths;
  paths
321 | + | ||
let swap_dep paths tokens =
  (* Second-attempt pass (first_try = false): inverts selected dependency
     edges so that e.g. a subordinating conjunction or a relative adverb
     becomes the head of the verb it was attached under.  [paths] is
     mutated in place and returned. *)
  (* Swap node [i] with its parent position [super]: [i] inherits the
     grandparent, and the old parent slot becomes a dependent.
     NOTE(review): the parent field written here is [id] (a token id),
     while every other pass stores a path index in that position — confirm
     this mixing of token id vs. path index is intended. *)
  let change_dep i (id,super,label) =
    let id_S, super_S, label_S = paths.(super) in
    paths.(i) <- (id,super_S,label);
    paths.(super) <- (id_S, id, label_S) in
  (* Decide whether the edge (i -> super) must be inverted; may recurse on
     the freshly rewritten paths.(i) when an adverbial relator still sits
     under a subst/pred head. *)
  let rec correct_dep i (id,super,label) =
    (* Interrogative / relative lemmas that should head their clause. *)
    let adv_relators = ["kto";"co";"ile";"czyj";"jaki";"który";
      "jak";"skąd";"dokąd";"gdzie";"którędy";"kiedy";"odkąd";"dlaczego";"czemu";"gdy"] in
    (* NOTE(review): the parent's category is read as
       (ExtArray.get tokens super) — [super] is used directly as a token id,
       whereas correct_passive_voice first maps it through paths.(super).
       Entries whose parent was cleared to -1 by remove_interps would make
       this lookup go out of bounds — verify the inputs this pass sees. *)
    if (if_cat ["comp"] (ExtArray.get tokens id).token &&
      if_cat ["fin"; "praet"; "winien"; "pred"; "imps"; "ppas"] (ExtArray.get tokens super).token) ||
      (if_cat ["conj"] (ExtArray.get tokens id).token &&
      if_cat ["fin"; "praet"; "winien"; "pred"; "imps"; "ppas"] (ExtArray.get tokens super).token &&
      not (List.exists (fun (_,super,_) -> super = i) (Array.to_list paths))) ||
      (if_cat ["ppron3"] (ExtArray.get tokens id).token &&
      if_interps [5,"praep"] (ExtArray.get tokens id).token) ||
      (if_lemma adv_relators (ExtArray.get tokens id).token &&
      if_cat ["fin"; "praet"; "winien"; "pred"; "imps"; "ppas"; "subst"] (ExtArray.get tokens super).token)
    then
      change_dep i (id,super,label);
    (* This second test runs unconditionally (note the ';' above) and still
       uses the pre-swap [super]; the recursion re-reads the updated entry. *)
    if (if_lemma adv_relators (ExtArray.get tokens id).token &&
      if_cat ["subst"; "pred"] (ExtArray.get tokens super).token)
    then correct_dep i paths.(i) in
  Array.iteri correct_dep paths; paths
345 | + | ||
(*
  correct_coordination1 -> the neighbour is the nearest word to the right;
  if a conjunction stands between it and me, then the nearest to the left.
  Not handled yet: coordination of the passive voice — both the auxiliary
  verbs and the participles.
  Not handled yet: coordination of dependents of subordinating conjunctions. *)
350 | + | ||
let convert_dep_tree id first_try paths tokens =
  (* Normalise a CONLL dependency tree before LCG parsing.  On the first
     attempt the full battery of repair passes is applied (same order as the
     earlier do_if pipeline); if that parse fails the caller retries with
     first_try = false, in which case only swap_dep runs on a fresh copy of
     the original tree.  The input array is copied first, so the caller's
     [paths] is never mutated.
     NOTE(review): the call site in test_conll.ml passes an extra
     [lex_sems] argument — confirm which arity is current.
     [id] is only used by the commented-out debug dump below. *)
  let paths = Array.copy paths in
  let paths =
    if first_try
    then
      let pom = replace_tokens paths tokens in
      let pom = (remove_interps ".") pom tokens in
      let pom = replace_hyphens pom tokens in
      let pom = correct_coordination1 pom tokens in
      let pom = correct_interp_with_father_0 pom tokens in
      let pom = correct_coordination2 pom tokens in
      let pom = remove_interps "," pom tokens in
      let pom = correct_passive_voice pom tokens in
      praet_qub_aglt pom tokens
    else
      swap_dep paths tokens in
  (* Debug dump of the modified tree, kept for diagnostics:
  File.file_out ("results/" ^ id ^ "/pre_text_modified_" ^ (string_of_bool first_try) ^ ".html") (fun file ->
    Printf.fprintf file "%s\n" Visualization.html_header;
    Printf.fprintf file "%s\n" (Visualization.html_of_dep_sentence tokens paths);
    Printf.fprintf file "%s\n" Visualization.html_trailer); *)
  paths
diagnostics/LCGfields.ml renamed to corpora/LCGfields.ml
@@ -83,7 +83,7 @@ let field_of_dependency_tree str_node fields dep_tree = | @@ -83,7 +83,7 @@ let field_of_dependency_tree str_node fields dep_tree = | ||
83 | Array.fold_left (fun acc x -> | 83 | Array.fold_left (fun acc x -> |
84 | acc ^ (field_of_linear_term str_node field x) ^ "\n\t\t" ) "" dep_tree)) | 84 | acc ^ (field_of_linear_term str_node field x) ^ "\n\t\t" ) "" dep_tree)) |
85 | 85 | ||
86 | -let field_of_eniam_sentence fields tokens (result : eniam_parse_result) = | 86 | +let field_of_eniam_sentence fields (result : eniam_parse_result) = |
87 | match result.status with | 87 | match result.status with |
88 | Idle -> "Idle" | 88 | Idle -> "Idle" |
89 | (* | PreprocessingError -> "PreprocessingError" *) | 89 | (* | PreprocessingError -> "PreprocessingError" *) |
@@ -99,7 +99,7 @@ let field_of_eniam_sentence fields tokens (result : eniam_parse_result) = | @@ -99,7 +99,7 @@ let field_of_eniam_sentence fields tokens (result : eniam_parse_result) = | ||
99 | | Parsed -> ignore ("Parsed\n\t\t" ^ (field_of_dependency_tree eniam fields result.dependency_tree)); "Parsed\n" | 99 | | Parsed -> ignore ("Parsed\n\t\t" ^ (field_of_dependency_tree eniam fields result.dependency_tree)); "Parsed\n" |
100 | | _ -> failwith "field_of_eniam_sentence" | 100 | | _ -> failwith "field_of_eniam_sentence" |
101 | 101 | ||
102 | -let field_of_conll_sentence fields tokens (result : conll_parse_result) = | 102 | +let field_of_conll_sentence fields (result : conll_parse_result) = |
103 | stat_map := StatMap.add !stat_map result.status; | 103 | stat_map := StatMap.add !stat_map result.status; |
104 | match result.status with | 104 | match result.status with |
105 | Idle -> "Idle" | 105 | Idle -> "Idle" |
@@ -117,33 +117,36 @@ let field_of_conll_sentence fields tokens (result : conll_parse_result) = | @@ -117,33 +117,36 @@ let field_of_conll_sentence fields tokens (result : conll_parse_result) = | ||
117 | | _ -> failwith "field_of_conll_sentence" | 117 | | _ -> failwith "field_of_conll_sentence" |
118 | 118 | ||
119 | 119 | ||
120 | -let rec field_of_sentence fields tokens = function | 120 | +let rec field_of_sentence fields = function |
121 | RawSentence s -> s | 121 | RawSentence s -> s |
122 | | StructSentence _ -> "StructSentence" | 122 | | StructSentence _ -> "StructSentence" |
123 | | DepSentence _ -> "DepSentence" | 123 | | DepSentence _ -> "DepSentence" |
124 | - | ENIAMSentence result -> field_of_eniam_sentence fields tokens result | ||
125 | - | CONLLSentence result -> field_of_conll_sentence fields tokens result | 124 | + | ENIAMSentence result -> field_of_eniam_sentence fields result |
125 | + | CONLLSentence result -> field_of_conll_sentence fields result | ||
126 | | QuotedSentences sentences -> "QuotedSentences" | 126 | | QuotedSentences sentences -> "QuotedSentences" |
127 | | AltSentence l -> String.concat "\n\t" (Xlist.map l (fun (m, s) -> | 127 | | AltSentence l -> String.concat "\n\t" (Xlist.map l (fun (m, s) -> |
128 | - Visualization.string_of_mode m ^ "\t" ^ (field_of_sentence fields tokens s))) | 128 | + Visualization.string_of_mode m ^ "\t" ^ (field_of_sentence fields s))) |
129 | | _ -> failwith "field_of_sentence: ni" | 129 | | _ -> failwith "field_of_sentence: ni" |
130 | 130 | ||
131 | -let rec field_of_paragraph fields tokens = function | 131 | +let rec field_of_paragraph fields = function |
132 | RawParagraph s -> print_endline "no fields detected: only raw paragraph"; s | 132 | RawParagraph s -> print_endline "no fields detected: only raw paragraph"; s |
133 | | StructParagraph sentences -> | 133 | | StructParagraph sentences -> |
134 | - String.concat "\n\t" (Xlist.map sentences (fun p -> field_of_sentence fields tokens p.psentence)) | 134 | + String.concat "\n\t" (Xlist.map sentences (fun p -> field_of_sentence fields p.psentence)) |
135 | | AltParagraph l -> | 135 | | AltParagraph l -> |
136 | String.concat "\n" (Xlist.map (List.filter (fun (m,t) -> (*m = ENIAM ||*) m = CONLL) l) (fun (m,t) -> | 136 | String.concat "\n" (Xlist.map (List.filter (fun (m,t) -> (*m = ENIAM ||*) m = CONLL) l) (fun (m,t) -> |
137 | - Visualization.string_of_mode m ^ "\n\t" ^ (field_of_paragraph fields tokens t))) | ||
138 | - (* field_of_paragraph fields tokens (snd @@ List.find (fun (mode,text) -> mode = ENIAM || mode = CONLL) l) *) | 137 | + Visualization.string_of_mode m ^ "\n\t" ^ (field_of_paragraph fields t))) |
138 | + (* field_of_paragraph fields (snd @@ List.find (fun (mode,text) -> mode = ENIAM || mode = CONLL) l) *) | ||
139 | 139 | ||
140 | let rec print_fields_rec fields = function | 140 | let rec print_fields_rec fields = function |
141 | - RawText s -> print_endline "no fields detected: only raw text"; | ||
142 | -| StructText(paragraphs,tokens) -> | ||
143 | - print_endline (String.concat "\n\n" (Xlist.map paragraphs (field_of_paragraph fields tokens)) ^ "\n") | 141 | + RawText s -> s |
142 | + (* print_endline "no fields detected: only raw text"; *) | ||
143 | +| StructText(paragraphs) -> | ||
144 | + String.concat "\n\n" (Xlist.map paragraphs (field_of_paragraph fields)) ^ "\n" | ||
144 | | AltText l -> | 145 | | AltText l -> |
145 | - print_fields_rec fields (snd @@ List.find (fun (m,t) -> m = Struct (*|| m = ENIAM*) || m = CONLL) l) | 146 | + String.concat "\n" (Xlist.map (List.filter (fun (m,t) -> m = Struct || m = CONLL) l) (fun (m,t) -> |
147 | + Visualization.string_of_mode m ^ "\n\t" ^ (print_fields_rec fields t))) | ||
148 | + (* print_fields_rec fields (snd @@ List.find (fun (m,t) -> m = Struct (*|| m = ENIAM*) || m = CONLL) l) *) | ||
146 | 149 | ||
147 | let print_fields fields text = | 150 | let print_fields fields text = |
148 | - print_fields_rec fields text | 151 | + print_endline @@ print_fields_rec fields text |
149 | (* ; print_field_map () *) | 152 | (* ; print_field_map () *) |
corpora/makefile
@@ -16,9 +16,9 @@ lib: | @@ -16,9 +16,9 @@ lib: | ||
16 | freq_test: | 16 | freq_test: |
17 | $(OCAMLOPT) -o freq_test $(OCAMLOPTFLAGS) $(MODS) freq_test.ml | 17 | $(OCAMLOPT) -o freq_test $(OCAMLOPTFLAGS) $(MODS) freq_test.ml |
18 | 18 | ||
19 | -test: CONLL.ml test_conll2.ml | 19 | +test: CONLL.ml CONLL_adapter.ml test_conll2.ml |
20 | mkdir -p results | 20 | mkdir -p results |
21 | - $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) CONLL.ml test_conll2.ml | 21 | + $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) $^ |
22 | 22 | ||
23 | 23 | ||
24 | .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx | 24 | .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx |
corpora/test_conll.ml
1 | +open Xstd | ||
2 | +open ENIAMsubsyntaxTypes | ||
3 | +open ENIAMtokenizerTypes | ||
4 | +open LCGtypes | ||
5 | +open ExecTypes | ||
1 | 6 | ||
2 | let empty_result = { | 7 | let empty_result = { |
3 | input_text=RawText ""; | 8 | input_text=RawText ""; |
@@ -146,7 +151,7 @@ let eniam_parse_sentence timeout test_only_flag paths last tokens lex_sems = | @@ -146,7 +151,7 @@ let eniam_parse_sentence timeout test_only_flag paths last tokens lex_sems = | ||
146 | let rec conll_parse_sentence timeout test_only_flag id first_try paths tokens lex_sems = | 151 | let rec conll_parse_sentence timeout test_only_flag id first_try paths tokens lex_sems = |
147 | let result = empty_conll_parse_result in | 152 | let result = empty_conll_parse_result in |
148 | let time2 = time_fun () in | 153 | let time2 = time_fun () in |
149 | - let paths = CONLL_adapter.convert_dep_tree id first_try paths tokens lex_sems | 154 | + let paths = CONLL_adapter.convert_dep_tree id first_try paths tokens lex_sems in |
150 | try | 155 | try |
151 | let dep_chart = LCGlexicon.dep_create paths tokens lex_sems in | 156 | let dep_chart = LCGlexicon.dep_create paths tokens lex_sems in |
152 | let dep_chart,references = LCGchart.dep_lazify dep_chart in | 157 | let dep_chart,references = LCGchart.dep_lazify dep_chart in |
@@ -193,7 +198,7 @@ let rec conll_parse_sentence timeout test_only_flag id first_try paths tokens le | @@ -193,7 +198,7 @@ let rec conll_parse_sentence timeout test_only_flag id first_try paths tokens le | ||
193 | let time5 = time_fun () in | 198 | let time5 = time_fun () in |
194 | {result with status=ReductionError; msg=Printexc.to_string e; reduction_time=time5 -. time4} | 199 | {result with status=ReductionError; msg=Printexc.to_string e; reduction_time=time5 -. time4} |
195 | else if first_try | 200 | else if first_try |
196 | - then conll_parse_sentence timeout test_only_flag id false paths tokens | 201 | + then conll_parse_sentence timeout test_only_flag id false paths tokens lex_sems |
197 | else {result with status=NotParsed} | 202 | else {result with status=NotParsed} |
198 | with | 203 | with |
199 | Timeout t -> | 204 | Timeout t -> |
@@ -201,7 +206,7 @@ let rec conll_parse_sentence timeout test_only_flag id first_try paths tokens le | @@ -201,7 +206,7 @@ let rec conll_parse_sentence timeout test_only_flag id first_try paths tokens le | ||
201 | {result with status=ParseTimeout; msg=Printf.sprintf "%f" t; parse_time=time4 -. time3} | 206 | {result with status=ParseTimeout; msg=Printf.sprintf "%f" t; parse_time=time4 -. time3} |
202 | | NotDepParsed(id_ndp,left,l,right) -> | 207 | | NotDepParsed(id_ndp,left,l,right) -> |
203 | if first_try | 208 | if first_try |
204 | - then conll_parse_sentence timeout test_only_flag id false paths tokens | 209 | + then conll_parse_sentence timeout test_only_flag id false paths tokens lex_sems |
205 | else let time4 = time_fun () in | 210 | else let time4 = time_fun () in |
206 | {result with status=NotParsed; not_parsed_dep_chart=(id_ndp,left,l,right); parse_time=time4 -. time3} | 211 | {result with status=NotParsed; not_parsed_dep_chart=(id_ndp,left,l,right); parse_time=time4 -. time3} |
207 | | e -> | 212 | | e -> |
@@ -210,7 +215,7 @@ let rec conll_parse_sentence timeout test_only_flag id first_try paths tokens le | @@ -210,7 +215,7 @@ let rec conll_parse_sentence timeout test_only_flag id first_try paths tokens le | ||
210 | with e -> (*print_endline (Printexc.to_string e);*) | 215 | with e -> (*print_endline (Printexc.to_string e);*) |
211 | let time3 = time_fun () in | 216 | let time3 = time_fun () in |
212 | if first_try | 217 | if first_try |
213 | - then conll_parse_sentence timeout test_only_flag id false paths tokens | 218 | + then conll_parse_sentence timeout test_only_flag id false paths tokens lex_sems |
214 | else {result with status=LexiconError; msg=Printexc.to_string e; lex_time=time3 -. time2} | 219 | else {result with status=LexiconError; msg=Printexc.to_string e; lex_time=time3 -. time2} |
215 | 220 | ||
216 | 221 | ||
@@ -243,11 +248,7 @@ let get_paths old_paths = function | @@ -243,11 +248,7 @@ let get_paths old_paths = function | ||
243 | paths | 248 | paths |
244 | | _ -> failwith "get_paths" | 249 | | _ -> failwith "get_paths" |
245 | 250 | ||
246 | -<<<<<<< HEAD | ||
247 | -let rec parse_sentence timeout test_only_flag mode file_prefix tokens lex_sems = function | ||
248 | -======= | ||
249 | -let rec parse_sentence timeout test_only_flag mode id file_prefix tokens = function | ||
250 | ->>>>>>> dep_trees | 251 | +let rec parse_sentence timeout test_only_flag mode id file_prefix tokens lex_sems = function |
251 | RawSentence s -> | 252 | RawSentence s -> |
252 | (match mode with | 253 | (match mode with |
253 | Swigra -> | 254 | Swigra -> |
@@ -259,23 +260,15 @@ let rec parse_sentence timeout test_only_flag mode id file_prefix tokens = funct | @@ -259,23 +260,15 @@ let rec parse_sentence timeout test_only_flag mode id file_prefix tokens = funct | ||
259 | | StructSentence(paths,last) -> | 260 | | StructSentence(paths,last) -> |
260 | (match mode with | 261 | (match mode with |
261 | ENIAM -> | 262 | ENIAM -> |
262 | -<<<<<<< HEAD | ||
263 | let result = eniam_parse_sentence timeout test_only_flag paths last tokens lex_sems in | 263 | let result = eniam_parse_sentence timeout test_only_flag paths last tokens lex_sems in |
264 | -======= | ||
265 | - let result = empty_eniam_parse_result in | ||
266 | - (* let result = print_endline "eniam_parse_sentence"; eniam_parse_sentence timeout test_only_flag paths last tokens in *) | ||
267 | ->>>>>>> dep_trees | 264 | + (* let result = empty_eniam_parse_result in *) |
268 | let result = {result with file_prefix = file_prefix_of_mode mode ^ file_prefix} in | 265 | let result = {result with file_prefix = file_prefix_of_mode mode ^ file_prefix} in |
269 | ENIAMSentence result | 266 | ENIAMSentence result |
270 | | _ -> failwith "parse_sentence") | 267 | | _ -> failwith "parse_sentence") |
271 | | DepSentence(paths) -> | 268 | | DepSentence(paths) -> |
272 | (match mode with | 269 | (match mode with |
273 | CONLL -> | 270 | CONLL -> |
274 | -<<<<<<< HEAD | ||
275 | - let result = conll_parse_sentence timeout test_only_flag paths tokens lex_sems in | ||
276 | -======= | ||
277 | - let result = (*print_endline "conll_parse_sentence";*) conll_parse_sentence timeout test_only_flag id true paths tokens in | ||
278 | ->>>>>>> dep_trees | 271 | + let result = conll_parse_sentence timeout test_only_flag id true paths tokens lex_sems in |
279 | let result = {result with | 272 | let result = {result with |
280 | file_prefix = file_prefix_of_mode mode ^ file_prefix; | 273 | file_prefix = file_prefix_of_mode mode ^ file_prefix; |
281 | paths = paths} in | 274 | paths = paths} in |
@@ -289,19 +282,15 @@ let rec parse_sentence timeout test_only_flag mode id file_prefix tokens = funct | @@ -289,19 +282,15 @@ let rec parse_sentence timeout test_only_flag mode id file_prefix tokens = funct | ||
289 | if not Paths.config.Paths.mate_parser_enabled then DepSentence paths else ( | 282 | if not Paths.config.Paths.mate_parser_enabled then DepSentence paths else ( |
290 | print_endline "parse_sentence 1"; | 283 | print_endline "parse_sentence 1"; |
291 | (* print_endline (Visualization.html_of_dep_sentence tokens paths); *) | 284 | (* print_endline (Visualization.html_of_dep_sentence tokens paths); *) |
292 | - let conll = ENIAM_CONLL.string_of_paths ENIAMsubsyntaxTypes.Mate tokens paths in | 285 | + let conll = CONLL.string_of_paths ENIAMsubsyntaxTypes.Mate tokens paths in |
293 | print_endline "parse_sentence 2"; | 286 | print_endline "parse_sentence 2"; |
294 | (* printf "|%s|\n" conll; *) | 287 | (* printf "|%s|\n" conll; *) |
295 | Printf.fprintf mate_out "%s%!" conll; | 288 | Printf.fprintf mate_out "%s%!" conll; |
296 | print_endline "parse_sentence 3"; | 289 | print_endline "parse_sentence 3"; |
297 | - let new_paths = get_paths paths (ENIAM_CONLL.load_sentence mate_in) in | 290 | + let new_paths = get_paths paths (CONLL.load_sentence mate_in) in |
298 | print_endline "parse_sentence 4"; | 291 | print_endline "parse_sentence 4"; |
299 | (* print_endline (Visualization.html_of_dep_sentence tokens new_paths); *) | 292 | (* print_endline (Visualization.html_of_dep_sentence tokens new_paths); *) |
300 | -<<<<<<< HEAD | ||
301 | - let result = conll_parse_sentence timeout test_only_flag new_paths tokens lex_sems in | ||
302 | -======= | ||
303 | - let result = conll_parse_sentence timeout test_only_flag id true new_paths tokens in | ||
304 | ->>>>>>> dep_trees | 293 | + let result = conll_parse_sentence timeout test_only_flag id true new_paths tokens lex_sems in |
305 | let result = {result with | 294 | let result = {result with |
306 | file_prefix = file_prefix_of_mode mode ^ file_prefix; | 295 | file_prefix = file_prefix_of_mode mode ^ file_prefix; |
307 | paths=new_paths} in | 296 | paths=new_paths} in |
@@ -309,66 +298,94 @@ let rec parse_sentence timeout test_only_flag mode id file_prefix tokens = funct | @@ -309,66 +298,94 @@ let rec parse_sentence timeout test_only_flag mode id file_prefix tokens = funct | ||
309 | | _ -> failwith "parse_sentence") | 298 | | _ -> failwith "parse_sentence") |
310 | | QuotedSentences sentences -> | 299 | | QuotedSentences sentences -> |
311 | let sentences = Xlist.rev_map sentences (fun p -> | 300 | let sentences = Xlist.rev_map sentences (fun p -> |
312 | -<<<<<<< HEAD | ||
313 | - let sentence = parse_sentence timeout test_only_flag mode p.pfile_prefix tokens lex_sems p.psentence in | ||
314 | -======= | ||
315 | - let sentence = parse_sentence timeout test_only_flag mode id p.pfile_prefix tokens p.psentence in | ||
316 | ->>>>>>> dep_trees | 301 | + let sentence = parse_sentence timeout test_only_flag mode id p.pfile_prefix tokens lex_sems p.psentence in |
317 | {p with psentence=sentence}) in | 302 | {p with psentence=sentence}) in |
318 | QuotedSentences(List.rev sentences) | 303 | QuotedSentences(List.rev sentences) |
319 | | AltSentence l -> | 304 | | AltSentence l -> |
320 | let l = Xlist.rev_map l (fun (mode,sentence) -> | 305 | let l = Xlist.rev_map l (fun (mode,sentence) -> |
321 | -<<<<<<< HEAD | ||
322 | - mode, parse_sentence timeout test_only_flag mode file_prefix tokens lex_sems sentence) in | 306 | + mode, parse_sentence timeout test_only_flag mode id file_prefix tokens lex_sems sentence) in |
323 | AltSentence(List.rev l) | 307 | AltSentence(List.rev l) |
324 | | _ -> failwith "parse_sentence" | 308 | | _ -> failwith "parse_sentence" |
325 | 309 | ||
326 | -let rec parse_paragraph timeout test_only_flag mode tokens lex_sems = function | 310 | +let rec parse_paragraph timeout test_only_flag mode id tokens lex_sems = function |
327 | RawParagraph s -> RawParagraph s | 311 | RawParagraph s -> RawParagraph s |
328 | | StructParagraph sentences -> | 312 | | StructParagraph sentences -> |
329 | let sentences = Xlist.rev_map sentences (fun p -> | 313 | let sentences = Xlist.rev_map sentences (fun p -> |
330 | - let sentence = parse_sentence timeout test_only_flag mode p.pfile_prefix tokens lex_sems p.psentence in | ||
331 | -======= | ||
332 | - mode, parse_sentence timeout test_only_flag mode id file_prefix tokens sentence) in | ||
333 | - AltSentence(List.rev l) | ||
334 | - | _ -> failwith "parse_sentence" | 314 | + let sentence = parse_sentence timeout test_only_flag mode id p.pfile_prefix tokens lex_sems p.psentence in |
315 | + {p with psentence=sentence}) in | ||
316 | + StructParagraph(List.rev sentences) | ||
317 | + | AltParagraph l -> | ||
318 | + let l = Xlist.rev_map l (fun (mode,paragraph) -> | ||
319 | + mode, parse_paragraph timeout test_only_flag mode id tokens lex_sems paragraph) in | ||
320 | + AltParagraph(List.rev l) | ||
321 | + | ||
322 | +let rec parse_text timeout test_only_flag mode id tokens lex_sems = function | ||
323 | + RawText s -> RawText s | ||
324 | + | StructText paragraphs -> | ||
325 | + let paragraphs = Xlist.rev_map paragraphs (fun paragraph -> | ||
326 | + parse_paragraph timeout test_only_flag mode id tokens lex_sems paragraph) in | ||
327 | + StructText(List.rev paragraphs) | ||
328 | + | AltText l -> AltText(Xlist.map l (fun (mode,text) -> | ||
329 | + mode, parse_text timeout test_only_flag mode id tokens lex_sems text)) | ||
330 | + | ||
331 | +let select_mode = function | ||
332 | + (Raw,_),_ -> failwith "select_mode" | ||
333 | + | _,(Raw,_) -> failwith "select_mode" | ||
334 | + | (Struct,_),_ -> failwith "select_mode" | ||
335 | + | _,(Struct,_) -> failwith "select_mode" | ||
336 | + | (CONLL,s),_ -> CONLL,s | ||
337 | + | _,(CONLL,s) -> CONLL,s | ||
338 | + | (ENIAM,s),_ -> ENIAM,s | ||
339 | + | _,(ENIAM,s) -> ENIAM,s | ||
340 | + | (Swigra,s),_ -> Swigra,s | ||
341 | + | _,(Swigra,s) -> Swigra,s | ||
342 | + | (Mate,s),_ -> Mate,s | ||
343 | + | _,(Mate,s) -> Mate,s | ||
344 | + | _ -> failwith "select_mode: ni" | ||
335 | 345 | ||
336 | -let rec parse_paragraph timeout test_only_flag mode id tokens = function | 346 | +let rec select_sentences_sentence = function |
347 | + RawSentence s -> failwith "select_sentences_sentence" | ||
348 | + | StructSentence(paths,last) -> failwith "select_sentences_sentence" | ||
349 | + | DepSentence paths -> failwith "select_sentences_sentence" | ||
350 | + | QuotedSentences sentences -> | ||
351 | + let sentences = Xlist.rev_map sentences (fun p -> | ||
352 | + let sentence,_ = select_sentences_sentence p.psentence in | ||
353 | + {p with psentence=sentence}) in | ||
354 | + QuotedSentences(List.rev sentences), Parsed | ||
355 | + | AltSentence l -> | ||
356 | + let raw,selected = Xlist.fold l ([],[]) (fun (raw,selected) (mode,sentence) -> | ||
357 | + if mode = Raw then (mode,sentence) :: raw, selected else | ||
358 | + let sentence,status = select_sentences_sentence sentence in | ||
359 | + if status <> Parsed && status <> NotTranslated then raw,selected else | ||
360 | + match selected with | ||
361 | + [] -> raw,[mode,sentence] | ||
362 | + | [mode2,sentence2] -> raw,[select_mode ((mode,sentence),(mode2,sentence2))] | ||
363 | + | _ -> failwith "select_sentences_sentence") in | ||
364 | + AltSentence(raw @ selected), Parsed | ||
365 | + | ENIAMSentence result -> ENIAMSentence result, result.status | ||
366 | + | CONLLSentence result -> CONLLSentence result, result.status | ||
367 | + | SemSentence result -> SemSentence result, result.status | ||
368 | + | ||
369 | +let rec select_sentences_paragraph = function | ||
337 | RawParagraph s -> RawParagraph s | 370 | RawParagraph s -> RawParagraph s |
338 | | StructParagraph sentences -> | 371 | | StructParagraph sentences -> |
339 | let sentences = Xlist.rev_map sentences (fun p -> | 372 | let sentences = Xlist.rev_map sentences (fun p -> |
340 | - let sentence = parse_sentence timeout test_only_flag mode id p.pfile_prefix tokens p.psentence in | ||
341 | ->>>>>>> dep_trees | 373 | + let sentence,_ = select_sentences_sentence p.psentence in |
342 | {p with psentence=sentence}) in | 374 | {p with psentence=sentence}) in |
343 | StructParagraph(List.rev sentences) | 375 | StructParagraph(List.rev sentences) |
344 | | AltParagraph l -> | 376 | | AltParagraph l -> |
345 | let l = Xlist.rev_map l (fun (mode,paragraph) -> | 377 | let l = Xlist.rev_map l (fun (mode,paragraph) -> |
346 | -<<<<<<< HEAD | ||
347 | - mode, parse_paragraph timeout test_only_flag mode tokens lex_sems paragraph) in | ||
348 | - AltParagraph(List.rev l) | ||
349 | - | ||
350 | -let rec parse_text timeout test_only_flag mode tokens lex_sems = function | ||
351 | -======= | ||
352 | - mode, parse_paragraph timeout test_only_flag mode id tokens paragraph) in | 378 | + mode, select_sentences_paragraph paragraph) in |
353 | AltParagraph(List.rev l) | 379 | AltParagraph(List.rev l) |
354 | 380 | ||
355 | -let rec parse_text timeout test_only_flag mode id = function | ||
356 | ->>>>>>> dep_trees | 381 | +let rec select_sentences_text = function |
357 | RawText s -> RawText s | 382 | RawText s -> RawText s |
358 | | StructText paragraphs -> | 383 | | StructText paragraphs -> |
359 | let paragraphs = Xlist.rev_map paragraphs (fun paragraph -> | 384 | let paragraphs = Xlist.rev_map paragraphs (fun paragraph -> |
360 | -<<<<<<< HEAD | ||
361 | - parse_paragraph timeout test_only_flag mode tokens lex_sems paragraph) in | 385 | + select_sentences_paragraph paragraph) in |
362 | StructText(List.rev paragraphs) | 386 | StructText(List.rev paragraphs) |
363 | | AltText l -> AltText(Xlist.map l (fun (mode,text) -> | 387 | | AltText l -> AltText(Xlist.map l (fun (mode,text) -> |
364 | - mode, parse_text timeout test_only_flag mode tokens lex_sems text)) | ||
365 | -======= | ||
366 | - parse_paragraph timeout test_only_flag mode id tokens paragraph) in | ||
367 | - StructText(List.rev paragraphs, tokens) | ||
368 | - | AltText l -> AltText(Xlist.map l (fun (mode,text) -> | ||
369 | - mode, parse_text timeout test_only_flag mode id text)) | ||
370 | ->>>>>>> dep_trees | ||
371 | - | 388 | + mode, select_sentences_text text)) |
372 | 389 | ||
373 | let rec extract_query_text = function | 390 | let rec extract_query_text = function |
374 | RawText s -> s | 391 | RawText s -> s |
@@ -392,11 +409,7 @@ let process_query pre_in pre_out timeout test_only_flag id full_query max_n = | @@ -392,11 +409,7 @@ let process_query pre_in pre_out timeout test_only_flag id full_query max_n = | ||
392 | let result = {result with pre_time1=pre_time1; pre_time2=time2 -. time1} in | 409 | let result = {result with pre_time1=pre_time1; pre_time2=time2 -. time1} in |
393 | if msg <> "" then {result with status=PreprocessingError; msg=msg} else ( | 410 | if msg <> "" then {result with status=PreprocessingError; msg=msg} else ( |
394 | (* print_endline "process_query 3"; *) | 411 | (* print_endline "process_query 3"; *) |
395 | -<<<<<<< HEAD | ||
396 | - let parsed_text = parse_text timeout test_only_flag Struct tokens lex_sems (translate_text pre_text) in | ||
397 | -======= | ||
398 | - let parsed_text = parse_text timeout test_only_flag Struct id (translate_text pre_text) in | ||
399 | ->>>>>>> dep_trees | 412 | + let parsed_text = parse_text timeout test_only_flag Struct id tokens lex_sems (translate_text pre_text) in |
400 | (* print_endline "process_query 4"; *) | 413 | (* print_endline "process_query 4"; *) |
401 | let time3 = time_fun () in | 414 | let time3 = time_fun () in |
402 | let result = if test_only_flag then result else {result with status=Parsed; parsed_text=parsed_text} in | 415 | let result = if test_only_flag then result else {result with status=Parsed; parsed_text=parsed_text} in |
@@ -421,23 +434,50 @@ let process_query pre_in pre_out timeout test_only_flag id full_query max_n = | @@ -421,23 +434,50 @@ let process_query pre_in pre_out timeout test_only_flag id full_query max_n = | ||
421 | let result = {result with semantic_time=time4 -. time3} in | 434 | let result = {result with semantic_time=time4 -. time3} in |
422 | result) | 435 | result) |
423 | 436 | ||
437 | +let get_sock_addr host_name port = | ||
438 | + let he = Unix.gethostbyname host_name in | ||
439 | + let addr = he.Unix.h_addr_list in | ||
440 | + Unix.ADDR_INET(addr.(0),port) | ||
441 | + | ||
442 | +let id_counter = ref 0 | ||
443 | + | ||
444 | +let get_id () = | ||
445 | + incr id_counter; | ||
446 | + "ID_" ^ (string_of_int !id_counter) | ||
447 | + | ||
448 | +let get_query_id = function | ||
449 | + ENIAMsubsyntaxTypes.AltText[_;ENIAMsubsyntaxTypes.CONLL,ENIAMsubsyntaxTypes.StructText([ENIAMsubsyntaxTypes.StructParagraph[p]])] -> if p.ENIAMsubsyntaxTypes.pid = "" then get_id () else p.ENIAMsubsyntaxTypes.pid | ||
450 | + | ENIAMsubsyntaxTypes.AltText[ENIAMsubsyntaxTypes.CONLL,ENIAMsubsyntaxTypes.StructText([ENIAMsubsyntaxTypes.StructParagraph[p]])] -> if p.ENIAMsubsyntaxTypes.pid = "" then get_id () else p.ENIAMsubsyntaxTypes.pid | ||
451 | + | _ -> failwith "get_query_id" | ||
452 | + | ||
453 | +let process_id s = | ||
454 | + if Xstring.check_prefix "ID_" s then s else | ||
455 | + let a,b,c = match Xstring.split_delim "/" s with | ||
456 | + [a;b;c] -> a,b,c | ||
457 | + | _ -> failwith ("process_id: " ^ s) in | ||
458 | + if Xstring.check_prefix "NKJP_1M_" a && Xstring.check_prefix "morph_" b && Xstring.check_sufix "-p" b && | ||
459 | + Xstring.check_prefix "morph_" c && Xstring.check_sufix "-s" c then | ||
460 | + Xstring.cut_prefix "NKJP_1M_" a ^ "." ^ Xstring.cut_sufix "-s" (Xstring.cut_prefix "morph_" c) | ||
461 | + else failwith ("process_id: " ^ s) | ||
424 | 462 | ||
425 | let process_conll_corpus filename = | 463 | let process_conll_corpus filename = |
464 | + print_endline "process_conll_corpus: START"; | ||
426 | let corpus = File.file_in filename (fun file -> CONLL.match_corpus (CONLL.load_corpus file)) in | 465 | let corpus = File.file_in filename (fun file -> CONLL.match_corpus (CONLL.load_corpus file)) in |
427 | - print_endline "process_conll_corpus"; | ||
428 | - let corpus = [List.hd corpus] in | 466 | + print_endline "process_conll_corpus: DONE"; |
467 | + (* let corpus = [List.hd corpus] in *) | ||
429 | let ic,oc = Unix.open_connection (get_sock_addr Paths.pre_host Paths.pre_port) in | 468 | let ic,oc = Unix.open_connection (get_sock_addr Paths.pre_host Paths.pre_port) in |
430 | - Xlist.iter corpus (fun query -> | 469 | + print_endline "connection_opened"; |
470 | + Xlist.iter corpus (fun (query,tokens) -> | ||
431 | let id = process_id (get_query_id query) in | 471 | let id = process_id (get_query_id query) in |
432 | let path = "results/" ^ id ^ "/" in | 472 | let path = "results/" ^ id ^ "/" in |
433 | ignore (Sys.command ("mkdir -p " ^ path)); | 473 | ignore (Sys.command ("mkdir -p " ^ path)); |
434 | - let result = process_query ic oc 30. false "x" query 10 in | ||
435 | - Visualization.print_html_text path "input_text" result.input_text; | ||
436 | - Visualization.print_html_text path "pre_text" result.pre_text; | ||
437 | - Visualization.print_html_text path "parsed_text" result.parsed_text; | ||
438 | - Visualization.print_html_text path "selected_sent_text" result.selected_sent_text; | ||
439 | - Visualization.print_html_text path "semantic_text" result.semantic_text; | ||
440 | - Visualization.print_html_text path "selected_semantic_text" result.selected_semantic_text; | 474 | + let result = process_query ic oc 30. false "x" (query,tokens) 10 in |
475 | + (* Visualization.print_html_text path "input_text" result.input_text tokens; | ||
476 | + Visualization.print_html_text path "pre_text" result.pre_text tokens; | ||
477 | + Visualization.print_html_text path "parsed_text" result.parsed_text tokens; | ||
478 | + Visualization.print_html_text path "selected_sent_text" result.selected_sent_text tokens; | ||
479 | + Visualization.print_html_text path "semantic_text" result.semantic_text tokens; | ||
480 | + Visualization.print_html_text path "selected_semantic_text" result.selected_semantic_text tokens; *) | ||
441 | (* printf "input_text:\n%s\n" (Visualization.string_of_text result.input_text); | 481 | (* printf "input_text:\n%s\n" (Visualization.string_of_text result.input_text); |
442 | printf "pre_text:\n%s\n" (Visualization.string_of_text result.pre_text); *) | 482 | printf "pre_text:\n%s\n" (Visualization.string_of_text result.pre_text); *) |
443 | (* Exec.print_result stdout result; *) | 483 | (* Exec.print_result stdout result; *) |
@@ -445,13 +485,15 @@ let process_conll_corpus filename = | @@ -445,13 +485,15 @@ let process_conll_corpus filename = | ||
445 | (* CompTrees.compare_results result.parsed_text; *) | 485 | (* CompTrees.compare_results result.parsed_text; *) |
446 | (* Visualization.print_paths "results/" "paths" result.paths; *) | 486 | (* Visualization.print_paths "results/" "paths" result.paths; *) |
447 | ()); | 487 | ()); |
448 | - Marshal.to_channel oc (PreTypes.RawText "",ExtArray.make 1 ENIAMtokenizerTypes.empty_token) []; | 488 | + Marshal.to_channel oc (ENIAMsubsyntaxTypes.RawText "",ExtArray.make 1 ENIAMtokenizerTypes.empty_token) []; |
449 | flush oc; | 489 | flush oc; |
450 | let _ = Unix.shutdown_connection ic in | 490 | let _ = Unix.shutdown_connection ic in |
451 | () | 491 | () |
452 | 492 | ||
453 | let _ = | 493 | let _ = |
494 | + LCGfields.reset(); | ||
454 | (* process_conll_corpus "../../NLP resources/Skladnica-zaleznosciowa-mod_130121.conll"; *) | 495 | (* process_conll_corpus "../../NLP resources/Skladnica-zaleznosciowa-mod_130121.conll"; *) |
455 | - (* process_conll_corpus "../../NLP resources/skladnica_zaleznosciowa.conll"; *) | ||
456 | - process_conll_corpus "../testy/skladnica-test1.conll"; | 496 | + process_conll_corpus "../../NLP resources/skladnica_zaleznosciowa.conll"; |
497 | + (* process_conll_corpus "../testy/skladnica-test1.conll"; *) | ||
498 | + LCGfields.print_results(); | ||
457 | () | 499 | () |
corpora/test_conll2.ml
@@ -116,7 +116,7 @@ let test_example path id tokens lex_sems paths last = | @@ -116,7 +116,7 @@ let test_example path id tokens lex_sems paths last = | ||
116 | let test_dep_example path id tokens lex_sems paths = | 116 | let test_dep_example path id tokens lex_sems paths = |
117 | try | 117 | try |
118 | ENIAM_LCGreductions.reset_variant_label (); | 118 | ENIAM_LCGreductions.reset_variant_label (); |
119 | - (* let paths = CONLL_adapter.convert_dep_tree id first_try paths tokens lex_sems in *) | 119 | + let paths = CONLL_adapter.convert_dep_tree id (*first_try*) true paths tokens in |
120 | ENIAMsubsyntaxHTMLof.print_dep_sentence path (id^"1_paths") tokens paths; | 120 | ENIAMsubsyntaxHTMLof.print_dep_sentence path (id^"1_paths") tokens paths; |
121 | let chart = create_dep_chart tokens lex_sems paths in | 121 | let chart = create_dep_chart tokens lex_sems paths in |
122 | ENIAM_LCGlatexOf.print_dep_chart path (id^"1_chart") "a1" chart; | 122 | ENIAM_LCGlatexOf.print_dep_chart path (id^"1_chart") "a1" chart; |
@@ -150,7 +150,7 @@ let test_dep_example path id tokens lex_sems paths = | @@ -150,7 +150,7 @@ let test_dep_example path id tokens lex_sems paths = | ||
150 | let rec parse_sentence name id tokens lex_sems = function | 150 | let rec parse_sentence name id tokens lex_sems = function |
151 | RawSentence s -> id | 151 | RawSentence s -> id |
152 | | StructSentence(paths,last) -> | 152 | | StructSentence(paths,last) -> |
153 | - test_example ("results/" ^ name^"/") (string_of_int id ^ "_") tokens lex_sems paths last; | 153 | + (* test_example ("results/" ^ name^"/") (string_of_int id ^ "_") tokens lex_sems paths last; *) |
154 | id + 1 | 154 | id + 1 |
155 | | DepSentence(paths) -> | 155 | | DepSentence(paths) -> |
156 | test_dep_example ("results/" ^ name ^ "/") (string_of_int id ^ "_") tokens lex_sems paths; | 156 | test_dep_example ("results/" ^ name ^ "/") (string_of_int id ^ "_") tokens lex_sems paths; |
diagnostics/treeChange.ml deleted
1 | -open Xstd | ||
2 | -open PreTypes | ||
3 | - | ||
4 | -let if_lemma lemmas = function | ||
5 | - Lemma(l,_,_) -> List.exists (fun x -> x = l) lemmas | ||
6 | - | _ -> false | ||
7 | - | ||
8 | -let if_cat cats = function | ||
9 | - Lemma(_,cat,_) -> List.exists (fun x -> x = cat) cats | ||
10 | - | _ -> false | ||
11 | - | ||
12 | -let if_interps interps token = | ||
13 | - let interp = match token with | ||
14 | - Lemma(_,_,i) -> i | ||
15 | - | _ -> [[[]]] in | ||
16 | - let if_interp nr value = | ||
17 | - List.exists (fun x -> | ||
18 | - try | ||
19 | - List.exists (fun y -> | ||
20 | - y = value) (List.nth x nr) | ||
21 | - with _ -> false | ||
22 | - ) interp in | ||
23 | - Xlist.fold interps true (fun acc (nr,value) -> acc && (if_interp nr value)) | ||
24 | - | ||
25 | -let correct_coordination1 paths tokens = | ||
26 | - let paths_ls = List.mapi (fun i (id,super,label) -> | ||
27 | - (i,id,super,label)) (Array.to_list paths) in | ||
28 | - | ||
29 | - let l = [("subst:nom",0),(["fin";"praet"],0); | ||
30 | - ("subst:acc",0),(["inf"],0); | ||
31 | - ("ppron3:nom",0),(["fin";"praet"],0); | ||
32 | - ("ppron3:acc",0),(["fin";"praet"],0); | ||
33 | - ("adv",0),(["fin";"praet"],0); | ||
34 | - ("adv",0),(["inf"],0); | ||
35 | - ("adv",0),(["adj"],0); | ||
36 | - ("prep",0),(["fin";"praet"],0); | ||
37 | - ("prep",0),(["inf"],0); | ||
38 | - ("prep",0),(["ppas"],0); | ||
39 | - ("prep",0),(["subst"],0); | ||
40 | - ("prep:gen",0),(["subst:gen"],0); | ||
41 | - ("adj:nom",0),(["fin";"praet"],0); | ||
42 | - ("adj:nom",0),(["subst:nom"],0); | ||
43 | - ("adj:gen",0),(["subst:gen"],0); | ||
44 | - ("adj:dat",0),(["subst:dat"],0); | ||
45 | - ("adj:acc",0),(["subst:acc"],0); | ||
46 | - ("adj:inst",0),(["subst:inst"],0); | ||
47 | - ("adj:loc",0),(["subst:loc"],0); | ||
48 | - ("subst:gen",0),(["subst:nom"],0); | ||
49 | - (* ("subst:gen",0),(["subst:gen"],0); *) | ||
50 | - ("subst:gen",0),(["subst:dat"],0); | ||
51 | - ("subst:gen",0),(["subst:acc"],0); | ||
52 | - ("subst:gen",0),(["subst:inst"],0); | ||
53 | - ("subst:gen",0),(["subst:loc"],0); | ||
54 | - ("ppron3:gen",0),(["subst:nom"],0); | ||
55 | - ("ppron3:gen",0),(["subst:dat"],0); | ||
56 | - ("ppron3:gen",0),(["subst:acc"],0); | ||
57 | - ("ppron3:gen",0),(["subst:inst"],0); | ||
58 | - ("ppron3:gen",0),(["subst:loc"],0); | ||
59 | - ("qub",0),(["fin";"praet"],0); | ||
60 | - ("qub",0),(["subst"],0); | ||
61 | - ("qub",0),(["adj"],0); | ||
62 | - ("pact",0),(["subst"],0); | ||
63 | - ("ppas",0),(["subst"],0) | ||
64 | - ] in | ||
65 | - | ||
66 | - let find_dependents sons = | ||
67 | - | ||
68 | - let is (i,id,super,label) pattern = match Xstring.split ":" pattern with | ||
69 | - ["prep";case] -> if_cat ["prep"] (ExtArray.get tokens id).token && | ||
70 | - if_interps [0,case] (ExtArray.get tokens id).token | ||
71 | - | [cat;case] -> if_cat [cat] (ExtArray.get tokens id).token && | ||
72 | - if_interps [1,case] (ExtArray.get tokens id).token | ||
73 | - | [cat] -> if_cat [cat] (ExtArray.get tokens id).token | ||
74 | - | _ -> failwith "is (in correct_coordination1)" in | ||
75 | - | ||
76 | - let incr_representative acc son = Xlist.map acc (fun ((one,a),(rest,b)) -> | ||
77 | - if is son one | ||
78 | - then (one,a + 1), (rest,b) | ||
79 | - else if List.exists (is son) rest | ||
80 | - then (one,a), (rest,b + 1) | ||
81 | - else (one,a), (rest,b)) in | ||
82 | - | ||
83 | - let get_from sons pattern = List.find (fun x -> is x pattern) sons in | ||
84 | - | ||
85 | - let l = Xlist.fold sons l incr_representative in | ||
86 | - let results = List.filter (fun ((_,a),(_,b)) -> a = 1 && b > 1) l in | ||
87 | - Xlist.map results (fun result -> | ||
88 | - get_from sons @@ fst @@ fst result, | ||
89 | - List.filter (fun son -> | ||
90 | - List.exists (fun one -> is son one) (fst (snd result))) sons) in | ||
91 | - | ||
92 | - let establish_neighbour super ((i_d,id_d,super_d,label_d),sons) = | ||
93 | - let not_between (i_s,_,_,_) = | ||
94 | - (super < i_d && super < i_s) || | ||
95 | - (super > i_d && super > i_s) in | ||
96 | - let (i_n,id_n,super_n,label_n) = List.find (fun son -> | ||
97 | - not_between son) sons in | ||
98 | - paths.(i_d) <- (id_d, i_n, label_d) in | ||
99 | - | ||
100 | - let examine_coords (i,id,super,label) sons = | ||
101 | - try | ||
102 | - let dependents = find_dependents sons in | ||
103 | - Xlist.iter dependents (establish_neighbour super) | ||
104 | - with | ||
105 | - | _ -> () in | ||
106 | - | ||
107 | - Array.iteri (fun i (id,super,label) -> | ||
108 | - if if_cat ["conj"] (ExtArray.get tokens id).token | ||
109 | - then (let sons = List.filter (fun (_,_,super,_) -> super = i) paths_ls in | ||
110 | - if (List.length sons > 2) | ||
111 | - then examine_coords (i,id,super,label) sons)) paths; | ||
112 | - paths | ||
113 | - | ||
114 | -let correct_coordination2 paths tokens = | ||
115 | - let paths_c = Array.copy paths in | ||
116 | - let paths_ls () = List.mapi (fun i (id,super,label) -> | ||
117 | - (i,id,super,label)) (Array.to_list paths_c) in | ||
118 | - | ||
119 | - (* let ps a sons = | ||
120 | - print_endline a; | ||
121 | - List.iter (fun (i,_,_,_) -> print_endline (ExtArray.get tokens i).orth) sons; | ||
122 | - print_endline "" in *) | ||
123 | - | ||
124 | - let rec correct_rec (i,id,super,label) sons = | ||
125 | - let left_s, right_s = List.partition (fun (a,b,c,d) -> a < i) sons in | ||
126 | - (* ps "left:" (List.rev left_s); | ||
127 | - ps "right:" right_s; *) | ||
128 | - find_father i (List.rev left_s); | ||
129 | - find_father i right_s | ||
130 | - | ||
131 | - and find_father i0 = function | ||
132 | - [(i,id,super,label)] -> paths_c.(i) <- (id,i0,label) | ||
133 | - | (a,b,c,d) :: (i,id,super,label) :: t -> | ||
134 | - paths_c.(i) <- (id,i0,label); | ||
135 | - if not (if_cat ["conj"] (ExtArray.get tokens i).token || | ||
136 | - (ExtArray.get tokens i).orth = ",") | ||
137 | - then failwith "find_father"; | ||
138 | - correct_rec (i,id,super,label) (if a < i | ||
139 | - then (a,b,c,d) :: t | ||
140 | - else List.rev @@ (a,b,c,d) :: t) | ||
141 | - | _ -> failwith "find_father" in | ||
142 | - | ||
143 | - let check_previous_for_interp i = | ||
144 | - if i >= 0 && (ExtArray.get tokens i).orth = "," && | ||
145 | - not (List.exists (fun (_,super,_) -> super = i) (Array.to_list paths_c)) | ||
146 | - then paths_c.(i) <- (0,-1,"") in | ||
147 | - | ||
148 | - Array.iteri (fun i (id,super,label) -> | ||
149 | - if if_cat ["conj"] (ExtArray.get tokens i).token || | ||
150 | - (ExtArray.get tokens i).orth = "," | ||
151 | - then | ||
152 | - (check_previous_for_interp (i-1); | ||
153 | - let sons = List.filter (fun (_,_,super,_) -> super = i) (paths_ls ()) in | ||
154 | - if (List.length sons > 2) | ||
155 | - then correct_rec (i,id,super,label) sons)) paths_c; | ||
156 | - paths_c | ||
157 | - | ||
158 | -let praet_qub_aglt paths tokens = | ||
159 | - Array.iteri (fun i (id,super,label) -> | ||
160 | - if super >= 0 then | ||
161 | - (let id_s, super_s, label_s = paths.(super) in | ||
162 | - if if_cat ["aglt"] (ExtArray.get tokens id).token && | ||
163 | - (ExtArray.get tokens id_s).orth = "by" | ||
164 | - then let id_gf,super_gf,label_gf = paths.(super_s) in | ||
165 | - if if_cat ["praet"] (ExtArray.get tokens id_gf).token | ||
166 | - then paths.(i) <- (id,super_s,label))) paths; | ||
167 | - paths | ||
168 | - | ||
169 | -let replace_tokens paths tokens = | ||
170 | -(* for i = 0 to ExtArray.size tokens - 1 do | ||
171 | - print_endline (string_of_int i ^ ": "^ (ExtArray.get tokens i).orth) | ||
172 | -done; *) | ||
173 | - let find_token orth = Int.fold 0 (ExtArray.size tokens - 1) 0 (fun acc i -> | ||
174 | - if (ExtArray.get tokens i).orth = orth then i else acc) in | ||
175 | - | ||
176 | - let multidot i id0 super0 label0 = | ||
177 | - let id1, super1, label1 = paths.(super0) in | ||
178 | - if super1 >= 0 then | ||
179 | - let id2, super2, label2 = paths.(super1) in | ||
180 | - if (ExtArray.get tokens id1).orth = "." && | ||
181 | - (ExtArray.get tokens id2).orth = "." | ||
182 | - then | ||
183 | - (paths.(super1) <- (find_token "..." ,super2, label2); | ||
184 | - paths.(super0) <- (0,-1,""); | ||
185 | - paths.(i) <- (0,-1,"")) in | ||
186 | - | ||
187 | - let brev i id super label = | ||
188 | - let if_the_last_dot () = | ||
189 | - let (id_dot, s_dot, l_dot) = List.find (fun (i2,s,l) -> | ||
190 | - s = i && ((ExtArray.get tokens i2).orth = "." || (ExtArray.get tokens i2).orth = "...")) (Array.to_list paths) in | ||
191 | - Array.fold_left (fun acc (i2,s,l) -> | ||
192 | - acc && (ExtArray.get tokens i2).beg <= (ExtArray.get tokens id_dot).beg) true paths in | ||
193 | - | ||
194 | - let dot = if if_interps [0,"npun"] (ExtArray.get tokens id).token || if_the_last_dot () | ||
195 | - then "" | ||
196 | - else "." in | ||
197 | - | ||
198 | - let n_orth = (ExtArray.get tokens id).orth ^ dot in | ||
199 | - paths.(i) <- (find_token n_orth,super,label) in | ||
200 | - | ||
201 | - Array.iteri (fun i (id,super,label) -> | ||
202 | - if (ExtArray.get tokens id).orth = "." | ||
203 | - then multidot i id super label; | ||
204 | - if if_cat ["brev"] (ExtArray.get tokens id).token | ||
205 | - then brev i id super label) | ||
206 | - paths; | ||
207 | - paths | ||
208 | - | ||
209 | -let replace_hyphens paths tokens = | ||
210 | - let ref_paths = ref paths in | ||
211 | - let find_token token = Int.fold 0 (ExtArray.size tokens - 1) 0 (fun acc i -> | ||
212 | - if (ExtArray.get tokens i).token = token then i else acc) in | ||
213 | - let find_specific_token token beg next = Int.fold 0 (ExtArray.size tokens - 1) 0 (fun acc i -> | ||
214 | - if (ExtArray.get tokens i).token = token && | ||
215 | - beg <= (ExtArray.get tokens i).beg && | ||
216 | - (ExtArray.get tokens i).next <= next | ||
217 | - then i else acc) in | ||
218 | - | ||
219 | - let correct_last son_of_zero = | ||
220 | - let i1,s1,l1 = !ref_paths.(Array.length !ref_paths - 1) in | ||
221 | - let i2,s2,l2 = !ref_paths.(son_of_zero) in | ||
222 | - if (ExtArray.get tokens i1).orth = "." | ||
223 | - then | ||
224 | - (!ref_paths.(Array.length !ref_paths - 1) <- (find_token (Interp "</sentence>"),1,l1); | ||
225 | - !ref_paths.(son_of_zero) <- (i2,Array.length !ref_paths - 1,l2)) | ||
226 | - else | ||
227 | - (ref_paths := Array.append !ref_paths [| (find_token (Interp "</sentence>"),1,"-") |]; | ||
228 | - !ref_paths.(Array.length !ref_paths - 2) <- (i1,Array.length !ref_paths - 1,l1); | ||
229 | - !ref_paths.(son_of_zero) <- (i2,Array.length !ref_paths - 1,l2)) in | ||
230 | - | ||
231 | - let one_hyphen sons_of_zero = | ||
232 | - let i2,s2,l2 = !ref_paths.(1) in | ||
233 | - Xlist.iter sons_of_zero (fun son_of_zero -> | ||
234 | - let i1,s1,l1 = !ref_paths.(son_of_zero) in | ||
235 | - !ref_paths.(son_of_zero) <- (i1,1,l1)); | ||
236 | - !ref_paths.(1) <- (find_token (Interp "<or-sentence>"),0,l2); | ||
237 | - correct_last son_of_zero in | ||
238 | - | ||
239 | - let two_hyphens first second son parent = | ||
240 | - let i1,s1,l1 = !ref_paths.(first) in | ||
241 | - let i2,s2,l2 = !ref_paths.(second) in | ||
242 | - let beg, next = (ExtArray.get tokens i2).beg, (ExtArray.get tokens i2).next in | ||
243 | - let i3,s3,l3 = !ref_paths.(son) in | ||
244 | - let i4,s4,l4 = !ref_paths.(parent) in | ||
245 | - ref_paths := Array.append !ref_paths [| (find_token (Interp "</sentence>"),first,"-") |]; | ||
246 | - !ref_paths.(first) <- (find_token (Interp "<or-sentence>"),0,l1); | ||
247 | - !ref_paths.(second) <- (find_specific_token (Interp "</or-sentence>") beg next,first,l2); | ||
248 | - !ref_paths.(son) <- (i3,second,l3); | ||
249 | - !ref_paths.(parent) <- (i4,first,l4) in | ||
250 | - | ||
251 | - let rec is_dep_correct a b out zero res i (id,super,label) = (* out = how many words in (a,b) have parent outside [a,b]*) | ||
252 | - (* print_endline ((string_of_int a) ^ " " ^ (string_of_int b) ^ " " ^ (string_of_int out) ^ " " ^ (string_of_int zero) ^ " " ^ (string_of_int i)); *) | ||
253 | - if out > 1 || zero > 1 || (* zero = how many words (not interps) have parent 0 *) | ||
254 | - (a < i && i < b && super < a && label <> "interp") || | ||
255 | - (a < super && super < b && (i < a || b < i)) | ||
256 | - then false, res | ||
257 | - else | ||
258 | - if i+1 = Array.length !ref_paths | ||
259 | - then out = 1 && zero = 1, res | ||
260 | - else | ||
261 | - if a < i && i < b && b < super | ||
262 | - then is_dep_correct a b (out+1) zero (i,super) (i+1) !ref_paths.(i+1) | ||
263 | - else | ||
264 | - if super = 0 && not (if_cat ["interp"] (ExtArray.get tokens id).token) | ||
265 | - then is_dep_correct a b out (zero+1) res (i+1) !ref_paths.(i+1) | ||
266 | - else is_dep_correct a b out zero res (i+1) !ref_paths.(i+1) in | ||
267 | - | ||
268 | - let hyphens = snd @@ Array.fold_left (fun (i,acc) (id,super,label) -> | ||
269 | - if (ExtArray.get tokens id).orth = "-" | ||
270 | - then i+1, i :: acc | ||
271 | - else i+1, acc) (0,[]) !ref_paths in | ||
272 | - | ||
273 | - let sons_of_zero = snd @@ Array.fold_left (fun (i,acc) (id,super,label) -> | ||
274 | - if super = 0 && not (if_cat ["interp"] (ExtArray.get tokens id).token) | ||
275 | - then i+1, i :: acc | ||
276 | - else i+1, acc) (0,[]) !ref_paths in | ||
277 | - | ||
278 | - (if List.length sons_of_zero = 1 | ||
279 | - then | ||
280 | - if List.length hyphens = 1 && hyphens = [1] | ||
281 | - then one_hyphen sons_of_zero | ||
282 | - else | ||
283 | - if List.length hyphens = 2 | ||
284 | - then let a, b = List.nth hyphens 1, List.nth hyphens 0 in | ||
285 | - let is_good, (son,parent) = is_dep_correct a b 0 0 (0,0) 1 !ref_paths.(1) in | ||
286 | - if a = 1 && is_good | ||
287 | - then two_hyphens a b son parent); | ||
288 | - !ref_paths | ||
289 | - | ||
290 | -let correct_interp_with_father_0 paths tokens = | ||
291 | - Array.iteri (fun i (id,super,label) -> | ||
292 | - if (super = 0 || | ||
293 | - (ExtArray.get tokens id).token = Interp "<or-sentence>" || | ||
294 | - (ExtArray.get tokens id).token = Interp "</or-sentence>") && (ExtArray.get tokens id).orth = "," | ||
295 | - then Array.iteri (fun i1 (id1,super1,label1) -> | ||
296 | - if super1 = i | ||
297 | - then paths.(i1) <- (id1,0,label1)) paths) paths; | ||
298 | - paths | ||
299 | - | ||
300 | -let remove_interps interp paths tokens = | ||
301 | - let paths_ls = Array.to_list paths in | ||
302 | - Array.iteri (fun i (id,super,label) -> | ||
303 | - if (ExtArray.get tokens id).orth = interp && | ||
304 | - not (List.exists (fun (_,super,_) -> super = i) paths_ls) | ||
305 | - then paths.(i) <- (0,-1,"")) paths; | ||
306 | - paths | ||
307 | - | ||
308 | -let correct_passive_voice paths tokens = | ||
309 | - Array.iteri (fun i (id,super,label) -> | ||
310 | - if super >= 0 then | ||
311 | - (let id_s, super_s, label_s = paths.(super) in | ||
312 | - if (if_cat ["praet"] (ExtArray.get tokens id).token && | ||
313 | - if_cat ["ppas"] (ExtArray.get tokens id_s).token) | ||
314 | - then (paths.(i) <- (id,super_s,label); | ||
315 | - paths.(super) <- (id_s,i,label_s); | ||
316 | - Array.iteri (fun i_p (id_p,super_p,label_p) -> | ||
317 | - if super_p = super | ||
318 | - then paths.(i_p) <- (id_p,i,label_p)) paths))) paths; | ||
319 | - paths | ||
320 | - | ||
321 | -let swap_dep paths tokens = | ||
322 | - let change_dep i (id,super,label) = | ||
323 | - let id_S, super_S, label_S = paths.(super) in | ||
324 | - paths.(i) <- (id,super_S,label); | ||
325 | - paths.(super) <- (id_S, id, label_S) in | ||
326 | - let rec correct_dep i (id,super,label) = | ||
327 | - let adv_relators = ["kto";"co";"ile";"czyj";"jaki";"który"; | ||
328 | - "jak";"skąd";"dokąd";"gdzie";"którędy";"kiedy";"odkąd";"dlaczego";"czemu";"gdy"] in | ||
329 | - if (if_cat ["comp"] (ExtArray.get tokens id).token && | ||
330 | - if_cat ["fin"; "praet"; "winien"; "pred"; "imps"; "ppas"] (ExtArray.get tokens super).token) || | ||
331 | - (if_cat ["conj"] (ExtArray.get tokens id).token && | ||
332 | - if_cat ["fin"; "praet"; "winien"; "pred"; "imps"; "ppas"] (ExtArray.get tokens super).token && | ||
333 | - not (List.exists (fun (_,super,_) -> super = i) (Array.to_list paths))) || | ||
334 | - (if_cat ["ppron3"] (ExtArray.get tokens id).token && | ||
335 | - if_interps [5,"praep"] (ExtArray.get tokens id).token) || | ||
336 | - (if_lemma adv_relators (ExtArray.get tokens id).token && | ||
337 | - if_cat ["fin"; "praet"; "winien"; "pred"; "imps"; "ppas"; "subst"] (ExtArray.get tokens super).token) | ||
338 | - then | ||
339 | - change_dep i (id,super,label); | ||
340 | - if (if_lemma adv_relators (ExtArray.get tokens id).token && | ||
341 | - if_cat ["subst"; "pred"] (ExtArray.get tokens super).token) | ||
342 | - then correct_dep i paths.(i) in | ||
343 | - Array.iteri correct_dep paths; paths | ||
344 | - | ||
345 | - (* | ||
346 | - correct_coordination1 -> sąsiad słowem najbliższym po prawej, jeśli pomiędzy nim a mną spójnik, to najbliższym po lewej | ||
347 | - nieobsługiwana na razie koordynacja strony biernej - zarówno czasowniki posiłkowe, jak i imiesłowy | ||
348 | - nieobsługiwana na razie koordynacja podrzędników spójników podrzędnych *) |
parser/visualization.ml
@@ -916,7 +916,7 @@ let rec html_of_text path tokens = function | @@ -916,7 +916,7 @@ let rec html_of_text path tokens = function | ||
916 | sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_text path tokens text))) ^ | 916 | sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_text path tokens text))) ^ |
917 | "</table>" | 917 | "</table>" |
918 | 918 | ||
919 | -let print_html_text path name text tokens lex_sems = | 919 | +let print_html_text path name text tokens (*lex_sems*) = |
920 | File.file_out (path ^ name ^ ".html") (fun file -> | 920 | File.file_out (path ^ name ^ ".html") (fun file -> |
921 | fprintf file "%s\n" html_header; | 921 | fprintf file "%s\n" html_header; |
922 | fprintf file "%s<BR>\n" (html_of_text path tokens text); | 922 | fprintf file "%s<BR>\n" (html_of_text path tokens text); |
pre/makefile
@@ -3,7 +3,7 @@ OCAMLOPT=ocamlopt | @@ -3,7 +3,7 @@ OCAMLOPT=ocamlopt | ||
3 | OCAMLDEP=ocamldep | 3 | OCAMLDEP=ocamldep |
4 | INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam | 4 | INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam |
5 | OCAMLFLAGS=$(INCLUDES) -g | 5 | OCAMLFLAGS=$(INCLUDES) -g |
6 | -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-plWordnet.cmxa eniam-walenty.cmxa eniam-integration.cmxa eniam-lexSemantics.cmxa | 6 | +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-plWordnet.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa eniam-integration.cmxa eniam-lexSemantics.cmxa |
7 | INSTALLDIR=`ocamlc -where` | 7 | INSTALLDIR=`ocamlc -where` |
8 | 8 | ||
9 | WAL= paths.ml | 9 | WAL= paths.ml |
pre/preProcessing.ml
@@ -121,9 +121,9 @@ let parse_text = function | @@ -121,9 +121,9 @@ let parse_text = function | ||
121 | let lex_sems = ENIAMlexSemantics.assign tokens text in | 121 | let lex_sems = ENIAMlexSemantics.assign tokens text in |
122 | text,tokens,lex_sems | 122 | text,tokens,lex_sems |
123 | | AltText[Raw,RawText query;CONLL,StructText[ | 123 | | AltText[Raw,RawText query;CONLL,StructText[ |
124 | - StructParagraph[{psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence dep_paths]} as p]]],tokens -> | 124 | + StructParagraph[{sentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence dep_paths]} as p]]],tokens -> |
125 | let m_dep_paths = Array.map (fun (id,_,_) -> id,-1,"") dep_paths in | 125 | let m_dep_paths = Array.map (fun (id,_,_) -> id,-1,"") dep_paths in |
126 | - let conll = StructParagraph[{p with psentence = AltSentence([Raw, RawSentence text; CONLL, DepSentence dep_paths] | 126 | + let conll = StructParagraph[{p with sentence = AltSentence([Raw, RawSentence text; CONLL, DepSentence dep_paths] |
127 | @ if Paths.config.Paths.mate_parser_enabled then [Mate, DepSentence m_dep_paths] else [])}] in | 127 | @ if Paths.config.Paths.mate_parser_enabled then [Mate, DepSentence m_dep_paths] else [])}] in |
128 | let paths = ENIAMsubsyntax.parse query in | 128 | let paths = ENIAMsubsyntax.parse query in |
129 | let sentences = ENIAMsentences.split_into_sentences "" query tokens paths in | 129 | let sentences = ENIAMsentences.split_into_sentences "" query tokens paths in |
@@ -135,7 +135,7 @@ let parse_text = function | @@ -135,7 +135,7 @@ let parse_text = function | ||
135 | 135 | ||
136 | let rec main_loop in_chan out_chan = | 136 | let rec main_loop in_chan out_chan = |
137 | (* print_endline "main_loop 1"; *) | 137 | (* print_endline "main_loop 1"; *) |
138 | - let query = (Marshal.from_channel in_chan : text * ENIAMtokenizerTypes.token_record ExtArray.t) in | 138 | + let query = (Marshal.from_channel in_chan : text * ENIAMtokenizerTypes.token_env ExtArray.t) in |
139 | (* print_endline "main_loop 2"; *) | 139 | (* print_endline "main_loop 2"; *) |
140 | if fst query = RawText "" then () else ( | 140 | if fst query = RawText "" then () else ( |
141 | (try | 141 | (try |
@@ -154,7 +154,7 @@ let rec main_loop in_chan out_chan = | @@ -154,7 +154,7 @@ let rec main_loop in_chan out_chan = | ||
154 | (* print_endline "main_loop 7"; *) | 154 | (* print_endline "main_loop 7"; *) |
155 | Marshal.to_channel out_chan ( | 155 | Marshal.to_channel out_chan ( |
156 | RawText "", | 156 | RawText "", |
157 | - ExtArray.make 1 ENIAMtokenizerTypes.empty_token, | 157 | + ExtArray.make 1 ENIAMtokenizerTypes.empty_token_env, |
158 | ExtArray.make 1 ENIAMlexSemanticsTypes.empty_lex_sem, | 158 | ExtArray.make 1 ENIAMlexSemanticsTypes.empty_lex_sem, |
159 | Printexc.to_string e, | 159 | Printexc.to_string e, |
160 | 0.) [])); | 160 | 0.) [])); |
testy/skladnica-test2.conll
@@ -11,7 +11,7 @@ | @@ -11,7 +11,7 @@ | ||
11 | 5 szanse szansa subst subst pl|acc|f 4 obj_th _ _ | 11 | 5 szanse szansa subst subst pl|acc|f 4 obj_th _ _ |
12 | 6 ? ? interp interp _ 4 punct _ _ | 12 | 6 ? ? interp interp _ 4 punct _ _ |
13 | 13 | ||
14 | -# trees/NKJP_1M_1202900095/morph_3-p/morph_3.46-s.xml.tree | 14 | +# trees/NKJP_1M_1202900095/morph_3-p/morph_3.46-s.xml.trees |
15 | 1 - - interp interp 0 _ _ _ | 15 | 1 - - interp interp 0 _ _ _ |
16 | 2 Słoń słoń subst subst sg|nom|m2 4 _ _ _ | 16 | 2 Słoń słoń subst subst sg|nom|m2 4 _ _ _ |
17 | 3 - - interp interp 0 _ _ _ | 17 | 3 - - interp interp 0 _ _ _ |
@@ -19,7 +19,7 @@ | @@ -19,7 +19,7 @@ | ||
19 | 5 Pinio Pinio subst subst sg|nom|m1 4 _ _ _ | 19 | 5 Pinio Pinio subst subst sg|nom|m1 4 _ _ _ |
20 | 6 . . interp interp 0 _ _ _ | 20 | 6 . . interp interp 0 _ _ _ |
21 | 21 | ||
22 | -# trees/NKJP_1M_2002000114/morph_2-p/morph_2.72-s.xml.tree | 22 | +# trees/NKJP_1M_2002000114/morph_2-p/morph_2.72-s.xml.trees |
23 | 1 - - interp interp 0 _ _ _ | 23 | 1 - - interp interp 0 _ _ _ |
24 | 2 Nie nie qub qub 3 _ _ _ | 24 | 2 Nie nie qub qub 3 _ _ _ |
25 | 3 mogę móc fin fin sg|pri|imperf 7 _ _ _ | 25 | 3 mogę móc fin fin sg|pri|imperf 7 _ _ _ |
@@ -29,7 +29,7 @@ | @@ -29,7 +29,7 @@ | ||
29 | 7 zachrypiał zachrypieć praet praet sg|m1|perf 0 _ _ _ | 29 | 7 zachrypiał zachrypieć praet praet sg|m1|perf 0 _ _ _ |
30 | 8 . . interp interp 0 _ _ _ | 30 | 8 . . interp interp 0 _ _ _ |
31 | 31 | ||
32 | -# trees/NKJP_1M_2002000028/morph_5-p/morph_5.40-s.xml.tree | 32 | +# trees/NKJP_1M_2002000028/morph_5-p/morph_5.40-s.xml.trees |
33 | 1 - - interp interp 0 _ _ _ | 33 | 1 - - interp interp 0 _ _ _ |
34 | 2 Właśnie właśnie qub qub 4 _ _ _ | 34 | 2 Właśnie właśnie qub qub 4 _ _ _ |
35 | 3 to to subst subst sg|acc|n 4 _ _ _ | 35 | 3 to to subst subst sg|acc|n 4 _ _ _ |
@@ -39,7 +39,7 @@ | @@ -39,7 +39,7 @@ | ||
39 | 7 twardo twardo adv adv pos 6 _ _ _ | 39 | 7 twardo twardo adv adv pos 6 _ _ _ |
40 | 8 . . interp interp 0 _ _ _ | 40 | 8 . . interp interp 0 _ _ _ |
41 | 41 | ||
42 | -# trees/NKJP_1M_1202000001/morph_3-p/morph_3.9-s.xml.tree | 42 | +# trees/NKJP_1M_1202000001/morph_3-p/morph_3.9-s.xml.trees |
43 | 1 CKM CKM subst subst sg|nom|n 0 _ _ _ | 43 | 1 CKM CKM subst subst sg|nom|n 0 _ _ _ |
44 | 2 : interp 0 _ _ _ | 44 | 2 : interp 0 _ _ _ |
45 | 3 Jak jak adv adv pos 5 _ _ _ | 45 | 3 Jak jak adv adv pos 5 _ _ _ |
@@ -50,7 +50,7 @@ | @@ -50,7 +50,7 @@ | ||
50 | 8 patrzeć patrzeć inf inf imperf 5 _ _ _ | 50 | 8 patrzeć patrzeć inf inf imperf 5 _ _ _ |
51 | 9 ? ? interp interp 0 _ _ _ | 51 | 9 ? ? interp interp 0 _ _ _ |
52 | 52 | ||
53 | -# trees/NKJP_1M_2001000023/morph_1-p/morph_1.61-s.xml.tree | 53 | +# trees/NKJP_1M_2001000023/morph_1-p/morph_1.61-s.xml.trees |
54 | 1 Pochylił pochylić praet praet sg|m1|perf 0 _ _ _ | 54 | 1 Pochylił pochylić praet praet sg|m1|perf 0 _ _ _ |
55 | 2 em być aglt aglt sg|pri|imperf|wok 1 _ _ _ | 55 | 2 em być aglt aglt sg|pri|imperf|wok 1 _ _ _ |
56 | 3 się się qub qub 1 _ _ _ | 56 | 3 się się qub qub 1 _ _ _ |