Commit 7b958d71f96e5566a8099ce5afa9b62f3d325e28
1 parent
d6fcfbc6
analiza banku drzew zależnościowych
Showing
53 changed files
with
9396 additions
and
1241 deletions
Too many changes to show.
To preserve performance only 23 of 53 files are displayed.
LCGparser/ENIAM_LCGchart.ml
... | ... | @@ -250,8 +250,9 @@ let is_dep_parsed = function |
250 | 250 | [] -> false |
251 | 251 | | [Bracket(false,false,Tensor[Atom "<conll_root>"]),_] -> true |
252 | 252 | | [Bracket(false,false,Imp(Tensor[Atom("<conll_root>")],Forward,Maybe _)),sem]-> true |
253 | - | [t,_] -> print_endline @@ ENIAM_LCGstringOf.grammar_symbol_prime t; failwith "is_dep_parsed" | |
254 | - | l -> failwith ("is_dep_parsed " ^ (String.concat " " @@ List.map (fun x -> ENIAM_LCGstringOf.grammar_symbol 0 @@ fst x) l)) | |
253 | + | _ -> false | |
254 | + (* | [t,_] -> print_endline @@ ENIAM_LCGstringOf.grammar_symbol_prime t; failwith "is_dep_parsed" | |
255 | + | l -> failwith ("is_dep_parsed " ^ (String.concat " " @@ List.map (fun x -> ENIAM_LCGstringOf.grammar_symbol 0 @@ fst x) l)) *) | |
255 | 256 | (* | l -> failwith ("is_dep_parsed " ^ (string_of_int @@ List.length l)) |
256 | 257 | *) |
257 | 258 | let get_parsed_term chart = |
... | ... |
compile.sh
1 | 1 | #!/bin/bash |
2 | 2 | |
3 | -cd xt | |
4 | -make clean | |
5 | -sudo make install | |
6 | -make clean | |
7 | -cd .. | |
8 | - | |
9 | 3 | cd tokenizer |
10 | 4 | make clean |
11 | 5 | sudo make install |
... | ... | @@ -45,6 +39,12 @@ sudo make install |
45 | 39 | make clean |
46 | 40 | cd .. |
47 | 41 | |
42 | +cd xt | |
43 | +make clean | |
44 | +sudo make install | |
45 | +make clean | |
46 | +cd .. | |
47 | + | |
48 | 48 | cd lexSemantics |
49 | 49 | make clean |
50 | 50 | sudo make install |
... | ... |
corpora/CONLL2.ml
0 → 100644
1 | +(* | |
2 | + * ENIAMcorpora is a library that integrates ENIAM with corpora in CONLL format | |
3 | + * Copyright (C) 2016 Daniel Oklesinski <oklesinski dot daniel atSPAMfree gmail dot com> | |
4 | + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
5 | + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
6 | + * | |
7 | + * This library is free software: you can redistribute it and/or modify | |
8 | + * it under the terms of the GNU Lesser General Public License as published by | |
9 | + * the Free Software Foundation, either version 3 of the License, or | |
10 | + * (at your option) any later version. | |
11 | + * | |
12 | + * This library is distributed in the hope that it will be useful, | |
13 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | + * GNU Lesser General Public License for more details. | |
16 | + * | |
17 | + * You should have received a copy of the GNU Lesser General Public License | |
18 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
19 | + *) | |
20 | + | |
21 | +open Xstd | |
22 | +open ENIAMsubsyntaxTypes | |
23 | +open ENIAMtokenizerTypes | |
24 | + | |
25 | +exception Comment_line (* raised by load_token for a line starting with '#' that matches none of the recognised metadata prefixes *) |
26 | +exception Empty_line (* raised by load_token when the line read is the empty string *) |
27 | +exception Empty_sentence (* NOTE(review): not raised anywhere in the visible part of this file *) |
28 | +exception Sent_id of string (* carries the text that follows "# sent_id = " *) |
29 | +exception Raw_text of string (* carries the text that follows "# text = " *) |
30 | +exception Orig of string (* carries the text that follows "# orig_file_sentence = " *) |
31 | +exception Interval_id of int (* carries b - a for a token id written as an interval a-b *) |
32 | + | |
33 | +let load_token beg compound in_channel = (* Reads one line from in_channel and converts it to a token. beg is stored as the token start; compound > 0 replaces the default gap of 100 after the token by 0. Returns (token, next, id, sl, sem). Blank, metadata and interval-id lines are signalled with the exceptions declared at the top of the file, malformed input with Failure; End_of_file from input_line is not caught here. *) |
34 | + let n_token id orth beg lemma interp sl sem sp = (* local helper that assembles the result from the already split fields *) |
35 | + let sp = match sp with |
36 | + "_" -> if compound > 0 then 0 else 100 (* no explicit spacing information on the line *) |
37 | + | "SpaceAfter=No" -> 0 |
38 | + | _ -> failwith ("load_token sp: " ^ sp) in |
39 | + let sem = match sem with |
40 | + "_" -> "" |
41 | + | _ -> sem in |
42 | + let len = (Xlist.size (Xunicode.utf8_chars_of_utf8_string orth)) * 100 in (* 100 units per list element; presumably one element per character - confirm in Xunicode *) |
43 | + let next = beg+len+sp in |
44 | + let id = try int_of_string id with _ -> (* a non-numeric id is expected to be an interval of the form a-b *) |
45 | + let len = match Xstring.split "-" id with |
46 | + [a;b] -> (try int_of_string b - int_of_string a with _ -> failwith "load_token: interval id") |
47 | + | _ -> failwith "load_token: interval id" in |
48 | + raise (Interval_id len) in (* an interval line yields no token, only this exception *) |
49 | + let pos,tags = match ENIAMtokens.parse_postags interp with [x] -> x | _ -> failwith "n_token" in (* exactly one parse result is accepted *) |
50 | + {empty_token_env with orth = orth; beg=beg; len=len; next=next; |
51 | + token = Lemma(lemma,pos,[tags])}, next, id, sl, sem in |
52 | + let line = input_line in_channel in |
53 | + if line = "" |
54 | + then raise Empty_line |
55 | + else if line.[0] = '#' (* safe: the empty string was handled by the previous test *) |
56 | + then |
57 | + if Xstring.check_prefix "# sent_id = " line then |
58 | + raise (Sent_id(Xstring.cut_prefix "# sent_id = " line)) else |
59 | + if Xstring.check_prefix "# text = " line then |
60 | + raise (Raw_text(Xstring.cut_prefix "# text = " line)) else |
61 | + if Xstring.check_prefix "# orig_file_sentence = " line then |
62 | + raise (Orig(Xstring.cut_prefix "# orig_file_sentence = " line)) else |
63 | + raise Comment_line |
64 | + else |
65 | + match Xstring.split "\t" line with |
66 | + [id; orth; lemma; ucat; interp; uinterp; super; label; "_"; sp] -> (* 10 fields with "_" in the ninth: a single head given by super and label; ucat and uinterp are bound but unused *) |
67 | + let super = if super = "_" then 0 else try int_of_string super with _ -> failwith ("load_token super: " ^ super) in |
68 | + n_token id orth beg lemma interp [super,label] "_" sp |
69 | + | [id; orth; lemma; ucat; interp; uinterp; super; label; sl; sp; sem] -> (* 11 fields: the heads come from sl only; super and label are bound but unused in this branch *) |
70 | + let sl = match sl with |
71 | + "_:_" -> [] |
72 | + | _ -> Xlist.map (Xstring.split "|" sl) (fun s -> |
73 | + match Xstring.split ":" s with |
74 | + super :: l -> (try int_of_string super, String.concat ":" l with _ -> failwith ("load_token sl: " ^ sl)) (* the label may itself contain colons, so the remaining parts are joined back *) |
75 | + | _ -> failwith ("load_token sl: " ^ sl)) in |
76 | + n_token id orth beg lemma interp sl sem sp |
77 | + | _ -> failwith ("load_token: " ^ line) |
78 | + | |
79 | +let substract_next tokens = function (* Lowers by 100 the next field of the token indexed by the head of rev_paths, i.e. the entry consed last, and returns the list unchanged; raises Failure on an empty list. *) |
80 | + ((id,_,_) :: _) as rev_paths -> |
81 | + let t = ExtArray.get tokens id in |
82 | + ExtArray.set tokens id {t with next=t.next-100}; (* the updated record is written back into tokens; the result of set is discarded *) |
83 | + rev_paths |
84 | + | _ -> failwith "substract_next" |
85 | + | |
86 | +let load_sentence in_channel = (* Reads one sentence from in_channel: token lines are consumed until an empty line or end of file. Returns (sentence record, text, orig, tokens) where text and orig come from the metadata comments and tokens holds the artificial root followed by the loaded tokens. Raises End_of_file when the input ends before any token was read, and Failure on unrecognised comments, malformed lines or unexpected token ids. Blank lines met before the first token of the sentence are skipped. *) |
87 | + let tokens = ExtArray.make 100 empty_token_env in |
88 | + let _ = ExtArray.add tokens {empty_token_env with token = Interp "<conll_root>"} in (* artificial root; the id check below relies on it occupying index 0 - presumably ExtArray.add returns the index of the added element *) |
89 | + let rec pom rev_paths next compound sent_id text orig = (* rev_paths accumulates (id, sl, sem) newest first; next is the start position for the following token; compound counts down the remaining tokens of an interval *) |
90 | + try |
91 | + let token, next, conll_id, sl, sem = load_token next compound in_channel in |
92 | + let id_a = ExtArray.add tokens token in |
93 | + if id_a <> conll_id then failwith "load_sentence: different ids" else (* ids in the file must match the order of insertion *) |
94 | + pom ((id_a,sl,sem) :: rev_paths) next (max 0 (compound-1)) sent_id text orig |
95 | + with |
96 | + Sent_id sent_id -> pom rev_paths next compound sent_id text orig |
97 | + | Raw_text text -> pom rev_paths next compound sent_id text orig |
98 | + | Orig orig -> pom rev_paths next compound sent_id text orig |
99 | + | Comment_line -> failwith "load_sentence: Comment_line" |
100 | + | Interval_id len -> (*print_endline line;*) pom rev_paths next len sent_id text orig |
101 | + | Empty_line -> if rev_paths = [] then pom rev_paths next compound sent_id text orig else (substract_next tokens rev_paths, sent_id, text, orig) (* a blank line before any token is skipped instead of reaching substract_next with an empty list, which raised Failure and aborted the whole load *) |
102 | + | End_of_file -> if rev_paths = [] |
103 | + then raise End_of_file |
104 | + else substract_next tokens rev_paths, sent_id, text, orig in (* the if expression forms only the first component of the tuple; the then branch raises, so the result is the same as for a parenthesised tuple *) |
105 | + let rev_paths, sent_id, text, orig = pom [] 100 0 "" "" "" in (* positions start at 100 and no interval is active *) |
106 | + {id = sent_id; beg = -1; len = -1; next = -1; file_prefix = ""; sentence = DepSentence[Array.of_list ((0,[],"") :: List.rev rev_paths)]}, text, orig, tokens |
107 | +(* {s_id = id; s_text = ""; s_paths = (List.rev rev_paths)} *) |
108 | + | |
109 | +let load_corpus in_channel = (* Calls load_sentence repeatedly until it raises End_of_file and returns the collected (sentence, text, orig, tokens) quadruples in reading order. Other exceptions are not caught. *) |
110 | + let rec pom res = |
111 | + try |
112 | + let conll_sentence, text, orig, tokens = load_sentence in_channel in |
113 | + pom ((conll_sentence, text, orig, tokens) :: res) (* the recursive call sits inside the try, so it is not a tail call *) |
114 | + with End_of_file -> res |
115 | + (*| e -> prerr_endline (Printexc.to_string e); res*) in |
116 | + List.rev @@ pom [] |
117 | + | |
118 | +let substring a beg len = (* Concatenates the strings a.(beg) .. a.(beg+len-1) of a string array; assumes Int.fold visits beg to beg+len-1 inclusive in increasing order - confirm in Xstd. No range check is made before the array accesses. *) |
119 | + String.concat "" (List.rev (Int.fold beg (beg+len-1) [] (fun l i -> |
120 | + a.(i) :: l))) |
121 | + | |
122 | +let verify_lengths corpus = (* Diagnostic pass: for every token with index >= 1 compares the slice of the sentence text at the token position with orth and checks the distance to the next position, printing a message for each mismatch. Returns unit. *) |
123 | + Xlist.iter corpus (fun (conll_sentence, text, orig, tokens) -> |
124 | + let text = Array.of_list (Xunicode.utf8_chars_of_utf8_string text) in |
125 | + Int.iter 1 (ExtArray.size tokens - 1) (fun i -> (* index 0 is skipped; load_sentence stores the artificial root there *) |
126 | + let t = ExtArray.get tokens i in |
127 | + let beg = t.beg/100 - 1 in (* load_sentence starts positions at 100 and load_token advances by 100 per unit, so this gives a 0-based index into text *) |
128 | + let len = t.len/100 in |
129 | + let next = t.next/100 - 1 in |
130 | + let s = substring text beg len in |
131 | + if s <> t.orth then Printf.printf "%s: %s %s\n" conll_sentence.id s t.orth; |
132 | + if beg + len = next then () else (* next token starts right after this one *) |
133 | + if beg + len + 1 = next then (* a gap of one position must hold a space in text *) |
134 | + if substring text (next-1) 1 = " " then () else Printf.printf "%s: space problem\n" conll_sentence.id else |
135 | + Printf.printf "%s: next problem\n" conll_sentence.id)) |
136 | + | |
137 | +let get_tagset corpus = (* Folds over all tokens with index >= 1 of every sentence and adds the string cat:interps of each Lemma token to a StringQMap, presumably counting occurrences - confirm in Xstd. Raises Failure for a token that is not a Lemma. *) |
138 | + Xlist.fold corpus StringQMap.empty (fun qmap (conll_sentence, text, orig, tokens) -> |
139 | + Int.fold 1 (ExtArray.size tokens - 1) qmap (fun qmap i -> |
140 | + let t = ExtArray.get tokens i in |
141 | + match t.token with |
142 | + Lemma(lemma,cat,interp) -> StringQMap.add qmap (cat ^ ":" ^ ENIAMtokens.string_of_interps interp) |
143 | + | _ -> failwith "get_tagset")) |
144 | + | |
145 | +let numbers = StringSet.of_list ["sg";"pl"] (* values accepted by convert_n *) |
146 | +let cases = StringSet.of_list ["nom";"gen";"dat";"acc";"inst";"loc";"voc"] (* values accepted by convert_c *) |
147 | +let genders = StringSet.of_list ["m1";"m2";"m3";"n";"f"] (* values accepted unchanged by convert_g *) |
148 | +let degrees = StringSet.of_list ["pos";"com";"sup"] (* values accepted by convert_d *) |
149 | + | |
150 | +let convert_n n = (* Returns n unchanged when it is a member of numbers, otherwise raises Failure naming the offending value. *) |
151 | + if StringSet.mem numbers n then n else failwith ("convert_n: " ^ n) |
152 | + | |
153 | +let convert_c c = (* Returns c unchanged when it is a member of cases, otherwise raises Failure naming the offending value. *) |
154 | + if StringSet.mem cases c then c else failwith ("convert_c: " ^ c) |
155 | + | |
156 | +let convert_g = function (* Maps a gender tag to one of the values in genders: n1, n2 and p2 become n, p1 becomes m1, members of genders pass through, anything else raises Failure. *) |
157 | + "n1" -> "n" |
158 | + | "n2" -> "n" |
159 | + | "p1" -> "m1" |
160 | + | "p2" -> "n" |
161 | + | g -> if StringSet.mem genders g then g else failwith ("convert_g: " ^ g) |
162 | + | |
163 | +let convert_d d = (* Returns d unchanged when it is a member of degrees, otherwise raises Failure naming the offending value. *) |
164 | + if StringSet.mem degrees d then d else failwith ("convert_d: " ^ d) |
165 | + | |
166 | +let convert_tagset_token id = function (* Rewrites the category and tags of a Lemma token according to the table below; the first matching case wins. id is only used in the message printed by the final fallback case, which returns the token unchanged. The convert_* helpers raise Failure on values they do not accept. *) |
167 | + Lemma(lemma,"adj",[[[n];[c];[g];[d]]]) -> Lemma(lemma,"adj",[[[convert_n n];[convert_c c];[convert_g g];[convert_d d]]]) |
168 | + | Lemma(lemma,"adja",[[]]) as t -> t |
169 | + | Lemma(lemma,"adjc",[[]]) as t -> t |
170 | + | Lemma(lemma,"adjp",[[]]) as t -> t |
171 | + | Lemma(lemma,"adv",[[]]) -> Lemma(lemma,"adv",[[["pos"]]]) (* an adverb without a degree gets pos *) |
172 | + | Lemma(lemma,"adv",[[[d]]]) -> Lemma(lemma,"adv",[[[convert_d d]]]) |
173 | + | Lemma(lemma,"aglt",_) as t -> t |
174 | + | Lemma(lemma,"bedzie",_) as t -> t |
175 | + | Lemma(lemma,"brev",_) as t -> t |
176 | + | Lemma(lemma,"burk",[[]]) as t -> t |
177 | + | Lemma(lemma,"burk",[_]) -> Lemma(lemma,"burk",[[]]) (* any tags on burk are dropped *) |
178 | + | Lemma(lemma,"comp",[[]]) as t -> t |
179 | + | Lemma(lemma,"conj",[[]]) as t -> t |
180 | + | Lemma(lemma,"depr",[[["pl"];["nom"];["m2"]]]) as t -> t (* these two depr forms are kept; the remaining depr cases below are recategorised as subst *) |
181 | + | Lemma(lemma,"depr",[[["pl"];["voc"];["m2"]]]) as t -> t |
182 | + | Lemma(lemma,"depr",[[[n];[c];["m1"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["m1"]]]) |
183 | + | Lemma(lemma,"depr",[[[n];[c];["m2"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["m2"]]]) |
184 | + | Lemma(lemma,"depr",[[[n];[c];["m3"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["m3"]]]) |
185 | + | Lemma(lemma,"depr",[[[n];[c];["f"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["f"]]]) |
186 | + | Lemma(lemma,"dig",[[]]) as t -> t |
187 | + | Lemma(lemma,"emo",[[]]) as t -> t |
188 | + | Lemma(lemma,"fin",_) as t -> t |
189 | + | Lemma(lemma,"ger",[[[n];[c];[g];[a];[neg]]]) -> Lemma(lemma,"ger",[[[convert_n n];[convert_c c];[convert_g g];[a];[neg]]]) |
190 | + | Lemma(lemma,"imps",_) as t -> t |
191 | + | Lemma(lemma,"impt",_) as t -> t |
192 | + | Lemma(lemma,"inf",_) as t -> t |
193 | + | Lemma(lemma,"interj",[[]]) as t -> t |
194 | + | Lemma(lemma,"interp",[[]]) as t -> t |
195 | + | Lemma(lemma,"num",[[[n];[c];["m1"];[acm]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["m1"];[acm]]]) |
196 | + | Lemma(lemma,"num",[[[n];[c];["m2"];[acm]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["m2"];[acm]]]) |
197 | + | Lemma(lemma,"num",[[[n];[c];["m3"];[acm]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["m3"];[acm]]]) |
198 | + | Lemma(lemma,"num",[[[n];[c];["f"];[acm]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["f"];[acm]]]) |
199 | + | Lemma(lemma,"num",[[[n];[c];["n"];[acm];["col"]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["n"];[acm];["col"]]]) |
200 | + | Lemma(lemma,"num",[[[n];[c];["n"];[acm];["ncol"]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["n"];[acm];["ncol"]]]) |
201 | + | Lemma(lemma,"num",[[[n];[c];["n1"];[acm]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["n"];[acm];["col"]]]) (* n1 is rewritten as n with col, n2 as n with ncol *) |
202 | + | Lemma(lemma,"num",[[[n];[c];["n2"];[acm]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["n"];[acm];["ncol"]]]) |
203 | + | Lemma(lemma,"num",[[[n];[c];["p1"];[acm]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["m1"];[acm]]]) |
204 | + | Lemma(lemma,"num",[[[n];[c];["p2"];[acm]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["n"];[acm];["col";"ncol"]]]) |
205 | + | Lemma(lemma,"num",[[[n];[c];["m1"];[acm];[_]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["m1"];[acm]]]) (* for non-neuter genders a fifth tag is dropped *) |
206 | + | Lemma(lemma,"num",[[[n];[c];["m2"];[acm];[_]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["m2"];[acm]]]) |
207 | + | Lemma(lemma,"num",[[[n];[c];["m3"];[acm];[_]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["m3"];[acm]]]) |
208 | + | Lemma(lemma,"num",[[[n];[c];["f"];[acm];[_]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["f"];[acm]]]) |
209 | + | Lemma(lemma,"num",[[[n];[c];["n"];[acm]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["n"];[acm];["col";"ncol"]]]) |
210 | + | Lemma(lemma,"num",[[[n];[c];["m3"]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["m3"];["congr";"rec"]]]) |
211 | + | Lemma(lemma,"pact",[[[n];[c];[g];[a];[neg]]]) -> Lemma(lemma,"pact",[[[convert_n n];[convert_c c];[convert_g g];[a];[neg]]]) |
212 | + | Lemma(lemma,"pant",_) as t -> t |
213 | + | Lemma(lemma,"pcon",_) as t -> t |
214 | + | Lemma(lemma,"ppas",[[[n];[c];[g];[a];[neg]]]) -> Lemma(lemma,"ppas",[[[convert_n n];[convert_c c];[convert_g g];[a];[neg]]]) |
215 | + | Lemma(lemma,"ppron12",[[[n];[c];[g];[p]]]) -> Lemma(lemma,"ppron12",[[[convert_n n];[convert_c c];[convert_g g];[p]]]) |
216 | + | Lemma(lemma,"ppron12",[[[n];[c];[g];[p];[akc]]]) -> Lemma(lemma,"ppron12",[[[convert_n n];[convert_c c];[convert_g g];[p];[akc]]]) |
217 | + | Lemma(lemma,"ppron3",[[[n];[c];[g];[p];[akc];[praep]]]) -> Lemma(lemma,"ppron3",[[[convert_n n];[convert_c c];[convert_g g];[p];[akc];[praep]]]) |
218 | + | Lemma(lemma,"praet",[[[n];[g];[a]]]) -> Lemma(lemma,"praet",[[[convert_n n];[convert_g g];[a]]]) |
219 | + | Lemma(lemma,"praet",[[[n];[g];[a];[agl]]]) -> Lemma(lemma,"praet",[[[convert_n n];[convert_g g];[a];[agl]]]) |
220 | + | Lemma(lemma,"pred",[[]]) as t -> t |
221 | + | Lemma(lemma,"prep",_) as t -> t |
222 | + | Lemma(lemma,"qub",_) as t -> t |
223 | + | Lemma(lemma,"romandig",[[]]) as t -> t |
224 | + | Lemma(lemma,"siebie",_) as t -> t |
225 | + | Lemma(lemma,"subst",[[[n];[c];["m1"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["m1"]]]) |
226 | + | Lemma(lemma,"subst",[[[n];[c];["m2"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["m2"]]]) |
227 | + | Lemma(lemma,"subst",[[[n];[c];["m3"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["m3"]]]) |
228 | + | Lemma(lemma,"subst",[[[n];[c];["f"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["f"]]]) |
229 | + | Lemma(lemma,"subst",[[[n];[c];["n"];["col"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["n"];["col"]]]) |
230 | + | Lemma(lemma,"subst",[[[n];[c];["n"];["ncol"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["n"];["ncol"]]]) |
231 | + | Lemma(lemma,"subst",[[[n];[c];["m1"];["pt"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["m1"];["pt"]]]) |
232 | + | Lemma(lemma,"subst",[[[n];[c];["n"];["pt"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["n"];["pt"]]]) |
233 | + | Lemma(lemma,"subst",[[[n];[c];["n1"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["n"];["col"]]]) |
234 | + | Lemma(lemma,"subst",[[[n];[c];["n2"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["n"];["ncol"]]]) |
235 | + | Lemma(lemma,"subst",[[[n];[c];["p1"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["m1"];["pt"]]]) |
236 | + | Lemma(lemma,"subst",[[[n];[c];["p2"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["n"];["pt"]]]) |
237 | + | Lemma(lemma,"subst",[[[n];[c];["m3"];[_]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["m3"]]]) |
238 | + | Lemma(lemma,"subst",[[[n];[c];["n"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["n"];["ncol"]]]) |
239 | + | Lemma(lemma,"winien",[[[n];[g];[a]]]) -> Lemma(lemma,"winien",[[[convert_n n];[convert_g g];[a]]]) |
240 | + | Lemma("Crimeboys" as lemma,"ign",[[]]) -> Lemma(lemma,"subst",[[["pl"];["nom"];["m1"]]]) (* from here on: hard-coded replacements for individual lemmas tagged ign *) |
241 | + | Lemma("109P4" as lemma,"ign",[[]]) -> Lemma(lemma,"subst",[[["sg"];["nom"];["m2"]]]) |
242 | + | Lemma("1a." as lemma,"ign",[[]]) -> Lemma(lemma,"list-item",[[]]) |
243 | + | Lemma("orfano" as lemma,"ign",[[]]) -> Lemma(lemma,"xxx",[[]]) |
244 | + | Lemma("650-91-58" as lemma,"ign",[[]]) -> Lemma(lemma,"phone-number",[[]]) |
245 | + | Lemma("654-66-91" as lemma,"ign",[[]]) -> Lemma(lemma,"phone-number",[[]]) |
246 | + | Lemma("U2" as lemma,"ign",[[]]) -> Lemma(lemma,"subst",[[["sg"];["gen"];["m1"]]]) |
247 | + | Lemma("Uudenkaupungin" as lemma,"ign",[[]]) -> Lemma(lemma,"subst",[[["sg"];["gen"];["m1"]]]) |
248 | + | Lemma("kaupunki" as lemma,"ign",[[]]) -> Lemma(lemma,"subst",[[["sg"];["gen"];["m1"]]]) |
249 | + | Lemma("AKP" as lemma,"ign",[[]]) -> Lemma(lemma,"subst",[[[""];[""];[""]]]) (* NOTE(review): number, case and gender are empty strings here, which convert_n, convert_c and convert_g would reject - confirm this is intended *) |
250 | + | Lemma("Beginning" as lemma,"ign",[[]]) -> Lemma(lemma,"xxx",[[]]) |
251 | + | Lemma("with" as lemma,"ign",[[]]) -> Lemma(lemma,"xxx",[[]]) |
252 | + | Lemma("my" as lemma,"ign",[[]]) -> Lemma(lemma,"xxx",[[]]) |
253 | + | Lemma("streets" as lemma,"ign",[[]]) -> Lemma(lemma,"xxx",[[]]) |
254 | + | t -> print_endline ("convert_tagset_token: " ^ id ^ " " ^ ENIAMtokens.string_of_token t);t (* anything not covered above is reported on standard output and returned unchanged *) |
255 | + | |
256 | +let convert_tagset corpus = (* Applies convert_tagset_token to every token with index >= 1 of every sentence, writing the result back into the token array of that sentence, and returns the same corpus value. *) |
257 | + Xlist.iter corpus (fun (conll_sentence, text, orig, tokens) -> |
258 | + Int.iter 1 (ExtArray.size tokens - 1) (fun i -> |
259 | + let t = ExtArray.get tokens i in |
260 | + let token = convert_tagset_token conll_sentence.id t.token in (* the sentence id is passed only for the diagnostic message *) |
261 | + ExtArray.set tokens i {t with token=token})); |
262 | + corpus |
263 | + | |
264 | +(*let string_of_depencency = function | |
265 | + (* Lemma(lemma1,cat1,interp1),"punct",Lemma(lemma2,"interp",_) -> cat1 ^ " -> punct -> " ^ lemma2 ^ ":interp" | |
266 | + | Interp "<conll_root>","root",Lemma(lemma2,cat2,_) -> "<conll_root> -> root -> " ^ cat2 *) | |
267 | + | _,"nsubj",_ -> "nsubj" | |
268 | + | _,"amod",_ -> "amod" | |
269 | + | _,"root",_ -> "root" | |
270 | + | _,"punct",_ -> "punct" | |
271 | + | _,"advmod",_ -> "advmod" | |
272 | + | _,"expl:impers",_ -> "expl:impers" | |
273 | + | _,"mark",_ -> "mark" (* ??? *) | |
274 | + | _,"cc",_ -> "cc" | |
275 | + | _,"conj",_ -> "conj" | |
276 | + | _,"compound:aglt",_ -> "compound:aglt" | |
277 | + | _,"case",_ -> "case" | |
278 | + | _,"advcl",_ -> "advcl" | |
279 | + | _,"obj",_ -> "obj" | |
280 | + | _,"iobj",_ -> "iobj" | |
281 | + | _,"obl",_ -> "obl" | |
282 | + | _,"obl:arg",_ -> "obl:arg" | |
283 | + | _,"appos",_ -> "appos" | |
284 | + | _,"xcomp",_ -> "xcomp" | |
285 | + | _,"flat",_ -> "flat" | |
286 | + | _,"fixed",_ -> "fixed" | |
287 | + | _,"nmod",_ -> "nmod" | |
288 | + | _,"nmod:arg",_ -> "nmod:arg" | |
289 | + | _,"nummod",_ -> "nummod" | |
290 | + | _,"cop",_ -> "cop" | |
291 | + | _,"det",_ -> "det" | |
292 | + | _,"nsubj:pass",_ -> "nsubj:pass" | |
293 | + | _,"aux",_ -> "aux" | |
294 | + | _,"aux:pass",_ -> "aux:pass" | |
295 | + | _,"compound:cnd",_ -> "compound:cnd" | |
296 | + | _,"parataxis",_ -> "parataxis" | |
297 | + | _,"ccomp",_ -> "ccomp" | |
298 | + | _,"acl:relcl",_ -> "acl:relcl" | |
299 | + | _,"discourse:comment",_ -> "discourse:comment" | |
300 | + | _,"list",_ -> "list" | |
301 | + | _,"ccomp:obj",_ -> "ccomp:obj" | |
302 | + | _,"vocative",_ -> "vocative" | |
303 | + | _,"csubj",_ -> "csubj" | |
304 | + | _,"advmod:arg",_ -> "advmod:arg" | |
305 | + | _,"compound:imp",_ -> "compound:imp" | |
306 | + | _,"obl:comp",_ -> "obl:comp" | |
307 | + | _,"cc:preconj",_ -> "cc:preconj" | |
308 | + | _,"discourse:intj",_ -> "discourse:intj" | |
309 | + | _,"acl:attrib",_ -> "acl:attrib" | |
310 | + | _,"nmod:title",_ -> "nmod:title" | |
311 | + | _,"obl:agent",_ -> "obl:agent" | |
312 | + | _,"orphan",_ -> "orphan" | |
313 | + | _,"nmod:subj",_ -> "nmod:subj" | |
314 | + | _,"obl:pass",_ -> "obl:pass" | |
315 | + | _,"discourse:emo",_ -> "discourse:emo" | |
316 | + | (Lemma(lemma1,"subst",[[_] :: [c1] :: _]) as s),"case",(Lemma(lemma2,"prep",[[c2] :: _]) as t) -> | |
317 | + if c1 = c2 then "subst" ^ " -> case -> " ^ "prep" else ENIAMtokens.string_of_token s ^ " -> " ^ "case" ^ " -> " ^ ENIAMtokens.string_of_token t | |
318 | + | Lemma(lemma1,cat1,interp1),"case",Lemma(lemma2,"adv",interp2) -> cat1 ^ ":" ^ ENIAMtokens.string_of_interps interp1 ^ " -> case -> " ^ lemma2 ^ ":" ^ "adv" ^ ":" ^ ENIAMtokens.string_of_interps interp2 | |
319 | + | Lemma(lemma1,cat1,interp1),label,Lemma(lemma2,cat2,interp2) -> | |
320 | + cat1 ^ ":" ^ ENIAMtokens.string_of_interps interp1 ^ " -> " ^ label ^ " -> " ^ cat2 ^ ":" ^ ENIAMtokens.string_of_interps interp2 | |
321 | + | s,label,t -> ENIAMtokens.string_of_token s ^ " -> " ^ label ^ " -> " ^ ENIAMtokens.string_of_token t | |
322 | + | |
323 | +let list_dependencies corpus = | |
324 | + Xlist.fold corpus StringQMap.empty (fun qmap (conll_sentence, text, orig, tokens) -> | |
325 | + let a = match conll_sentence.sentence with | |
326 | + DepSentence[a] -> a | |
327 | + | _ -> failwith "list_dependencies" in | |
328 | + Int.fold 1 (Array.length a - 1) qmap (fun qmap i -> | |
329 | + let id,sl,sem = a.(i) in | |
330 | + Xlist.fold sl qmap (fun qmap (super,label) -> | |
331 | + let super_id,_,_ = a.(super) in | |
332 | + let t = ExtArray.get tokens id in | |
333 | + let s = ExtArray.get tokens super_id in | |
334 | + StringQMap.add qmap (string_of_depencency (s.token,label,t.token)))))*) | |
335 | + | |
336 | +type dep = (* Node data of a dependency tree. As filled in by make_tree and make_tree_rec: id is the position in the sentence array, tid the index into the token array, is_shared is true when the node kept more than one head. *) |
337 | + {id: int; tid: int; lemma: string; cat: string; interp: string list list list; |
338 | + label: string; sem: string; sons: tree list; is_shared: bool} |
339 | + |
340 | +and tree = (* Coordination carries label, sem, sons and coords, following the names used in string_of_tree *) |
341 | + Dep of dep |
342 | + | Cluster of (string * string list list list) * dep * tree list (* phrase name * components * dependents *) |
343 | + | Coordination of string * string * tree list * tree list |
344 | + | |
345 | +let empty_dep = {id=(-1); tid=(-1); lemma=""; cat=""; interp=[]; label=""; sem=""; sons=[]; is_shared=false} (* placeholder dep record: ids are -1, strings and lists are empty *) |
346 | + | |
347 | +let string_of_sem sem = (* Returns sem wrapped in square brackets, or the empty string when sem is empty. *) |
348 | + if sem = "" then "" else "[" ^ sem ^ "]" |
349 | + | |
350 | +let string_of_lci d = (* Formats the lemma, category and tags of a dep record as lemma,cat or lemma,cat:interp; the interp part is left out when its string form is empty. *) |
351 | + let interp = ENIAMtokens.string_of_interps d.interp in |
352 | + if interp = "" then Printf.sprintf "%s,%s" d.lemma d.cat |
353 | + else Printf.sprintf "%s,%s:%s" d.lemma d.cat interp |
354 | + | |
355 | +let string_of_phrase (phrase,interp) = (* Formats a phrase name with its tags as phrase or phrase:interp; the interp part is left out when its string form is empty. *) |
356 | + let interp = ENIAMtokens.string_of_interps interp in |
357 | + if interp = "" then phrase |
358 | + else Printf.sprintf "%s:%s" phrase interp |
359 | + | |
360 | +let rec string_of_tree spaces = function (* Renders a tree as text, one node per line; spaces is the prefix of the current line and is extended by a fixed string for each nesting level. *) |
361 | + Dep d -> (* a node is prefixed with Shared when d.is_shared is set; the list of sons is printed only when non-empty *) |
362 | + if d.sons = [] then Printf.sprintf "%s%sDep(%d,%s,%s%s)" spaces (if d.is_shared then "Shared" else "") d.id (string_of_lci d) d.label (string_of_sem d.sem) |
363 | + else Printf.sprintf "%s%sDep(%d,%s,%s%s,[\n%s])" spaces (if d.is_shared then "Shared" else "") d.id (string_of_lci d) d.label (string_of_sem d.sem) |
364 | + (String.concat "\n" (Xlist.map d.sons (string_of_tree (" " ^ spaces)))) |
365 | + | Cluster((phrase,interp),d,sons) -> (* the sons stored in d are printed in braces, the sons of the cluster itself in brackets *) |
366 | + let dsons = if d.sons = [] then "" else |
367 | + ",{\n" ^ String.concat "\n" (Xlist.map d.sons (string_of_tree (" " ^ spaces))) ^ "}" in |
368 | + let sons = if sons = [] then "" else |
369 | + ",[\n" ^ String.concat "\n" (Xlist.map sons (string_of_tree (" " ^ spaces))) ^ "]" in |
370 | + Printf.sprintf "%s%sCluster(%d,%s,%s,%s%s%s%s)" spaces (if d.is_shared then "Shared" else "") |
371 | + d.id (string_of_phrase (phrase,interp)) (string_of_lci d) d.label (string_of_sem d.sem) dsons sons |
372 | + (* | PairDep(d,d2) -> |
373 | + if d.sons = [] then Printf.sprintf "%s%sPairDep(%d,%s,%s%s,%s)" spaces (if d.is_shared then "Shared" else "") d.id (string_of_lci d) d.label (string_of_sem d.sem) (string_of_lci d2) |
374 | + else Printf.sprintf "%s%sPairDep(%d,%s,%s%s,%s,[\n%s])" spaces (if d.is_shared then "Shared" else "") d.id (string_of_lci d) d.label (string_of_sem d.sem) (string_of_lci d2) |
375 | + (String.concat "\n" (Xlist.map d.sons (string_of_tree (" " ^ spaces)))) *) |
376 | + | Coordination(label,sem,sons,[]) -> Printf.sprintf "%sCoordination(%s%s,[\n%s])" spaces label (string_of_sem sem) (* the second list is printed only when non-empty, see the next case *) |
377 | + (String.concat "\n" (Xlist.map sons (string_of_tree (" " ^ spaces)))) |
378 | + | Coordination(label,sem,sons,coords) -> Printf.sprintf "%sCoordination(%s%s,[\n%s],[\n%s])" spaces label (string_of_sem sem) |
379 | + (String.concat "\n" (Xlist.map sons (string_of_tree (" " ^ spaces)))) |
380 | + (String.concat "\n" (Xlist.map coords (string_of_tree (" " ^ spaces)))) |
381 | + | |
382 | +let rec get_tree_node_id = function (* Returns the id used to order a tree node: the id of a Dep node, or recursively the id of the first son of a Coordination. Raises Failure "get_tree_node_id" for Cluster. *) |
383 | + Dep d -> d.id |
384 | + | Coordination(label,sem,sons,coord) -> get_tree_node_id (List.hd sons) (* List.hd raises Failure when sons is empty *) |
385 | + | _ -> failwith "get_tree_node_id" |
386 | + | |
387 | +let sort_dependents l = (* Sorts a list of trees by get_tree_node_id using compare; fails wherever get_tree_node_id fails. *) |
388 | + Xlist.sort l (fun x y -> compare (get_tree_node_id x) (get_tree_node_id y)) |
389 | + | |
390 | +let rec make_tree_rec tokens id tid label b sem sons = (* Builds the Dep node for position id and token index tid, with label, sem and the shared flag b as given, and recursively builds its sons from the entries stored under key id in the sons map. NOTE(review): nothing here stops the recursion if the map contains a cycle. *) |
391 | + let l = try IntMap.find sons id with Not_found -> [] in (* a position without an entry in the map is a leaf *) |
392 | + let l = Xlist.fold l [] (fun l (id,tid,label,b,sem) -> |
393 | + make_tree_rec tokens id tid label b sem sons :: l) in (* consing reverses the order of the stored list *) |
394 | + let lemma,cat,interp = match (ExtArray.get tokens tid).token with |
395 | + Lemma(lemma,cat,interp) -> lemma,cat,interp |
396 | + | Interp s -> s,"interp",[[]] (* an Interp token is represented with its string as lemma and category interp *) |
397 | + | _ -> failwith "make_tree_rec" |
398 | + Dep{id=id; tid=tid; lemma=lemma; cat=cat; interp=interp; label=label; sem=sem; sons=l; is_shared=b} |
399 | + | |
400 | +let clean_coord_deps = function (* Simplifies a list of (head, label) pairs: when there are exactly two pairs and one of them is labelled conj, only that pair is kept; every other list is returned as it is. *) |
401 | + [] -> [] |
402 | + | [i,s] -> [i,s] |
403 | + | [i1,"conj";i2,s2] -> [i1,"conj"] (* this case is tried first, so when both pairs are conj the first one is kept *) |
404 | + | [i1,s1;i2,"conj"] -> [i2,"conj"] |
405 | + | (i,s) :: l -> |
406 | + if Xlist.fold l true (fun b (_,t) -> if t = s then b else false) then (i,s) :: l |
407 | + else ((*print_endline ("clean_coord_deps: " ^ (String.concat " " (Xlist.map ((i,s) :: l) snd)));*) (i,s) :: l) (* both branches return the same list; the test only selected the now disabled diagnostic print *) |
408 | + | |
409 | +let make_tree tokens a = (* Builds the dependency tree of one sentence. a is the array of (tid, sl, sem) entries with the root at index 0; the result is the Dep node for position 0 and token 0 with an empty label. *) |
410 | + let sons = Int.fold 1 (Array.length a - 1) IntMap.empty (fun sons i -> (* map from head position to the entries that depend on it *) |
411 | + let tid,sl,sem = a.(i) in |
412 | + let sl = clean_coord_deps sl in |
413 | + let b = Xlist.size sl > 1 in (* the entry still has more than one head, so the resulting nodes are marked as shared *) |
414 | + Xlist.fold sl sons (fun sons (super,label) -> |
415 | + IntMap.add_inc sons super [i,tid,label,b,sem] (fun l -> (i,tid,label,b,sem) :: l))) in (* presumably add_inc stores the given list for a new key and applies the function to the existing value otherwise - confirm in Xstd *) |
416 | + make_tree_rec tokens 0 0 "" false "" sons |
417 | + | |
418 | +let rec split_sons pat sel rev = function (* Partitions a list of trees: Dep nodes whose label equals pat are consed onto sel, all other nodes onto rev. Returns (sel, rev); both accumulators end up in reverse input order. *) |
419 | + (Dep d as t) :: l -> |
420 | + if pat = d.label then split_sons pat (t :: sel) rev l |
421 | + else split_sons pat sel (t :: rev) l |
422 | + | t :: l -> split_sons pat sel (t :: rev) l (* nodes that are not Dep are never selected *) |
423 | + | [] -> sel,rev |
424 | + | |
425 | +let extract_sons pat = function (* Detaches from a Dep node its direct Dep sons labelled pat. Returns (detached sons, node with the remaining sons); both lists are in reverse of the original order because split_sons accumulates by consing. Raises Failure for a node that is not Dep. *) |
426 | + Dep d -> |
427 | + let sel,sons = split_sons pat [] [] d.sons in |
428 | + sel,Dep{d with sons=sons} |
429 | + | _ -> failwith "extract_sons" |
430 | + | |
431 | +let get_label = function (* Returns the label of a Dep or Coordination node; raises Failure "get_label" for Cluster. *) |
432 | + Dep d -> d.label |
433 | + | Coordination(label,sem,sons,coord) -> label |
434 | + | _ -> failwith "get_label" |
435 | + | |
436 | +let get_sorted_sons = function (* Returns the sons of a Dep node, ordered by sort_dependents, as (label, son) pairs. Raises Failure for a node that is not Dep, and wherever sort_dependents or get_label fail. *) |
437 | + Dep d -> |
438 | + List.rev (Xlist.rev_map (sort_dependents d.sons) (fun t -> get_label t, t)) (* rev_map followed by rev keeps the sorted order *) |
439 | + | _ -> failwith "get_sorted_sons" |
440 | + | |
441 | +let set_sons sons = function (* Returns a copy of a Dep node with its sons replaced by the given list; raises Failure for a node that is not Dep. *) |
442 | + Dep d -> Dep{d with sons=sons} |
443 | + | _ -> failwith "set_sons" |
444 | + | |
445 | +let extract_cc l = [],l (* stub: returns an empty first component and the input list unchanged; earlier implementations are kept commented out below *) |
446 | + | |
447 | +(*let extract_cc l = | |
448 | + let first,rest = | |
449 | + match sort_dependents l with | |
450 | + first :: rest -> first,rest | |
451 | + | _ -> failwith "extract_cc" in | |
452 | + let cc_preconj,first = | |
453 | + match get_sorted_sons first with | |
454 | + ("cc:preconj",t) :: l -> [t],set_sons (Xlist.map l snd) first | |
455 | + | ("punct",t1) :: ("cc:preconj",t2) :: l -> [t1;t2],set_sons (Xlist.map l snd) first | |
456 | + (* | ("cc",t) :: l -> | |
457 | + print_endline (string_of_tree "" first); | |
458 | + failwith ("extract_cc: " ^ (String.concat " " (Xlist.map (("cc",t) :: l) fst))) *) | |
459 | + | ("punct",t) :: l -> | |
460 | + print_endline (string_of_tree "" first); | |
461 | + failwith ("extract_cc: " ^ (String.concat " " (Xlist.map (("punct",t) :: l) fst))) | |
462 | + | l -> [],first in | |
463 | + cc_preconj, first :: rest*) | |
464 | + | |
465 | + | |
466 | +(*let extract_cc l = | |
467 | + let first,middle,last = | |
468 | + match sort_dependents l with | |
469 | + [first;last] -> first,[],last | |
470 | + | first :: l -> | |
471 | + (match List.rev l with | |
472 | + last :: rev_middle -> first,List.rev rev_middle,last | |
473 | + | _ -> failwith "extract_cc") | |
474 | + | _ -> failwith "extract_cc" in | |
475 | + let cc_preconj,first = extract_sons "cc:preconj" first in | |
476 | + if Xlist.size cc_preconj > 1 then failwith "extract_cc: cc:preconj" else | |
477 | + let cc,last = extract_sons "cc" last in | |
478 | + let punct,last = extract_sons "punct" last in | |
479 | + if Xlist.size cc > 1 then failwith "extract_cc: cc" else | |
480 | + if Xlist.size punct > 1 then failwith "extract_cc: punct 1" else | |
481 | + let puncts,middle = | |
482 | + Xlist.fold middle ([],[]) (fun (puncts,middle) t -> | |
483 | + let punct,t = extract_sons "punct" t in | |
484 | + if Xlist.size punct > 1 then failwith "extract_cc: punct 2" else | |
485 | + punct @ puncts, t :: middle) in | |
486 | + sort_dependents (cc_preconj @ cc @ punct @ puncts), | |
487 | + [first] @ (List.rev middle) @ [last]*) | |
488 | + | |
489 | +let rec process_coordination = function (* Rewrites a tree bottom-up: a Dep node that has sons labelled conj becomes a Coordination carrying the label and sem of that node. Raises Failure for an input node that is not Dep. *) |
490 | + Dep d -> |
491 | + let sons = Xlist.rev_map d.sons process_coordination in (* sons are processed first *) |
492 | + let coord,sons = split_sons "conj" [] [] sons in (* coord holds the processed sons that are Dep nodes labelled conj *) |
493 | + if coord = [] then Dep{d with sons=sons} else |
494 | + let coord,sons = extract_cc (Dep{d with sons=sons} :: coord) in (* the node itself, without its conj sons, is placed first among the conjuncts *) |
495 | + Coordination(d.label,d.sem,sons,coord) (* with the current extract_cc the last component is always empty *) |
496 | + | _ -> failwith "process_coordination" |
497 | + | |
498 | +(*let rec shift_case = function | |
499 | + Dep(id,tid,lci,label,sem,sons,is_shared) as t -> | |
500 | + let case,sons = split_sons "case" [] [] sons in | |
501 | + (match case with | |
502 | + [] -> Dep(id,tid,lci,label,sem,Xlist.rev_map sons shift_case,is_shared) | |
503 | + | [Dep(id2,tid2,lci2,label2,sem2,sons2,is_shared2)] -> | |
504 | + Dep(id2,tid2,lci2,label,sem,Dep(id,tid,lci,"rev_case",sem2,Xlist.rev_map sons shift_case,is_shared2) :: sons2,is_shared) | |
505 | + | [Dep(id2,tid2,lci2,label2,sem2,sons2,is_shared2);t2] -> | |
506 | + Dep(id2,tid2,lci2,label,sem,Dep(id,tid,lci,"rev_case",sem2,Xlist.rev_map (t2 :: sons) shift_case,is_shared2) :: sons2,is_shared) | |
507 | + | _ -> print_endline (string_of_tree "" t); failwith "shift_case") | |
508 | + | Coordination(label,sem,sons,coords) -> Coordination(label,sem,Xlist.rev_map sons shift_case,coords) | |
509 | + | |
510 | +let rec shift_nummod = function | |
511 | + Dep(id,tid,lci,label,sem,sons,is_shared) as t -> | |
512 | + let nummod,sons = split_sons "nummod" [] [] sons in | |
513 | + (match nummod with | |
514 | + [] -> Dep(id,tid,lci,label,sem,Xlist.rev_map sons shift_nummod,is_shared) | |
515 | + | [Dep(id2,tid2,lci2,label2,sem2,sons2,is_shared2)] -> | |
516 | + Dep(id2,tid2,lci2,label,sem,Dep(id,tid,lci,"rev_nummod",sem2,Xlist.rev_map sons shift_nummod,is_shared2) :: sons2,is_shared) | |
517 | + | _ -> print_endline (string_of_tree "" t); failwith "shift_nummod") | |
518 | + | Coordination(label,sem,sons,coords) -> Coordination(label,sem,Xlist.rev_map sons shift_nummod,coords) | |
519 | + | |
520 | +let rec shift_mark = function | |
521 | + Dep(id,tid,lci,label,sem,sons,is_shared) as t -> | |
522 | + let mark,sons = split_sons "mark" [] [] sons in | |
523 | + (match sort_dependents mark with | |
524 | + [] -> Dep(id,tid,lci,label,sem,Xlist.rev_map sons shift_mark,is_shared) | |
525 | + | [Dep(id2,tid2,lci2,label2,sem2,sons2,is_shared2)] -> | |
526 | + Dep(id2,tid2,lci2,label,sem,Dep(id,tid,lci,"rev_mark",sem2,Xlist.rev_map sons shift_mark,is_shared2) :: sons2,is_shared) | |
527 | + | [Dep(id2,tid2,lci2,label2,sem2,sons2,is_shared2);t2] -> | |
528 | + Dep(id2,tid2,lci2,label,sem,Dep(id,tid,lci,"rev_mark",sem2,Xlist.rev_map (t2 :: sons) shift_mark,is_shared2) :: sons2,is_shared) | |
529 | + (* | [Dep(_,_,(lem,_,_),_,_,_,_);Dep(_,_,(lem2,_,_),_,_,_,_)] -> print_endline (string_of_tree "" t); failwith ("shift_mark: " ^ lem ^ " " ^ lem2) *) | |
530 | + | _ -> print_endline (string_of_tree "" t); failwith "shift_mark") | |
531 | + | Coordination(label,sem,sons,coords) -> Coordination(label,sem,Xlist.rev_map sons shift_mark,coords) | |
532 | + | |
533 | +let rec shift_cop = function | |
534 | + Dep(id,tid,lci,label,sem,sons,is_shared) as t -> | |
535 | + let cop,sons = split_sons "cop" [] [] sons in | |
536 | + (match cop with | |
537 | + [] -> Dep(id,tid,lci,label,sem,Xlist.rev_map sons shift_cop,is_shared) | |
538 | + | [Dep(id2,tid2,lci2,label2,sem2,sons2,is_shared2)] -> | |
539 | + Dep(id2,tid2,lci2,label,sem,Dep(id,tid,lci,"rev_cop",sem2,Xlist.rev_map sons shift_cop,is_shared2) :: sons2,is_shared) | |
540 | + | [Dep(id2,tid2,lci2,label2,sem2,sons2,is_shared2);t2] -> | |
541 | + Dep(id2,tid2,lci2,label,sem,Dep(id,tid,lci,"rev_cop",sem2,Xlist.rev_map (t2 :: sons) shift_cop,is_shared2) :: sons2,is_shared) | |
542 | + | _ -> print_endline (string_of_tree "" t); failwith "shift_cop") | |
543 | + | Coordination(label,sem,sons,coords) -> Coordination(label,sem,Xlist.rev_map sons shift_cop,coords) | |
544 | + | |
545 | +let rec shift_aux_pass = function | |
546 | + Dep(id,tid,lci,label,sem,sons,is_shared) as t -> | |
547 | + let aux_pass,sons = split_sons "aux:pass" [] [] sons in | |
548 | + (match aux_pass with | |
549 | + [] -> Dep(id,tid,lci,label,sem,Xlist.rev_map sons shift_aux_pass,is_shared) | |
550 | + | [Dep(id2,tid2,lci2,label2,sem2,sons2,is_shared2)] -> | |
551 | + Dep(id2,tid2,lci2,label,sem2,Dep(id,tid,lci,"rev_aux:pass",sem,Xlist.rev_map sons shift_aux_pass,is_shared2) :: sons2,is_shared) | |
552 | + | _ -> print_endline (string_of_tree "" t); failwith "shift_aux_pass") | |
553 | + | Coordination(label,sem,sons,coords) -> Coordination(label,sem,Xlist.rev_map sons shift_aux_pass,coords)*) | |
554 | + | |
(* Builds one processed dependency tree per corpus entry.
   Each entry is (conll_sentence, text, orig, tokens). The sentence must hold
   exactly one dependency structure (DepSentence with a single element), which
   is converted with make_tree and then passed through process_coordination.
   Returns (sentence id, text, tree, tokens) tuples; Xlist.rev_map is used, so
   the result is presumably in reverse corpus order (confirm against Xlist).
   Raises Failure "make_trees" when the sentence has any other shape. *)
let make_trees corpus =
  Xlist.rev_map corpus (fun (conll_sentence, text, _, tokens) ->
    let paths =
      match conll_sentence.sentence with
        DepSentence [paths] -> paths
      (* The message used to read "list_dependencies", a copy-paste leftover
         that pointed at the wrong function. *)
      | _ -> failwith "make_trees" in
    let tree = process_coordination (make_tree tokens paths) in
    (* The shift_case, shift_nummod, shift_mark, shift_cop and shift_aux_pass
       passes are currently disabled (their definitions are commented out). *)
    conll_sentence.id, text, tree, tokens)
577 | + | |
(* Flattens a possibly nested coordination into a list of (is_coord, Dep) pairs.
   When ulabel is non-empty, ulabel and usem replace the label and sem of every
   Dep reached. When ulabel is empty, a Dep is returned unchanged and a
   Coordination pushes its own label and sem down to its members.
   Everything found below a Coordination is flagged with true. *)
let rec flatten_coordination is_coord ulabel usem tree =
  let no_override = ulabel = "" in
  match tree with
  | Dep node ->
      let node = if no_override then node else {node with label = ulabel; sem = usem} in
      [is_coord, Dep node]
  | Coordination (label, sem, members, _) ->
      let down_label, down_sem = if no_override then label, sem else ulabel, usem in
      List.flatten (Xlist.rev_map members (flatten_coordination true down_label down_sem))
  | _ -> failwith "flatten_coordination"
585 | + | |
(* Renders a dependency as "lemma:cat:interp -> label[sem] -> cat:interp",
   prefixed with "COORD " when is_coord holds. The "[sem]" part is omitted for
   an empty sem, and the lemma of the dependent is deliberately not printed. *)
let string_of_dependency2 is_coord (lemma1,cat1,interp1) label sem (_,cat2,interp2) =
  let prefix = if is_coord then "COORD " else "" in
  let sem_part = if sem = "" then "" else "[" ^ sem ^ "]" in
  let head = String.concat ":" [lemma1; cat1; ENIAMtokens.string_of_interps interp1] in
  let dependent = cat2 ^ ":" ^ ENIAMtokens.string_of_interps interp2 in
  prefix ^ head ^ " -> " ^ label ^ sem_part ^ " -> " ^ dependent
591 | + | |
(* Selector used inside patterns: Any accepts every string, Value lists the
   accepted strings, Agr names an agreement variable that has to receive the
   same string everywhere it occurs within one match. *)
type sel =
  | Any
  | Value of string list
  | Agr of string

(* Coord marks a pattern written with the "COORD" prefix, Gen one without it. *)
type coord =
  | Coord
  | Gen

type pattern =
  | PatternNode of sel * sel * sel list    (* lemma, category, interpretation selectors *)
  | PatternPhrase of sel * sel list        (* phrase name, interpretation selectors *)
  | PatternEdge of pattern * sel * pattern (* head pattern, edge label, dependent pattern *)
598 | + | |
(* Names that make parse_pattern2 read a pattern as a phrase pattern. *)
let phrase_names =
  StringSet.of_list [
    "np"; "adjp"; "ip"; "infp"; "pp"; "comprep"; "sent"; "cp"; "conjp";
  ]

(* Raw pattern lines, loaded when the module is initialised. *)
let raw_patterns = File.load_lines "data/patterns.tab"
let raw_pair_patterns = File.load_lines "data/pair_patterns.tab"
603 | + | |
(* True when the first selector is a single-valued Value naming a known phrase. *)
let is_phrase selectors =
  match selectors with
  | Value [name] :: _ -> StringSet.mem phrase_names name
  | _ -> false
607 | + | |
(* Parses one colon-separated pattern element [a]; [s] is the whole pattern
   line and is only used in error messages.
   Field syntax: "_" is Any; "$l", "$n", "$c", "$g", "$p" are agreement
   variables; "." is the literal dot; anything else is split into alternative
   values with Xstring.split (the separator argument presumably denotes a dot).
   A field list starting with a known phrase name yields PatternPhrase,
   otherwise at least lemma and category are required for a PatternNode. *)
let parse_pattern2 s a =
  let selector_of_field field =
    match field with
    | "_" -> Any
    | "$l" | "$n" | "$c" | "$g" | "$p" -> Agr (String.sub field 1 1)
    | "." -> Value ["."]
    | alternatives -> Value (Xstring.split "\\." alternatives) in
  let malformed () = failwith ("parse_pattern2: " ^ s) in
  match Xlist.map (Xstring.split ":" a) selector_of_field with
  | [] -> malformed ()
  | (phrase :: interp) as selectors when is_phrase selectors -> PatternPhrase (phrase, interp)
  | lemma :: cat :: interp -> PatternNode (lemma, cat, interp)
  | _ -> malformed ()
627 | + | |
(* Parses [s] as a phrase description and returns (phrase name, interpretation
   selectors). Fails unless the result is a PatternPhrase with a single name. *)
let parse_phrase s =
  match parse_pattern2 s s with
  | PatternPhrase (Value [name], interp) -> name, interp
  | PatternNode _ | PatternEdge _ | PatternPhrase _ -> failwith "parse_phrase"
632 | + | |
(* Parses one line of the pattern file. Empty lines and lines starting with
   '#' give []; every other line must have one of the space-separated shapes
   below and gives a singleton list [kind, s, head, edge label, dependent].
   An edge label "_" becomes Any; bracketed groups become PatternEdge values. *)
let parse_pattern s =
  if s = "" || String.get s 0 = '#' then [] else
  let node = parse_pattern2 s in
  let lab l = Value [l] in
  let edge head l dep = PatternEdge (head, lab l, dep) in
  let rule kind head label dep = [kind, s, head, label, dep] in
  match Xstring.split " " s with
  | [a;"->";"_";"->";b] -> rule Gen (node a) Any (node b)
  | [a;"->";label;"->";b] -> rule Gen (node a) (lab label) (node b)
  | [a;"->";label;"->";"[";b1;"->";label_b;"->";b2;"]"] ->
      rule Gen (node a) (lab label) (edge (node b1) label_b (node b2))
  | [a;"->";label;"->";"[";b;"->";label_b1;"->";b1;"|";label_b2;"->";b2;"]"] ->
      rule Gen (node a) (lab label)
        (edge (edge (node b) label_b1 (node b1)) label_b2 (node b2))
  | ["[";a1;"->";label_a;"->";a2;"]";"->";label;"->";b] ->
      rule Gen (edge (node a1) label_a (node a2)) (lab label) (node b)
  | ["COORD";a;"->";"_";"->";b] -> rule Coord (node a) Any (node b)
  | ["COORD";a;"->";label;"->";b] -> rule Coord (node a) (lab label) (node b)
  | ["COORD";a;"->";label;"->";"[";b1;"->";label_b;"->";b2;"]"] ->
      rule Coord (node a) (lab label) (edge (node b1) label_b (node b2))
  | _ -> failwith ("parse_pattern: " ^ s)
647 | + | |
(* Parses one line of the pair-pattern file: a resulting phrase and a pattern
   separated by a tab. Empty lines and lines starting with '#' give [].
   The string stored in the result is the pattern part, not the whole line. *)
let parse_pair_pattern s =
  if s = "" || String.get s 0 = '#' then [] else
  match Xstring.split "\t" s with
  | [phrase_field; pattern_field] ->
      let result_phrase = parse_phrase phrase_field in
      (match parse_pattern pattern_field with
       | [coord, pattern_text, p1, plabel, p2] ->
           [result_phrase, coord, pattern_text, p1, plabel, p2]
       | _ -> failwith ("parse_pair_pattern 1: " ^ s))
  | _ -> failwith ("parse_pair_pattern 2: " ^ s)
658 | + | |
(* Parsed pattern tables, built once when the module is initialised. *)
let patterns =
  List.concat (Xlist.rev_map raw_patterns parse_pattern)
let pair_patterns =
  List.concat (Xlist.rev_map raw_pair_patterns parse_pair_pattern)
661 | + | |
(* Matches the string [s] against one selector and returns the (possibly
   extended) agreement map. Any always succeeds; Value succeeds when [s] is one
   of the listed strings; Agr binds the variable on first use and later demands
   the same string. Raises Not_found on mismatch. *)
let match_string map s sel =
  match sel with
  | Any -> map
  | Value accepted -> if List.mem s accepted then map else raise Not_found
  | Agr var ->
      if not (StringMap.mem map var) then StringMap.add map var s
      else if StringMap.find map var = s then map
      else raise Not_found
672 | + | |
(* Matches one interpretation position against one selector. A single value is
   matched with match_string; the pair "congr","rec" is accepted without
   touching the map; every other multi-valued position is a failure. *)
let match_interp_rec2 map (values, pat) =
  match values with
  | [value] -> match_string map value pat
  | ["congr";"rec"] -> map
  | _ -> failwith "match_interp_rec2"
677 | + | |
(* Matches interpretation positions pairwise against selectors. The selector
   list may be shorter than the interpretation (the rest is ignored), but
   running out of positions while selectors remain is a failure. *)
let rec match_interp_rec map (values, selectors) =
  match selectors, values with
  | [], _ -> map
  | sel :: sels, v :: vs -> match_interp_rec (match_interp_rec2 map (v, sel)) (vs, sels)
  | _ :: _, [] -> failwith "match_interp_rec"
684 | + | |
(* Matches a token interpretation against selectors; exactly one
   interpretation variant is supported, anything else is a failure. *)
let match_interp map interp pinterp =
  match interp with
  | [single] -> match_interp_rec map (single, pinterp)
  | [] | _ :: _ :: _ -> failwith "match_interp"
689 | + | |
(* Matches (phrase, tree, pattern) and returns the extended agreement map.
   - PatternNode matches only a Dep without sons, on lemma, category and
     interpretation.
   - PatternPhrase ignores the Dep itself and matches the supplied phrase name
     and phrase interpretation.
   - A nested PatternEdge matches a Dep with exactly two Dep sons; the sons are
     tried in the written order first and in swapped order when that raises
     Not_found.
   - A plain PatternEdge matches a Dep with exactly one Dep son.
   Sub-matches are always run with the empty phrase ("", []).
   Raises Not_found when nothing fits. *)
let rec match_pattern_rec map (phrase, tree, pattern) =
  let no_phrase = "", [] in
  (* Matches one son: first its edge label, then the subtree itself. *)
  let match_son map (label, subtree) plabel p =
    match_pattern_rec (match_string map label plabel) (no_phrase, subtree, p) in
  match tree, pattern with
  | Dep ({sons = []} as node), PatternNode (plemma, pcat, pinterp) ->
      let map = match_string map node.lemma plemma in
      let map = match_string map node.cat pcat in
      match_interp map node.interp pinterp
  | Dep _, PatternPhrase (pphrase, pinterp) ->
      let name, interp = phrase in
      let map = match_string map name pphrase in
      match_interp map interp pinterp
  | Dep ({sons = [Dep son1; Dep son2]} as node),
    PatternEdge (PatternEdge (p, plabel1, p1), plabel2, p2) ->
      let map = match_pattern_rec map (no_phrase, Dep {node with sons = []}, p) in
      (try
         let map = match_son map (son1.label, Dep son1) plabel1 p1 in
         match_son map (son2.label, Dep son2) plabel2 p2
       with Not_found ->
         (* Retry with the two sons swapped, starting again from the map
            obtained for the head alone. *)
         let map = match_son map (son1.label, Dep son1) plabel2 p2 in
         match_son map (son2.label, Dep son2) plabel1 p1)
  | Dep ({sons = [Dep son]} as node), PatternEdge (p1, plabel, p2) ->
      let map = match_pattern_rec map (no_phrase, Dep {node with sons = []}, p1) in
      match_son map (son.label, Dep son) plabel p2
  | _ -> raise Not_found
724 | + | |
725 | + | |
(* Returns the text of the first rule that matches head (phrase1,d1) and
   dependent (phrase2,d2). A Coord rule is only tried when is_coord holds or
   the dependent is marked as shared; Gen rules are always tried.
   Raises Not_found when no rule matches. *)
let rec match_pattern is_coord (phrase1,d1) (phrase2,d2) rules =
  match rules with
  | [] -> raise Not_found
  | (coord, s, p1, plabel, p2) :: rest ->
      let applicable = is_coord || d2.is_shared || coord = Gen in
      let matched =
        applicable &&
        (try
           let map = match_pattern_rec StringMap.empty (phrase1, Dep d1, p1) in
           let map = match_string map d2.label plabel in
           ignore (match_pattern_rec map (phrase2, Dep d2, p2));
           true
         with Not_found -> false) in
      if matched then s else match_pattern is_coord (phrase1,d1) (phrase2,d2) rest
739 | + | |
(* Builds the interpretation of a resulting phrase from its selectors: a
   single-valued Value gives that value, an Agr gives the string bound to the
   variable in [map]. Unbound variables and other selectors are failures that
   mention the pattern text [s]. The result has a single variant. *)
let match_phrase_interp s map pinterp =
  let unresolved () = failwith ("match_phrase_interp: " ^ s) in
  let value_of sel =
    match sel with
    | Value [v] -> [v]
    | Agr var -> (try [StringMap.find map var] with Not_found -> unresolved ())
    | _ -> unresolved () in
  [Xlist.map pinterp value_of]
746 | + | |
(* Like match_pattern, but for pair patterns: on the first matching rule it
   returns (resulting phrase name, resulting phrase interpretation, rule text),
   the interpretation being computed from the agreement map of the match.
   Raises Not_found when no rule matches. *)
let rec match_pair_pattern is_coord (phrase1,d1) (phrase2,d2) rules =
  match rules with
  | [] -> raise Not_found
  | ((pphrase, pinterp), coord, s, p1, plabel, p2) :: rest ->
      let attempt =
        if is_coord || d2.is_shared || coord = Gen then
          (try
             let map = match_pattern_rec StringMap.empty (phrase1, Dep d1, p1) in
             let map = match_string map d2.label plabel in
             let map = match_pattern_rec map (phrase2, Dep d2, p2) in
             Some (pphrase, match_phrase_interp s map pinterp, s)
           with Not_found -> None)
        else None in
      (match attempt with
       | Some result -> result
       | None -> match_pair_pattern is_coord (phrase1,d1) (phrase2,d2) rest)
760 | + | |
(* Pre-order fold: [f] is applied to the accumulator and the current node,
   then the sons of a Dep or the members of a Coordination are folded in turn.
   Any other node kind is a failure. *)
let rec fold_tree tree s f =
  let children =
    match tree with
    | Dep node -> node.sons
    | Coordination (_, _, members, _) -> members
    | _ -> failwith "fold_tree" in
  let visit acc child = fold_tree child acc f in
  Xlist.fold children (f s tree) visit
766 | + | |
767 | +(*let list_dependencies_tree corpus = | |
768 | + Xlist.fold corpus StringQMap.empty (fun qmap (sentence_id, text, tree, tokens) -> | |
769 | + fold_tree tree qmap (fun qmap -> function | |
770 | + Dep d -> | |
771 | + Xlist.fold (List.flatten (Xlist.rev_map d.sons (flatten_coordination false "" ""))) qmap (fun qmap -> function | |
772 | + is_coord,Dep d2 -> | |
773 | + (try | |
774 | + let s = match_pattern is_coord (Dep d) (Dep d2) patterns in | |
775 | + StringQMap.add qmap ("PATTERN " ^ s) | |
776 | + with Not_found -> StringQMap.add qmap (string_of_dependency2 is_coord (d.lemma,d.cat,d.interp) d2.label d2.sem (d2.lemma,d2.cat,d2.interp))) | |
777 | + | _ -> failwith "list_dependencies_tree") | |
778 | + | Coordination(label,sem,sons,coords) -> StringQMap.add qmap "Coordination" | |
779 | + | _ -> failwith "list_dependencies_tree")) | |
780 | + | |
781 | +let list_dependencies_tree2 corpus = | |
782 | + Xlist.fold corpus StringMap.empty (fun map (sentence_id, text, tree, tokens) -> | |
783 | + fold_tree tree map (fun map -> function | |
784 | + Dep d -> | |
785 | + Xlist.fold (List.flatten (Xlist.rev_map d.sons (flatten_coordination false "" ""))) map (fun map -> function | |
786 | + is_coord,Dep d2 -> | |
787 | + (try | |
788 | + let _ = match_pattern is_coord (Dep d) (Dep d2) patterns in | |
789 | + map | |
790 | + with Not_found -> StringMap.add_inc map (string_of_dependency2 is_coord (d.lemma,d.cat,d.interp) d2.label d2.sem (d2.lemma,d2.cat,d2.interp)) [text] (fun l -> text :: l)) | |
791 | + | _ -> failwith "list_dependencies_tree2") | |
792 | + | Coordination(label,sem,sons,coords) -> StringMap.add_inc map "Coordination" [text] (fun l -> text :: l) | |
793 | + | _ -> failwith "list_dependencies_tree2"))*) | |
794 | + | |
(* Scans the pending sons of a head cluster for the first leaf cluster that
   forms a pair pattern with the head. On success returns true and a new
   cluster with the resulting phrase, the leaf added to the sons of the head
   record, and all other sons (not yet scanned followed by already scanned)
   kept. When no son fits, returns false and the head with every son moved to
   its list. *)
let rec parse_pair_patterns (head, pending) =
  match head, pending with
  | Cluster _, [] -> false, head
  | Cluster (phrase, d, seen), (Cluster (phrase2, d2, []) as leaf) :: rest ->
      let found =
        try Some (match_pair_pattern false (phrase, d) (phrase2, d2) pair_patterns)
        with Not_found -> None in
      (match found with
       | Some (pphrase, pinterp, _) ->
           true, Cluster ((pphrase, pinterp), {d with sons = Dep d2 :: d.sons}, rest @ seen)
       | None -> parse_pair_patterns (Cluster (phrase, d, leaf :: seen), rest))
  | Cluster (phrase, d, seen), other :: rest ->
      parse_pair_patterns (Cluster (phrase, d, other :: seen), rest)
  | _ -> failwith "parse_pair_patterns"
804 | + | |
(* True when every element is one of the accepted coordination connectors:
   a comma, the conjunctions "i" and "a", the pre-conjunction "zarówno", or
   "jak" with the single son "i". The empty list is accepted. *)
let check_cc sons =
  let is_connector = function
    | Cluster (_, {lemma = ","; cat = "interp"; label = "punct"}, []) -> true
    | Cluster (_, {lemma = ("i" | "a"); cat = "conj"; label = "cc"}, []) -> true
    | Cluster (_, {lemma = "zarówno"; cat = "conj"; label = "cc:preconj"}, []) -> true
    | Cluster (_, {lemma = "jak"; cat = "conj"; label = "cc";
                   sons = [Dep {lemma = "i"; cat = "conj"}]}, _) -> true
    | _ -> false in
  List.for_all is_connector sons
814 | + | |
(* Reduces a Coordination whose members are already parsed clusters.
   - An "adja" leaf followed by an "adj" leaf that only carries a "-"
     punctuation son collapses into the second cluster, which takes over the
     label and sem of the coordination.
   - Otherwise, when the remaining sons of every member are coordination
     connectors (see check_cc), the first member represents the whole
     coordination: it is marked as shared and its remaining sons are dropped.
   - Otherwise the Coordination is returned unchanged.
   Raises Failure for a Coordination with a non-empty last field, for
   non-Cluster members, for an empty member list and for non-Coordination
   input. *)
let parse_coordination = function
    Coordination(label,sem,[
      Cluster(_,{cat="adja";sons=[]},[]);
      (* The category is compared with the string "adj"; the unquoted adj used
         before was a pattern variable and accepted every category. *)
      Cluster(phrase,({cat="adj";sons=[]} as d),[Cluster(_,{lemma="-";cat="interp";label="punct"},[])])],[]) ->
        Cluster(phrase,{d with label=label;sem=sem},[])
  | Coordination(label,sem,sons,[]) ->
      (* The fold visits every member so that a non-Cluster member is always
         reported, even after an earlier member failed the check. *)
      let only_connectors = Xlist.fold sons true (fun ok -> function
          Cluster(_,_,rest) -> check_cc rest && ok
        | _ -> failwith "parse_coordination 2") in
      if not only_connectors then Coordination(label,sem,sons,[]) else
      (match sons with
        Cluster(phrase,d,_) :: _ -> Cluster(phrase,{d with is_shared=true},[])
      (* Explicit message instead of the bare Failure "hd" raised by List.hd. *)
      | [] -> failwith "parse_coordination: empty coordination"
      | _ -> failwith "parse_coordination 3")
  | _ -> failwith "parse_coordination"
832 | + | |
(* Maps a token category to the name of the phrase it heads; categories
   without an assigned phrase give the empty string. *)
let make_phrase cat =
  match cat with
  | "subst" | "depr" | "ppron12" | "ppron3" | "ger" -> "np"
  | "adj" | "pact" | "ppas" -> "adjp"
  | "fin" | "bedzie" | "praet" | "winien" | "impt" | "imps" | "pred" -> "ip"
  | "inf" -> "infp"
  | "conj" -> "conjp"
  | _ -> ""
865 | + | |
(* Reduces a tree to clusters.
   A Dep is parsed son by son, then turned into a cluster headed by the node
   without sons, with the phrase derived from its category. Leaf son clusters
   that some pattern attaches to the head are dropped; the head is then
   combined with the remaining sons through parse_pair_patterns, and the result
   is parsed again whenever a pair pattern fired. A Cluster goes through the
   same reduction with its own head record. A Coordination has its members
   parsed in order and is passed to parse_coordination. *)
let rec parse_tree tree =
  (* Keeps the sons that no pattern attaches to [head]; they come out reversed. *)
  let unmatched head sons =
    Xlist.fold sons [] (fun kept son ->
      match son with
      | Cluster (phrase2, d2, []) ->
          (try ignore (match_pattern false head (phrase2, d2) patterns); kept
           with Not_found -> son :: kept)
      | _ -> son :: kept) in
  (* Combines the head with its remaining sons; re-parses after a pair match. *)
  let reduce (phrase, d) sons =
    let remaining = unmatched (phrase, d) sons in
    let merged, result = parse_pair_patterns (Cluster (phrase, d, []), remaining) in
    if merged then parse_tree result else result in
  match tree with
  | Dep d ->
      let sons = Xlist.rev_map d.sons parse_tree in
      reduce ((make_phrase d.cat, d.interp), {d with sons = []}) sons
  | Coordination (label, sem, members, coords) ->
      let parsed = List.rev (Xlist.rev_map members parse_tree) in
      parse_coordination (Coordination (label, sem, parsed, coords))
  | Cluster (phrase, d, sons) -> reduce (phrase, d) sons
896 | + | |
(* A tree counts as parsed when it is a single "<conll_root>" cluster with no
   sons left in either the record or the cluster. *)
let is_parsed tree =
  match tree with
  | Cluster (_, {lemma = "<conll_root>"; sons = []}, []) -> true
  | _ -> false
900 | + | |
(* Sentence ids read from data/excluded.tab; parse and extract_rules skip them. *)
let excluded =
  let ids = File.load_lines "data/excluded.tab" in
  StringSet.of_list ids
902 | + | |
(* Collects, in front of [forest], every cluster that has sons all of which
   are leaf clusters. Leaf clusters contribute nothing; other clusters and
   Coordinations are searched recursively. *)
let rec split_tree forest tree =
  let is_leaf = function
    | Cluster (_, _, []) -> true
    | _ -> false in
  match tree with
  | Coordination (_, _, members, _) -> Xlist.fold members forest split_tree
  | Cluster (_, _, []) -> forest
  | Cluster (_, _, sons) when List.for_all is_leaf sons -> tree :: forest
  | Cluster (_, _, sons) -> Xlist.fold sons forest split_tree
  | _ -> failwith "split_tree"
914 | + | |
915 | +(* let rec rules_of_tree2 = function | |
916 | + Dep({sons=[]} as d) -> | |
917 | + d.lemma ^ ":" ^ d.cat ^ ":" ^ ENIAMtokens.string_of_interps d.interp | |
918 | + | Dep({sons=[Dep d2]} as d) -> | |
919 | + "[ " ^ d.lemma ^ ":" ^ d.cat ^ ":" ^ ENIAMtokens.string_of_interps d.interp ^ | |
920 | + " -> " ^ d2.label ^ " -> " ^ rules_of_tree2 (Dep d2) ^ " ]" | |
921 | + | Dep({sons=[Dep d2;Dep d3]} as d) -> | |
922 | + "[ " ^ d.lemma ^ ":" ^ d.cat ^ ":" ^ ENIAMtokens.string_of_interps d.interp ^ | |
923 | + " -> " ^ d2.label ^ " -> " ^ rules_of_tree2 (Dep d2) ^ " | " ^ d3.label ^ " -> " ^ rules_of_tree2 (Dep d3) ^ " ]" | |
924 | + | _ -> failwith "rules_of_tree2" *) | |
925 | + | |
926 | +(* let rec rules_of_tree2 = function | |
927 | + Dep({sons=[]} as d) -> | |
928 | + "_:" ^ d.cat ^ ":" ^ ENIAMtokens.string_of_interps d.interp | |
929 | + | Dep({sons=[Dep d2]} as d) -> | |
930 | + "[ _:" ^ d.cat ^ ":" ^ ENIAMtokens.string_of_interps d.interp ^ | |
931 | + " -> " ^ d2.label ^ " -> " ^ rules_of_tree2 (Dep d2) ^ " ]" | |
932 | + | Dep({sons=[Dep d2;Dep d3]} as d) -> | |
933 | + "[ _:" ^ d.cat ^ ":" ^ ENIAMtokens.string_of_interps d.interp ^ | |
934 | + " -> " ^ d2.label ^ " -> " ^ rules_of_tree2 (Dep d2) ^ " | " ^ d3.label ^ " -> " ^ rules_of_tree2 (Dep d3) ^ " ]" | |
935 | + | _ -> failwith "rules_of_tree2" *) | |
936 | + | |
(* Renders a Dep with at most two Dep sons in pattern syntax, keeping only the
   categories and edge labels (lemmas are written as "_"). Other shapes fail. *)
let rec rules_of_tree2 tree =
  match tree with
  | Dep {cat; sons = []} -> "_:" ^ cat
  | Dep {cat; sons = [Dep son]} ->
      Printf.sprintf "[ _:%s -> %s -> %s ]" cat son.label (rules_of_tree2 (Dep son))
  | Dep {cat; sons = [Dep left; Dep right]} ->
      Printf.sprintf "[ _:%s -> %s -> %s | %s -> %s ]" cat
        left.label (rules_of_tree2 (Dep left))
        right.label (rules_of_tree2 (Dep right))
  | _ -> failwith "rules_of_tree2"
947 | + | |
948 | + | |
(* Prepends to [rules] one candidate rule "head -> label -> son" for every
   son of the cluster; every son has to be a leaf cluster. *)
let rules_of_tree rules tree =
  match tree with
  | Cluster (_, d, sons) ->
      let add_rule acc son =
        match son with
        | Cluster (_, d2, []) ->
            let rule =
              String.concat " -> " [rules_of_tree2 (Dep d); d2.label; rules_of_tree2 (Dep d2)] in
            rule :: acc
        | _ -> failwith "rules_of_tree" in
      Xlist.fold sons rules add_rule
  | _ -> failwith "rules_of_tree"
955 | + | |
956 | + | |
(* Parses every non-excluded sentence and prints a report for those that are
   not fully parsed: id, text, the partially parsed tree and the candidate
   rules of its unparsed fragments. When an exception escapes, the id, text,
   the original tree and the exception are printed instead. *)
let parse corpus =
  let report sentence_id text tree =
    print_endline sentence_id;
    print_endline text;
    print_endline (string_of_tree "" tree) in
  let print_rules cluster = Xlist.iter (rules_of_tree [] cluster) print_endline in
  let process (sentence_id, text, tree, _) =
    if not (StringSet.mem excluded sentence_id) then
      (try
         let parsed = parse_tree tree in
         if not (is_parsed parsed) then begin
           report sentence_id text parsed;
           Xlist.iter (split_tree [] parsed) print_rules
         end
       with e ->
         report sentence_id text tree;
         print_endline (Printexc.to_string e)) in
  Xlist.iter corpus process
976 | + | |
(* Counts outcomes over the non-excluded sentences: "PARSED" for a fully parsed
   sentence, otherwise every candidate rule of its unparsed fragments, and the
   exception text when parsing a sentence raises. *)
let extract_rules corpus =
  let count_cluster qmap cluster =
    Xlist.fold (rules_of_tree [] cluster) qmap StringQMap.add in
  let count_sentence qmap (sentence_id, _, tree, _) =
    if StringSet.mem excluded sentence_id then qmap else
    try
      let parsed = parse_tree tree in
      if is_parsed parsed then StringQMap.add qmap "PARSED"
      else Xlist.fold (split_tree [] parsed) qmap count_cluster
    with e -> StringQMap.add qmap (Printexc.to_string e) in
  Xlist.fold corpus StringQMap.empty count_sentence
... | ... |
corpora/data/OrdNumber.tab
0 → 100644
1 | +ostatni | |
2 | +przedostatni | |
3 | +pierwszy | |
4 | +drugi | |
5 | +trzeci | |
6 | +czwarty | |
7 | +piąty | |
8 | +szósty | |
9 | +siódmy | |
10 | +ósmy | |
11 | +dziewiąty | |
12 | +dziesiąty | |
13 | +jedenasty | |
14 | +dwunasty | |
15 | +trzynasty | |
16 | +czternasty | |
17 | +piętnasty | |
18 | +szesnasty | |
19 | +siedemnasty | |
20 | +osiemnasty | |
21 | +dziewiętnasty | |
22 | +dwudziesty | |
23 | +trzydziesty | |
24 | +czterdziesty | |
25 | +pięćdziesiąty | |
26 | +sześćdziesiąty | |
27 | +siedemdziesiąty | |
28 | +osiemdziesiąty | |
29 | +dziewięćdziesiąty | |
30 | +setny | |
31 | +dwusetny | |
32 | +trzysetny | |
33 | + | |
... | ... |
corpora/data/OrdNumberCompound.tab
0 → 100644
corpora/data/OrdNumberUnit.tab
0 → 100644
corpora/data/both-correct.tab
0 → 100644
1 | +U . S . U.S. | |
2 | +... . . . | |
3 | +.. . . | |
4 | +(...) ( . . . ) | |
5 | +?! ? ! | |
6 | +!!! ! ! ! | |
7 | +??? ? ? ? | |
8 | +.... . . . . | |
9 | +?... ? . . . | |
10 | +,, , , | |
11 | +?? ? ? | |
12 | +S . A S.A | |
13 | +...? . . . ? | |
14 | +..... . . . . . | |
15 | +!!!! ! ! ! ! | |
16 | +!! ! ! | |
17 | +!... ! . . . | |
18 | +[...] [ . . . ] | |
19 | +’’ . ’ ’ . | |
20 | +[..] [ . . ] | |
21 | +....... . . . . . . . | |
22 | +…? … ? | |
23 | +(*) ( * ) | |
24 | +***** * * * * * | |
25 | +[+] [ + ] | |
26 | +[-] [ - ] | |
27 | +[?] [ ? ] | |
28 | +1975-1998 1975 - 1998 | |
29 | +’’ ’ ’ | |
30 | +P . S P.S | |
31 | +2-3 2 - 3 | |
32 | +17.00 17 . 00 | |
33 | +...... . . . . . . | |
34 | +16.00 16 . 00 | |
35 | +!? ! ? | |
36 | +2:0 2 : 0 | |
37 | +22.00 22 . 00 | |
38 | +1:0 1 : 0 | |
39 | +........ . . . . . . . . | |
40 | +???? ? ? ? ? | |
41 | +30-40 30 - 40 | |
42 | +3-4 3 - 4 | |
43 | +20.00 20 . 00 | |
44 | +1:1 1 : 1 | |
45 | +14.00 14 . 00 | |
46 | +13.00 13 . 00 | |
47 | +12.00 12 . 00 | |
48 | +1-3 1 - 3 | |
49 | +>> > > | |
50 | +7-9 7 - 9 | |
51 | +3-5 3 - 5 | |
52 | +22.30 22 . 30 | |
53 | +18.30 18 . 30 | |
54 | +18.00 18 . 00 | |
55 | +16.30 16 . 30 | |
56 | +13.30 13 . 30 | |
57 | +11.00 11 . 00 | |
58 | +10.30 10 . 30 | |
59 | +(?) ( ? ) | |
60 | +8.00 8 . 00 | |
61 | +6:4 6 : 4 | |
62 | +4-5 4 - 5 | |
63 | +3:0 3 : 0 | |
64 | +20-30 20 - 30 | |
65 | +2-4 2 - 4 | |
66 | +19.30 19 . 30 | |
67 | +14.30 14 . 30 | |
68 | +**** * * * * | |
69 | +*** * * * | |
70 | +(-) ( - ) | |
71 | +(!) ( ! ) | |
72 | +’80 ’ 80 | |
73 | +k . p . c k.p.c | |
74 | +(++) ( + + ) | |
75 | +(--) ( -- ) | |
76 | +(..) ( . . ) | |
77 | +(?!) ( ? ! ) | |
78 | +(…) ( … ) | |
79 | +-10 - 10 | |
80 | +-124 - 124 | |
81 | +-17 - 17 | |
82 | +-2007 - 2007 | |
83 | +-22 - 22 | |
84 | +-23 - 23 | |
85 | +-367 - 367 | |
86 | +-40 - 40 | |
87 | +-5 - 5 | |
88 | +/.../ / . . . / | |
89 | +0,05-1,0 0,05 - 1,0 | |
90 | +0,05-1,5 0,05 - 1,5 | |
91 | +0,5–1,0 0,5 – 1,0 | |
92 | +0,5—1 0,5 — 1 | |
93 | +0,9-2,75 0,9 - 2,75 | |
94 | +0-46 855 -45 -26 ) . 0-46 855-45-26 ) . | |
95 | +0-5 0 - 5 | |
96 | +00:39 00 : 39 | |
97 | +01:26 01 : 26 | |
98 | +01:40 01 : 40 | |
99 | +03.03. 03 . 03 . | |
100 | +04:02 04 : 02 | |
101 | +05:10 05 : 10 | |
102 | +06:15 06 : 15 | |
103 | +06:37 06 : 37 | |
104 | +07:43 07 : 43 | |
105 | +07:54 07 : 54 | |
106 | +09:56 09 : 56 | |
107 | +0:00 0 : 00 | |
108 | +0:30 0 : 30 | |
109 | +0:35 0 : 35 | |
110 | +0:5 0 : 5 | |
111 | +1,24 1 , 24 | |
112 | +1,5-2 1,5 - 2 | |
113 | +1,50-2 1 , 50 - 2 | |
114 | +1,6-1,8 1,6 - 1,8 | |
115 | +1-11 1 - 11 | |
116 | +1-17 1 - 17 | |
117 | +1-23 1 - 23 | |
118 | +1-8 1 - 8 | |
119 | +1.01.1993 1 . 01 . 1993 | |
120 | +1.04. 1 . 04 . | |
121 | +1.04.286 1 . 04 . 286 | |
122 | +1.12 1 . 12 | |
123 | +1.3 1 . 3 | |
124 | +10-11 10 - 11 | |
125 | +10-13 10 - 13 | |
126 | +10-14 10 - 14 | |
127 | +10-15 10 - 15 | |
128 | +10-20 10 - 20 | |
129 | +10.10.1995 10 . 10 . 1995 | |
130 | +10.20 10 . 20 | |
131 | +10 - proc 10-proc | |
132 | +100 - % 100-% | |
133 | +100-110 100 - 110 | |
134 | +1000-1500 1000 - 1500 | |
135 | +10–15 10 – 15 | |
136 | +10—12 10 — 12 | |
137 | +10—15 10 — 15 | |
138 | +11-12 11 - 12 | |
139 | +11.10 11 . 10 | |
140 | +11.X.2001 11 . X . 2001 | |
141 | +1150-1250 1150 - 1250 | |
142 | +11:39 11 : 39 | |
143 | +12,8-12,9 12,8 - 12,9 | |
144 | +12-16 12 - 16 | |
145 | +12-17 12 - 17 | |
146 | +12-24 12 - 24 | |
147 | +12.07.1982 12 . 07 . 1982 | |
148 | +12.09. 12 . 09 . | |
149 | +12.20 12 . 20 | |
150 | +12.25 12 . 25 | |
151 | +12.28 12 . 28 | |
152 | +12.5 . 12 . 5 . | |
153 | +12.50 12 . 50 | |
154 | +120-140 120 - 140 | |
155 | +1200-1259 1200 - 1259 | |
156 | +120–140 120 – 140 | |
157 | +120–410 120 – 410 | |
158 | +1230-1246 1230 - 1246 | |
159 | +1248–60 1248 – 60 | |
160 | +1253-1810 1253 - 1810 | |
161 | +1256-1258 1256 - 1258 | |
162 | +1288–90 1288 – 90 | |
163 | +12:15 12 : 15 | |
164 | +12:29 12 : 29 | |
165 | +12:52 12 : 52 | |
166 | +13-17 13 - 17 | |
167 | +13-23 13- 23 | |
168 | +13-35 13 - 35 | |
169 | +13.01.1228 13 . 01 . 1228 | |
170 | +13.15 13 . 15 | |
171 | +13.28 13 . 28 | |
172 | +13.40 13 . 40 | |
173 | +1333-70 1333 - 70 | |
174 | +1340-1405 1340 - 1405 | |
175 | +1394-1831 1394 - 1831 | |
176 | +13:02 13 : 02 | |
177 | +13:06 13 : 06 | |
178 | +13:39 13 : 39 | |
179 | +13–14 13 – 14 | |
180 | +14,36 14 , 36 | |
181 | +14-11 14 - 11 | |
182 | +14-18 14 - 18 | |
183 | +14.00-16.00 14 . 00 - 16 . 00 | |
184 | +14.01.1928 14 . 01 . 1928 | |
185 | +14.07.1995 14 . 07 . 1995 | |
186 | +14.12 14 . 12 | |
187 | +14.38 14 . 38 | |
188 | +14.50 14 . 50 | |
189 | +14.X.1954 14 . X . 1954 | |
190 | +1400-1600 1400 - 1600 | |
191 | +142-131 142 - 131 | |
192 | +1466–1772 1466 – 1772 | |
193 | +1486-93 1486 - 93 | |
194 | +14:11 14 : 11 | |
195 | +14:54 14 : 54 | |
196 | +15,22 15 , 22 | |
197 | +15-10 15 - 10 | |
198 | +15-17 15 - 17 | |
199 | +15-20 15 - 20 | |
200 | +15-25 15 - 25 | |
201 | +15.15 15 . 15 | |
202 | +15.40 15 . 40 | |
203 | +15.46 15 . 46 | |
204 | +15.50 15 . 50 | |
205 | +150-180 150 - 180 | |
206 | +150-300 150- 300 | |
207 | +150–180 150 – 180 | |
208 | +1511-21 1511 - 21 | |
209 | +1558-62 1558 - 62 | |
210 | +1566-82 1566 - 82 | |
211 | +1582-1727 1582 - 1727 | |
212 | +1585-1590 1585 - 1590 | |
213 | +15:10 15 : 10 | |
214 | +15:18 15 : 18 | |
215 | +15:48 15 : 48 | |
216 | +15:53 15 : 53 | |
217 | +15:55 15 : 55 | |
218 | +15—20 15 — 20 | |
219 | +16.15 16 . 15 | |
220 | +1600–03 1600 – 03 | |
221 | +161 c 161c | |
222 | +1615-17 1615 - 17 | |
223 | +1618-22 1618 - 22 | |
224 | +1643-1670 1643 - 1670 | |
225 | +1687–1716 1687 – 1716 | |
226 | +1691-1692 1691 - 1692 | |
227 | +1699-1766 1699 - 1766 | |
228 | +16:18 16 : 18 | |
229 | +16:24 16 : 24 | |
230 | +16:30 16 : 30 | |
231 | +16–23 16 – 23 | |
232 | +17.03 17 . 03 | |
233 | +17.05 17 . 05 | |
234 | +17.11 17 . 11 | |
235 | +17.15 17 . 15 | |
236 | +17.34 17 . 34 | |
237 | +17.40 17 . 40 | |
238 | +17.X.64 17 . X . 64 | |
239 | +1709-1713 1709 - 1713 | |
240 | +1727–28 1727 – 28 | |
241 | +1770-84 1770 - 84 | |
242 | +1777–86 1777 – 86 | |
243 | +1797-1805 1797 - 1805 | |
244 | +1797-1863 1797 - 1863 | |
245 | +1799–1873 1799 – 1873 | |
246 | +17:25 17 : 25 | |
247 | +17:26 17 : 26 | |
248 | +17:35 17 : 35 | |
249 | +17:40 17 : 40 | |
250 | +18-20 18 - 20 | |
251 | +18.09. 18 . 09 . | |
252 | +18.15 18 . 15 | |
253 | +18.50 18 . 50 | |
254 | +1809-1814 1809 - 1814 | |
255 | +1809–1881 1809 – 1881 | |
256 | +1812–23 1812 – 23 | |
257 | +1821-1914 1821 - 1914 | |
258 | +1822-24 1822 - 24 | |
259 | +1824-1891 1824 - 1891 | |
260 | +1829–1907 1829 – 1907 | |
261 | +1830–36 1830 – 36 | |
262 | +1831-1846 1831 - 1846 | |
263 | +1831-1880 1831 - 1880 | |
264 | +1839–41 1839 – 41 | |
265 | +1848-49 1848 - 49 | |
266 | +1848–1931 1848 – 1931 | |
267 | +1850-1920 1850 - 1920 | |
268 | +1855-1940 1855 - 1940 | |
269 | +1857–60 1857 – 60 | |
270 | +1858–61 1858 – 61 | |
271 | +1859–1913 1859 – 1913 | |
272 | +1859–1922 1859 – 1922 | |
273 | +1862-1951 1862 - 1951 | |
274 | +1865-1940 1865 - 1940 | |
275 | +1870-1914 1870 - 1914 | |
276 | +1871-1893 1871 - 1893 | |
277 | +1874-1959 1874 - 1959 | |
278 | +1875-76 1875 - 76 | |
279 | +1875–1975 1875 – 1975 | |
280 | +1876-1917 1876 - 1917 | |
281 | +1878-1949 1878 - 1949 | |
282 | +1879-1885 1879 - 1885 | |
283 | +1879–98 1879 – 98 | |
284 | +1883-1913 1883 - 1913 | |
285 | +1886-87 1886 - 87 | |
286 | +1893-1971 1893 - 1971 | |
287 | +1894-1972 1894 - 1972 | |
288 | +1894-97 1894 - 97 | |
289 | +1895-1968 1895 - 1968 | |
290 | +1896-1901 1896 - 1901 | |
291 | +1896-1952 1896 - 1952 | |
292 | +1896-1962 1896 - 1962 | |
293 | +1897-1936 1897 - 1936 | |
294 | +1898-1901 1898 - 1901 | |
295 | +18:22 18 : 22 | |
296 | +18:24 18 : 24 | |
297 | +18:30 18 : 30 | |
298 | +18:48 18 : 48 | |
299 | +18–20 18 – 20 | |
300 | +18–24 18 – 24 | |
301 | +18–28 18 – 28 | |
302 | +19,3 19 , 3 | |
303 | +19-2 19 - 2 | |
304 | +19-20 19 - 20 | |
305 | +19-21 19 - 21 | |
306 | +19-7 19 - 7 | |
307 | +19.09 19 . 09 | |
308 | +19.12.1956 19 . 12 . 1956 | |
309 | +19.2.—20.3. 19 . 2 . — 20 . 3 . | |
310 | +19.20 19 . 20 | |
311 | +19.25 19 . 25 | |
312 | +19.27 19 . 27 | |
313 | +1900-2000 1900 - 2000 | |
314 | +1901-49 1901 - 49 | |
315 | +1905-1928 1905 - 1928 | |
316 | +1905-40 1905 - 40 | |
317 | +1906-07 1906 - 07 | |
318 | +1906-1907 1906 - 1907 | |
319 | +1906-1979 1906 - 1979 | |
320 | +1906-1980 1906 - 1980 | |
321 | +1906–59 1906 – 59 | |
322 | +1907-1983 1907 - 1983 | |
323 | +1907–09 1907 – 09 | |
324 | +1909-1987 1909 - 1987 | |
325 | +1910-1963 1910 - 1963 | |
326 | +1912-13 1912 - 13 | |
327 | +1913-1983 1913 - 1983 | |
328 | +1913-21 1913 - 21 | |
329 | +1914-1945 1914 - 1945 | |
330 | +1915–30 1915 – 30 | |
331 | +1917-1919 1917 - 1919 | |
332 | +1918-1919 1918 - 1919 | |
333 | +1918-1922 1918 - 1922 | |
334 | +1918-1932 1918 - 1932 | |
335 | +1918-9 1918 - 9 | |
336 | +1919-1932 1919 - 1932 | |
337 | +1920-1932 1920 - 1932 | |
338 | +1922-36 1922 - 36 | |
339 | +1928-1929 1928 - 1929 | |
340 | +1929-1933 1929 - 1933 | |
341 | +1929-1939 1929 - 1939 | |
342 | +1931-39 1931 - 39 | |
343 | +1934-1939 1934 - 1939 | |
344 | +1937-39 1937 - 39 | |
345 | +1938-1957 1938 - 1957 | |
346 | +1938-1978 1938 - 1978 | |
347 | +1939-1945 1939 - 1945 | |
348 | +1941–44 1941 – 44 | |
349 | +1942-1944 1942 - 1944 | |
350 | +1944-48 1944 - 48 | |
351 | +1944-89 1944 - 89 | |
352 | +1945-1965 1945 - 1965 | |
353 | +1947-48 1947 - 48 | |
354 | +1947-49 1947 - 49 | |
355 | +1948-49 1948 - 49 | |
356 | +1948–49 1948 – 49 | |
357 | +1950-52 1950 - 52 | |
358 | +1951-77 1951 - 77 | |
359 | +1952-1954 1952 - 1954 | |
360 | +1952-57 1952 - 57 | |
361 | +1955-1991 1955 - 1991 | |
362 | +1960-1970 1960 - 1970 | |
363 | +1960-1993 1960 - 1993 | |
364 | +1962-1980 1962 - 1980 | |
365 | +1962-66 1962 - 66 | |
366 | +1963-1967 1963 - 1967 | |
367 | +1966—1969 1966 — 1969 | |
368 | +1968-1980 1968 - 1980 | |
369 | +1969-71 1969 - 71 | |
370 | +1970-1972 1970 - 1972 | |
371 | +1970–1973 1970 – 1973 | |
372 | +1970–1975 1970 – 1975 | |
373 | +1971-1974 1971 - 1974 | |
374 | +1971-1980 1971 - 1980 | |
375 | +1971–79 1971 – 79 | |
376 | +1973-75 1973 - 75 | |
377 | +1975:132 1975 : 132 | |
378 | +1975:133 1975 : 133 | |
379 | +1978-2001 1978 - 2001 | |
380 | +1979-81 1979 - 81 | |
381 | +1979-83 1979 - 83 | |
382 | +1980-1981 1980 - 1981 | |
383 | +1980-1982 1980 - 1982 | |
384 | +1980-1989 1980 - 1989 | |
385 | +1981-1984 1981 - 1984 | |
386 | +1981-1990 1981 - 1990 | |
387 | +1982-1983 1982 - 1983 | |
388 | +1982-1984 1982 - 1984 | |
389 | +1982–91 1982 – 91 | |
390 | +1982–92 1982 – 92 | |
391 | +1983-1987 1983 - 1987 | |
392 | +1983-87 1983 - 87 | |
393 | +1984-1985 1984 - 1985 | |
394 | +1984-1991 1984 - 1991 | |
395 | +1987–95 1987 – 95 | |
396 | +1989-2009 1989 - 2009 | |
397 | +1989-93 1989 - 93 | |
398 | +1990-1993 1990 - 1993 | |
399 | +1990-1994 1990 - 1994 | |
400 | +1990-93 1990 - 93 | |
401 | +1991-1993 1991 - 1993 | |
402 | +1993-2008 1993 - 2008 | |
403 | +1994-1995 1994 - 1995 | |
404 | +1994-2000 1994 - 2000 | |
405 | +1994-95 1994 - 95 | |
406 | +1995-1996 1995 - 1996 | |
407 | +1997-1998 1997 - 1998 | |
408 | +1997-2001 1997 - 2001 | |
409 | +1997–2001 1997 – 2001 | |
410 | +1999-2000 1999 - 2000 | |
411 | +1999-2001 1999 - 2001 | |
412 | +1:00 1 : 00 | |
413 | +1:10 1 : 10 | |
414 | +1:100 000 . 1 : 100 000 . | |
415 | +1:2 1 : 2 | |
416 | +1:25 1 : 25 | |
417 | +1:30000 1 : 30000 | |
418 | +1:4 1 : 4 | |
419 | +1:50 1 : 50 | |
420 | +1–3 1 – 3 | |
421 | +1–8 1 – 8 | |
422 | +2-11 2 - 11 | |
423 | +2-7 2 - 7 | |
424 | +2.00 2 . 00 | |
425 | +2.2 2 . 2 | |
426 | +2.30 2 . 30 | |
427 | +20,12 20 , 12 | |
428 | +20-26 20 - 26 | |
429 | +20.10.2005 20 . 10 . 2005 | |
430 | +20.30 20 . 30 | |
431 | +200-300 200 - 300 | |
432 | +2000-2006 2000 - 2006 | |
433 | +20000-30000 20000 - 30000 | |
434 | +2001-2002 2001 - 2002 | |
435 | +2001-2006 2001 - 2006 | |
436 | +2003-2006 2003 - 2006 | |
437 | +2004–2006 2004 – 2006 | |
438 | +2008-2018 2008 - 2018 | |
439 | +2025-30 2025 - 30 | |
440 | +20:13 20 : 13 | |
441 | +20:25 20 : 25 | |
442 | +20:47 20 : 47 | |
443 | +20—30 20 — 30 | |
444 | +21-23 21 - 23 | |
445 | +21.1.—18.2. 21 . 1 . — 18 . 2 . | |
446 | +21.12.1796 21 . 12 . 1796 | |
447 | +211-215 211 - 215 | |
448 | +21:11 21 : 11 | |
449 | +21:12 21 : 12 | |
450 | +21:21 21 : 21 | |
451 | +21:25 21 : 25 | |
452 | +21:30 21 : 30 | |
453 | +21:32 21 : 32 | |
454 | +21:34 21 : 34 | |
455 | +22.02 22 . 02 | |
456 | +22.05.1689 22 . 05 . 1689 | |
457 | +22.05.1859 22 . 05 . 1859 | |
458 | +22.05.2002 22 . 05 . 2002 | |
459 | +22.5.—21.6. 22 . 5 . — 21 . 6 . | |
460 | +22.6.—22.7. 22 . 6 . — 22 . 7 . | |
461 | +22:07 22 : 07 | |
462 | +22:12 22 : 12 | |
463 | +22:33 22 : 33 | |
464 | +22:51 22 : 51 | |
465 | +23.00 23 . 00 | |
466 | +23.11.—21.12. 23 . 11 . — 21 . 12 . | |
467 | +230–280 230 – 280 | |
468 | +231-198 231 - 198 | |
469 | +23:00 23 : 00 | |
470 | +24-26 24 - 26 | |
471 | +24-48 24 - 48 | |
472 | +24.00 24 . 00 | |
473 | +24.12.1926 24 . 12 . 1926 | |
474 | +24.II.1993 24 . II . 1993 | |
475 | +24.VII.1997 24 . VII . 1997 | |
476 | +240-249 240 - 249 | |
477 | +242206083.2 242206083 . 2 | |
478 | +25-30 25 - 30 | |
479 | +25:19 25 : 19 | |
480 | +25:9 25 : 9 | |
481 | +25–27 25 – 27 | |
482 | +25–30 25 – 30 | |
483 | +26.06. 26 . 06 . | |
484 | +27,46 27 , 46 | |
485 | +27.01.99 27 . 01 . 99 | |
486 | +27.06. 27 . 06 . | |
487 | +27.06.1997 27 . 06 . 1997 | |
488 | +270-300 270 - 300 | |
489 | +286–298 286 – 298 | |
490 | +28:13 28 : 13 | |
491 | +29.06.1929 29 . 06 . 1929 | |
492 | +2:1 2 : 1 | |
493 | +2:13 2 : 13 | |
494 | +2:2 2 : 2 | |
495 | +2:26 2 : 26 | |
496 | +2:27 2 : 27 | |
497 | +2:3 2 : 3 | |
498 | +2:35 2 : 35 | |
499 | +2–3 2 – 3 | |
500 | +2—3 2 — 3 | |
501 | +2—4 2 — 4 | |
502 | +3-10 3 - 10 | |
503 | +3-16 3 - 16 | |
504 | +3-2 3 - 2 | |
505 | +3-7 3 - 7 | |
506 | +3-8 3 - 8 | |
507 | +3.00 3 . 00 | |
508 | +3.15 3 . 15 | |
509 | +3.29 , 46 3.29,46 | |
510 | +3.30 , 47 3.30,47 | |
511 | +3.32 , 91 3.32,91 | |
512 | +3.47 3 . 47 | |
513 | +30 - proc 30-proc | |
514 | +30-31 30 - 31 | |
515 | +30-35 30 - 35 | |
516 | +30.03.1993 30 . 03 . 1993 | |
517 | +300-350 300 - 350 | |
518 | +300-400 300 - 400 | |
519 | +31.03.1146 31 . 03 . 1146 | |
520 | +31.10.97 31 . 10 . 97 | |
521 | +32-40 32 - 40 | |
522 | +32-76 32 - 76 | |
523 | +333-4 333 - 4 | |
524 | +34-64 34 - 64 | |
525 | +34:19 34 : 19 | |
526 | +35-38 35 - 38 | |
527 | +3:11 3 : 11 | |
528 | +3:25 3 : 25 | |
529 | +3:6 3 : 6 | |
530 | +3–4 3 – 4 | |
531 | +4-25 4 - 25 | |
532 | +4-7 4 - 7 | |
533 | +4.00 4 . 00 | |
534 | +4.07.1610 4 . 07 . 1610 | |
535 | +4.07.1890 4 . 07 . 1890 | |
536 | +4.10 4 . 10 | |
537 | +4.12 4 . 12 | |
538 | +4.30 4 . 30 | |
539 | +4.9 4 . 9 | |
540 | +428-348 428 - 348 | |
541 | +43-69 43 - 69 | |
542 | +4:2 4 : 2 | |
543 | +4:3 4 : 3 | |
544 | +4–5 4 – 5 | |
545 | +4–6 4 – 6 | |
546 | +5 % 5% | |
547 | +5,16 5 , 16 | |
548 | +5-2 5 - 2 | |
549 | +5.13 5 . 13 | |
550 | +5.16.26 5 . 16 . 26 | |
551 | +5.21 5 . 21 | |
552 | +5.23 5 . 23 | |
553 | +5.30 5 . 30 | |
554 | +5.40 5 . 40 | |
555 | +5.55 5 . 55 | |
556 | +50-100 50 - 100 | |
557 | +50-56 50 - 56 | |
558 | +50-69 50 - 69 | |
559 | +53-58 53 - 58 | |
560 | +536–552 536 – 552 | |
561 | +54-56 54 - 56 | |
562 | +5:0 5 : 0 | |
563 | +5:2 5 : 2 | |
564 | +5–10 5 – 10 | |
565 | +5–8 5 – 8 | |
566 | +5—15 5 — 15 | |
567 | +570918348.10 570918348 . 10 | |
568 | +57:56 57 : 56 | |
569 | +6.02 6 . 02 | |
570 | +6.15 6 . 15 | |
571 | +6.38 6 . 38 | |
572 | +60-70 60 - 70 | |
573 | +63:63 63 : 63 | |
574 | +68-75 68 - 75 | |
575 | +6:0 6 : 0 | |
576 | +6–8 6 – 8 | |
577 | +7,15 7 , 15 | |
578 | +7-2 7 - 2 | |
579 | +7-3 7 - 3 | |
580 | +7-5 7 - 5 | |
581 | +7-7 7 - 7 | |
582 | +7.07.2005 7 . 07 . 2005 | |
583 | +7.20 7 . 20 | |
584 | +7.56 7 . 56 | |
585 | +7.8.9 7 . 8 . 9 | |
586 | +74:71 74 : 71 | |
587 | +75-139 75 - 139 | |
588 | +7:0 7 : 0 | |
589 | +8-12 8 - 12 | |
590 | +8-19 8 - 19 | |
591 | +8-4 8 - 4 | |
592 | +8.02 8 . 02 | |
593 | +8.28 8 . 28 | |
594 | +8.45 8 . 45 | |
595 | +80-100 80 - 100 | |
596 | +80-90 80 - 90 | |
597 | +800-1000 800 - 1000 | |
598 | +80000-130000 80000 - 130000 | |
599 | +85:0 85 : 0 | |
600 | +87:62 87 : 62 | |
601 | +89-109 89 - 109 | |
602 | +8:25 8 : 25 | |
603 | +9-11 9 - 11 | |
604 | +9-12 9 - 12 | |
605 | +9-14 9 - 14 | |
606 | +9.15 9 . 15 | |
607 | +9.40 9 . 40 | |
608 | +9.45 9 . 45 | |
609 | +900-1900 900 - 1900 | |
610 | +93-97 93 - 97 | |
611 | +95-98 95 - 98 | |
612 | +960-1127 960 - 1127 | |
613 | +99,5-99,7 99,5 - 99,7 | |
614 | +9:4 9 : 4 | |
615 | +9–15 9 – 15 | |
616 | +0:0 0 : 0 | |
617 | +0:1 0 : 1 | |
618 | +0:3 0 : 3 | |
619 | +1-10 1 - 10 | |
620 | +1-2 1 - 2 | |
621 | +1-5 1 - 5 | |
622 | +10-12 10 - 12 | |
623 | +10-17 10 - 17 | |
624 | +11.30 11 . 30 | |
625 | +12.30 12 . 30 | |
626 | +14-16 14 - 16 | |
627 | +14-17 14 - 17 | |
628 | +14.20 14 . 20 | |
629 | +15.00 15 . 00 | |
630 | +15.30 15 . 30 | |
631 | +16-17 16 - 17 | |
632 | +16.20 16 . 20 | |
633 | +16:21 16 : 21 | |
634 | +17-20 17 - 20 | |
635 | +17.30 17 . 30 | |
636 | +17.50 17 . 50 | |
637 | +18 . 18. | |
638 | +18-19 18 - 19 | |
639 | +1849–1910 1849 – 1910 | |
640 | +19.00 19 . 00 | |
641 | +1939-1941 1939 - 1941 | |
642 | +1971—1975 1971 — 1975 | |
643 | +1989-1991 1989 - 1991 | |
644 | +1992-96 1992 - 96 | |
645 | +1997-1999 1997 - 1999 | |
646 | +1:5 1 : 5 | |
647 | +2,5-3 2,5 - 3 | |
648 | +2007-2013 2007 - 2013 | |
649 | +21.00 21 . 00 | |
650 | +21.30 21 . 30 | |
651 | +25:21 25 : 21 | |
652 | +26-28 26 - 28 | |
653 | +3.30 3 . 30 | |
654 | +3:3 3 : 3 | |
655 | +4.2.1 4 . 2 . 1 | |
656 | +4:1 4 : 1 | |
657 | +5-10 5 - 10 | |
658 | +5-6 5 - 6 | |
659 | +5-7 5 - 7 | |
660 | +5-8 5 - 8 | |
661 | +5.00 5 . 00 | |
662 | +6-7 6 - 7 | |
663 | +6-8 6 - 8 | |
664 | +6.30 6 . 30 | |
665 | +7 - proc 7-proc | |
666 | +7-13 7 - 13 | |
667 | +7-14 7 - 14 | |
668 | +7.00 7 . 00 | |
669 | +8-10 8 - 10 | |
670 | +8.15 8 . 15 | |
671 | +8.30 8 . 30 | |
672 | +8:2 8 : 2 | |
673 | +9.30 9 . 30 | |
674 | +0:2 0 : 2 | |
675 | +10.00 10 . 00 | |
676 | +2-5 2 - 5 | |
677 | +3:1 3 : 1 | |
678 | +40-50 40 - 50 | |
679 | +4:0 4 : 0 | |
680 | +6.00 6 . 00 | |
681 | +7:6 7 : 6 | |
682 | +8-9 8 - 9 | |
683 | +8:15 8 : 15 | |
684 | +9.00 9 . 00 | |
685 | +?!!! ? ! ! ! | |
686 | +?!... ? ! . . . | |
687 | +północno-wschodniej północno - wschodniej | |
688 | +warmińsko-mazurskie warmińsko - mazurskie | |
689 | +kujawsko-pomorskim kujawsko - pomorskim | |
690 | +północno-wschodnim północno - wschodnim | |
691 | +warmińsko-mazurskiego warmińsko - mazurskiego | |
692 | +warmińsko-mazurskim warmińsko - mazurskim | |
693 | +północno-wschodni północno - wschodni | |
694 | +północno-wschodniego północno - wschodniego | |
695 | +północno-wschodnią północno - wschodnią | |
696 | +kujawsko-pomorskie kujawsko - pomorskie | |
697 | +kujawsko-pomorskiego kujawsko - pomorskiego | |
698 | +kulturalno-oświatowe kulturalno - oświatowe | |
699 | +austro-węgierskiej austro - węgierskiej | |
700 | +austro-węgierską austro - węgierską | |
701 | +XVI-XVII XVI - XVII | |
702 | +XVII-XIX XVII - XIX | |
703 | +XV–XVI XV – XVI | |
704 | +XIII-XV XIII - XV | |
705 | +XIV-XVIII XIV - XVIII | |
706 | +XIX–XX XIX – XX | |
707 | +XI–XV XI – XV | |
708 | +XV-XVI XV - XVI | |
709 | +VI–VIII VI – VIII | |
710 | +SGGW-AR SGGW - AR | |
711 | +IV-V IV - V | |
712 | +III–V III – V | |
713 | +I-III I - III | |
714 | +I-IV I - IV | |
715 | +I-VIII I - VIII | |
716 | +(+) ( + ) | |
717 | +40000-50000 40000 - 50000 | |
718 | +45,72 45 , 72 | |
719 | +45-46 45 - 46 | |
720 | +49-70 49 - 70 | |
721 | +70-75 70 - 75 | |
722 | +71:61 71 : 61 | |
723 | +9.00-16.00 9 . 00 - 16 . 00 | |
724 | +Bielska-Białej Bielska - Białej | |
725 | +Bielsko-Biała Bielsko - Biała | |
726 | +Bim-bam Bim - bam | |
727 | +1.05. 1 . 05 . | |
728 | +1.05 1 . 05 | |
729 | +10.10. 10 . 10 . | |
730 | +10.12. 10 . 12 . | |
731 | +11.12. 11 . 12 . | |
732 | +14.05. 14 . 05 . | |
733 | +21.06. 21 . 06 . | |
734 | +29.09. 29 . 09 . | |
735 | +9.02. 9 . 02 . | |
736 | +Tse-tungiem Tse - tungiem | |
737 | +kulturalno-oświatowym kulturalno - oświatowym | |
738 | +n - ru n-ru | |
739 | +pif-paf pif - paf | |
740 | +serbo - chorwackimi serbo-chorwackimi | |
741 | +gadu-gadu gadu - gadu | |
742 | +p . s p.s | |
743 | +w . c w.c | |
744 | +z - ca z-ca | |
745 | +e.cz e . cz | |
746 | +25-30 25 - 30 | |
747 | +<< < < | |
748 | +?!? ? ! ? | |
749 | +?… ? … | |
750 | +MORTKOWICZ - OLCZAKOWA MORTKOWICZ-OLCZAKOWA | |
751 | +Międzychodzko - Sierakowskiego Międzychodzko-Sierakowskiego | |
752 | +Strona | Format Strona|Format | |
753 | +Wołk - Karczewska Wołk-Karczewska | |
754 | +** * * | |
755 | +0-0 0 - 0 | |
756 | +1-0 1 - 0 | |
757 | +1-1 1 - 1 | |
758 | +165-168 165 - 168 | |
759 | +1727-31 1727 - 31 | |
760 | +17:00 17 : 00 | |
761 | +18-1 18 - 1 | |
762 | +2.14 2 . 14 | |
763 | +20.15 20 . 15 | |
764 | +22.12. - 20.01 . 22 . 12 . - 20 . 01 . | |
765 | +23-1 23 - 1 | |
766 | +25-2 25 - 2 | |
767 | +65-8 65 - 8 | |
768 | +7.30 - 15.30 . 7 . 30 - 15 . 30 . | |
769 | +AWS - UW AWS-UW | |
770 | +Dz . U Dz.U | |
771 | +R . P R.P | |
772 | +S . A . S.A. | |
773 | +S . C S.C | |
774 | +S . T S.T | |
775 | +16.01. 16 . 01 . | |
776 | +niby - romantyczna niby-romantyczna | |
777 | +beta - laktamaz beta-laktamaz | |
778 | +zrobiłe ( a ) m zrobiłe(a)m | |
779 | +zawiodłe ( a ) m zawiodłe(a)m | |
780 | +z - dy z-dy | |
781 | +s . c s.c | |
782 | +c . k c.k | |
783 | +c . o c.o | |
784 | +k . w k.w | |
785 | +10-tys. 10-tys . | |
786 | +400-tys. 400-tys . | |
787 | +P W . PW . | |
788 | +jakże śmy jak że śmy | |
789 | +`99 ` 99 | |
790 | +b . r b.r | |
791 | +talk-show talk - show | |
792 | +tingel-tanglu tingel - tanglu | |
793 | +środkowo - środkowo- | |
794 | +e . w e.w | |
795 | +e ( a ) e(a) | |
796 | +zobaczył ( am zobaczył(a m | |
797 | +P. P . | |
798 | +R . P . R.P. | |
799 | +Ś. Ś . | |
800 | +pomaga my pomaga my | |
801 | +SS ' manie SS'manie | |
802 | +P. O. N . R . P . [ IM | I M ] . MJR . H . D . H P.O.N.R.P.IM.MJR.H.D.H | |
803 | +1-13 1 - 13 | |
804 | +1.300 1 . 300 | |
805 | +11—19 11 — 19 | |
806 | +16–18 16 – 18 | |
807 | +17–21 17 – 21 | |
808 | +18-35 18 - 35 | |
809 | +1960-62 1960 - 62 | |
810 | +1973-1975 1973 - 1975 | |
811 | +2.3 2 . 3 | |
812 | +2000–2006 2000 – 2006 | |
813 | +2000–2007 2000 – 2007 | |
814 | +2006-2007 2006 - 2007 | |
815 | +2007-2010 2007 - 2010 | |
816 | +20–23 20 – 23 | |
817 | +226–236 226 – 236 | |
818 | +270-300 mln 270 - 300 mln | |
819 | +30—45 30 — 45 | |
820 | +4,5 4 , 5 | |
821 | +406.100 406 . 100 | |
822 | +5,5 5 , 5 | |
823 | +56–59 56 – 59 | |
824 | +61–63 61 – 63 | |
825 | +6–11 6 – 11 | |
826 | +one ż oneż | |
827 | +Bielska - Białej Bielska-Białej | |
828 | +B 12 B12 | |
829 | +μ g μg | |
830 | +pięć - sześć pięć-sześć | |
831 | +krio - elektronową krio-elektronową | |
832 | +benzo [ a ] pirenu benzo[a]pirenu | |
833 | +Winfryd - Bonifacy Winfryd-Bonifacy | |
834 | +Staręga - Piasek Staręga-Piasek | |
835 | +N - telopeptyd N-telopeptyd | |
836 | +Mettler - Toledo Mettler-Toledo | |
837 | +McMillan - Scott McMillan-Scott | |
838 | +L - askorbinowego L-askorbinowego | |
839 | +Kędzierzynie - Koźlu Kędzierzynie-Koźlu | |
840 | +Jean - Yves Jean-Yves | |
841 | +Gołota - Lewis Gołota-Lewis | |
842 | +Bielsku - Białej Bielsku-Białej | |
843 | +8 - hydroksychinoliny 8-hydroksychinoliny | |
844 | +- y -y | |
845 | +' ' '' | |
846 | +-- - - | |
847 | +: ) :) | |
848 | +: ) ) ) ) ) ) ) ) ) :))))))))) | |
849 | +: - ) :-) | |
850 | +’ ’ ’’ | |
851 | +' ' '' | |
852 | +2 - propanu 2-propanu | |
853 | +3 - merkaptopropanol -1,2 - diol 3-merkaptopropanol-1,2-diol | |
854 | +niby - pies niby-pies | |
855 | +15 - 15- | |
856 | +15-16-latków 15 - 16-latków | |
857 | +srakie - takie srakie-takie | |
... | ... |
corpora/data/brev.tab
0 → 100644
1 | +b r. br . | |
2 | +n p. np . | |
3 | +m. in. m.in . | |
4 | +t zw. tzw . | |
5 | +p n. pn . | |
6 | +d s. ds . | |
7 | +t zn. tzn . | |
8 | +i t p. itp . | |
9 | +i t d. itd . | |
10 | +N p. Np . | |
11 | +r. r . | |
12 | +ub. ub . | |
13 | +ub. r. ub.r . | |
14 | +o. o. o.o . | |
15 | +m. in. m.in. | |
16 | +p t. pt . | |
17 | +n. p. m. n.p.m . | |
18 | +m. m . | |
19 | +in. in . | |
20 | +p. n. e. p.n.e . | |
21 | +n t. nt . | |
22 | +b m. bm . | |
23 | +p w. pw . | |
24 | +ub. r. ub . r . | |
25 | +ub. r. ub.r. | |
26 | +m kw. mkw . | |
27 | +n. e. n . e . | |
28 | +n. n . | |
29 | +b p. bp . | |
30 | +ś p. śp . | |
31 | +T zw. Tzw . | |
32 | +m. st. m . st . | |
33 | +w w. ww . | |
34 | +w. w. w.w . | |
35 | +d/ s d/s | |
36 | +M. in. M.in . | |
37 | +p. p . | |
38 | +p. o. p.o . | |
39 | +p. t. p.t . | |
40 | +c d. cd . | |
41 | +ś. p. ś.p . | |
42 | +m. st. m.st . | |
43 | +m. in. m . in . | |
44 | +C D. CD . | |
45 | +T zn. Tzn . | |
46 | +t j. tj . | |
47 | +i t p itp | |
48 | +i t d itd | |
49 | +km ² km² | |
50 | +km 2 km2 | |
51 | +m 3 m3 | |
52 | +m 2 m2 | |
53 | +μ m μm | |
54 | +p t pt | |
55 | +b m bm | |
56 | +m / s m/s | |
57 | +m ^2 m^2 | |
58 | +n p np | |
59 | +n. e n.e | |
60 | +w/ m w/m | |
61 | +w/ w w/w | |
62 | +M / s M/s | |
63 | +N T G NTG | |
64 | +dm 3 dm3 | |
65 | +m. in m.in | |
66 | +b r br | |
67 | +M. in. M.in. | |
68 | +C d. Cd . | |
69 | +P W PW | |
70 | +c cm ccm | |
71 | +c. d. n c.d.n | |
72 | +cm 3 . cm 3 . | |
73 | +i te de itede | |
74 | +km. 2 km.2 | |
75 | +cm 2 cm2 | |
76 | +j. m. j . m . | |
77 | +m.in m . in | |
78 | +m.in . m.in. | |
... | ... |
corpora/data/coercions.tab
0 → 100644
1 | +Time State Time,State,stan | |
2 | +Hour State Arg,Time,w Time,State,stan | |
3 | +Hour Time Arg,Time,w | |
4 | +HourNumber State Number,Hour,godzina Arg,Time,w Time,State,stan | |
5 | +HourNumber Time Number,Hour,godzina Arg,Time,w | |
6 | +HourNumber Hour Number,Hour,godzina | |
7 | +Day State Arg,Time,w Time,State,stan | |
8 | +Day Time Arg,Time,w | |
9 | +DayNumber State Number,Day,dzień Arg,Time,w Time,State,stan | |
10 | +DayNumber Time Number,Day,dzień Arg,Time,w | |
11 | +DayNumber Day Number,Day,dzień | |
12 | +Month State Arg,Time,w Time,State,stan | |
13 | +Month Time Arg,Time,w | |
14 | +Year State Arg,Time,w Time,State,stan | |
15 | +Year Time Arg,Time,w | |
16 | +YearNumber State Number,Year,rok Arg,Time,w Time,State,stan | |
17 | +YearNumber Time Number,Year,rok Arg,Time,w | |
18 | +YearNumber Year Number,Year,rok | |
19 | +WeekDay State Arg,Time,w Time,State,stan | |
20 | +WeekDay Time Arg,Time,w | |
21 | +Week State Arg,Time,w Time,State,stan | |
22 | +Week Time Arg,Time,w | |
23 | +TimeOfDay State Arg,Time,w Time,State,stan | |
24 | +TimeOfDay Time Arg,Time,w | |
25 | +TimeOrder State Arg,Time,w Time,State,stan | |
26 | +TimeOrder Time Arg,Time,w | |
27 | +TimeApr Time | |
28 | +TimePoint Time | |
29 | +Street Location | |
30 | +Town Location | |
31 | +miasto Town | |
32 | +miasto Location | |
33 | +Quarter Location | |
34 | +dzielnica Location | |
35 | +OrganizationType Division | |
36 | +OrganizationName Division | |
37 | +Profession Person | |
38 | +nazwisko Person | |
39 | +LastName Person | |
40 | +imię Person | |
41 | +FirstName Person | |
42 | +Service Instance | |
43 | + | |
... | ... |
corpora/data/colours.tab
0 → 100644
corpora/data/eniam-correct.tab
0 → 100644
1 | +:)) :) ) | |
2 | +:))) :) ) ) | |
3 | +:-)) :-) ) | |
4 | +;)) ;) ) | |
5 | +AWS - AWS- | |
6 | +czterdziesto - czterdziesto- | |
7 | +dwudziesto - dwudziesto- | |
8 | +długo - długo- | |
9 | +kilku - kilku- | |
10 | +osiemnasto - osiemnasto- | |
11 | +ośmio - ośmio- | |
12 | +pięcio - pięcio- | |
13 | +pięcio -- pięcio- - | |
14 | +piętnasto -- piętnasto- - | |
15 | +przed - przed- | |
16 | +pseudo - pseudo- | |
17 | +˝ 1 ˝1 | |
18 | +˝ albo ˝ . ˝albo˝ . | |
19 | +˝ lub ˝ ˝lub˝ | |
20 | +’70 ’ 70 | |
21 | +dwu -- dwu- - | |
22 | +trzy -- trzy- - | |
23 | +jedno - jedno- | |
24 | +-89 - 89 | |
25 | +dwu - dwu- | |
26 | +1-2 -3 1 - 2-3 | |
27 | +-7,16 - 7 , 16 | |
28 | +rowecki@wp.pl rowecki @ wp . pl | |
29 | +:-) : - ) | |
30 | +- Ekon -Ekon | |
31 | +- Sadat -Sadat | |
32 | +- wsch -wsch | |
33 | +28-29 28 -29 | |
34 | +Praca ˝ Praca˝ | |
35 | +marzycielem - marzycielem- | |
36 | +:)))))) :) ) ) ) ) ) | |
37 | +EKO-U EKO - U | |
38 | +używane . ˝ używane˝ . | |
39 | +kuba.rowecki@wp.pl kuba . rowecki @ wp . pl | |
40 | +-20 - 20 | |
41 | + | |
... | ... |
corpora/data/excluded.tab
0 → 100644
corpora/data/letni.tab
0 → 100644
1 | +1 - klasowa 1-klasowa | |
2 | +1 - letni 1-letni | |
3 | +1 - majowy 1-majowy | |
4 | +10 - dniowe 10-dniowe | |
5 | +10 - dniową 10-dniową | |
6 | +10 - hektarowe 10-hektarowe | |
7 | +10 - kilometrowa 10-kilometrowa | |
8 | +10 - krotnie 10-krotnie | |
9 | +10 - letni 10-letni | |
10 | +10 - letnich 10-letnich | |
11 | +10 - letniego 10-letniego | |
12 | +10 - osobowy 10-osobowy | |
13 | +10 - procentowy 10-procentowy | |
14 | +10 - procentowych 10-procentowych | |
15 | +10 - punktowy 10-punktowy | |
16 | +100 - krotnie 100-krotnie | |
17 | +100 - metrowe 100-metrowe | |
18 | +100 - milionową 100-milionową | |
19 | +100 - tysięczny 100-tysięczny | |
20 | +11 - dniowej 11-dniowej | |
21 | +11 - krotnie 11-krotnie | |
22 | +11 - letni 11-letni | |
23 | +11 - letnia 11-letnia | |
24 | +11 - letniego 11-letniego | |
25 | +11 - metrową 11-metrową | |
26 | +11 - minutowy 11-minutowy | |
27 | +11 - procentowe 11-procentowe | |
28 | +12 - dniową 12-dniową | |
29 | +12 - godzinnej 12-godzinnej | |
30 | +12 - letnia 12-letnia | |
31 | +12 - metrowej 12-metrowej | |
32 | +120 - metrowej 120-metrowej | |
33 | +13 - letni 13-letni | |
34 | +13 - letnia 13-letnia | |
35 | +13 - miesięcznego 13-miesięcznego | |
36 | +13 - miesięczną 13-miesięczną | |
37 | +14 - letnia 14-letnia | |
38 | +14 - letnich 14-letnich | |
39 | +14 - letnią 14-letnią | |
40 | +15 - dniowy 15-dniowy | |
41 | +15 - letnią 15-letnią | |
42 | +15,5 - tysięczna 15,5-tysięczna | |
43 | +16 - bitowe 16-bitowe | |
44 | +16 - dniowej 16-dniowej | |
45 | +16 - letni 16-letni | |
46 | +16 - procentowe 16-procentowe | |
47 | +16 - stopniowej 16-stopniowej | |
48 | +17 - calowe 17-calowe | |
49 | +17 - letnie 17-letnie | |
50 | +17 - letnią 17-letnią | |
51 | +17 - osobową 17-osobową | |
52 | +18 - drużynowa 18-drużynowa | |
53 | +18 - godzinne 18-godzinne | |
54 | +18 - letnia 18-letnia | |
55 | +18 - letnie 18-letnie | |
56 | +18 - letniego 18-letniego | |
57 | +180 - tysięcznego 180-tysięcznego | |
58 | +19 - calowy 19-calowy | |
59 | +19 - kilometrowej 19-kilometrowej | |
60 | +19 - letnich 19-letnich | |
61 | +19 - letnie 19-letnie | |
62 | +19 - letniemu 19-letniemu | |
63 | +190 - letnią 190-letnią | |
64 | +2 - godzinny 2-godzinny | |
65 | +2 - letnia 2-letnia | |
66 | +2 - letnie 2-letnie | |
67 | +2 - metrowy 2-metrowy | |
68 | +2 - odcinkowy 2-odcinkowy | |
69 | +2 - oddziałowa 2-oddziałowa | |
70 | +2 - osobowa 2-osobowa | |
71 | +2,5 - kilogramowy 2,5-kilogramowy | |
72 | +2,5 - letnią 2,5-letnią | |
73 | +2-3 - stopniowe 2 - 3-stopniowe | |
74 | +20 - letniej 20-letniej | |
75 | +20 - letnią 20-letnią | |
76 | +20 - metrowa 20-metrowa | |
77 | +20 - osobowa 20-osobowa | |
78 | +20 - osobowy 20-osobowy | |
79 | +20 - procentowej 20-procentowej | |
80 | +20 - stopniowy 20-stopniowy | |
81 | +21 - letnia 21-letnia | |
82 | +21 - letniej 21-letniej | |
83 | +22 - osobowa 22-osobowa | |
84 | +23 - calowy 23-calowy | |
85 | +23 - letniego 23-letniego | |
86 | +24 - bitowy 24-bitowy | |
87 | +24 - godzinnego 24-godzinnego | |
88 | +24 - godzinnych 24-godzinnych | |
89 | +24 - kondygnacjowej 24-kondygnacjowej | |
90 | +25 - letnia 25-letnia | |
91 | +25 - letnią 25-letnią | |
92 | +25 - metrowej 25-metrowej | |
93 | +25 - procentowy 25-procentowy | |
94 | +26 - letniemu 26-letniemu | |
95 | +28 - letniego 28-letniego | |
96 | +29 - letni 29-letni | |
97 | +29 - letniego 29-letniego | |
98 | +3 - dniowe 3-dniowe | |
99 | +3 - letni 3-letni | |
100 | +3 - letnia 3-letnia | |
101 | +3 - letnie 3-letnie | |
102 | +3 - letnią 3-letnią | |
103 | +3 - miesięczne 3-miesięczne | |
104 | +3 - miesięczny 3-miesięczny | |
105 | +3 - nawowy 3-nawowy | |
106 | +3 - osobowych 3-osobowych | |
107 | +3 - procentowe 3-procentowe | |
108 | +3,5 - kilogramowe 3,5-kilogramowe | |
109 | +30 - krotnego 30-krotnego | |
110 | +30 - letnią 30-letnią | |
111 | +30 - procentowych 30-procentowych | |
112 | +300 - kilogramowa 300-kilogramowa | |
113 | +300 - kilogramowego 300-kilogramowego | |
114 | +300 - tysięcznej 300-tysięcznej | |
115 | +31 - letni 31-letni | |
116 | +31 - letnia 31-letnia | |
117 | +31 - letniego 31-letniego | |
118 | +32 - letnia 32-letnia | |
119 | +32 - letnią 32-letnią | |
120 | +33 - letniemu 33-letniemu | |
121 | +34 - letnia 34-letnia | |
122 | +34 - letniego 34-letniego | |
123 | +35 - letnia 35-letnia | |
124 | +35 - letnią 35-letnią | |
125 | +36 - centymetrowa 36-centymetrowa | |
126 | +36 - letni 36-letni | |
127 | +36 - letnia 36-letnia | |
128 | +38 - letniego 38-letniego | |
129 | +39 - letni 39-letni | |
130 | +4 - drzwiowe 4-drzwiowe | |
131 | +4 - kołowy 4-kołowy | |
132 | +4 - letni 4-letni | |
133 | +4 - letnie 4-letnie | |
134 | +4 - letniego 4-letniego | |
135 | +4 - letniej 4-letniej | |
136 | +4 - tysięcznego 4-tysięcznego | |
137 | +4,5 - metrowej 4,5-metrowej | |
138 | +40 - letnia 40-letnia | |
139 | +40 - metrowy 40-metrowy | |
140 | +40 - osobowa 40-osobowa | |
141 | +42 - letnia 42-letnia | |
142 | +42 - letnią 42-letnią | |
143 | +43 - letniego 43-letniego | |
144 | +44 - letni 44-letni | |
145 | +44 - letniemu 44-letniemu | |
146 | +44 - letnią 44-letnią | |
147 | +45 - procentowego 45-procentowego | |
148 | +46 - letni 46-letni | |
149 | +46 - letniego 46-letniego | |
150 | +46 - osobowa 46-osobowa | |
151 | +47 - letni 47-letni | |
152 | +5 - dniowe 5-dniowe | |
153 | +5 - krotnie 5-krotnie | |
154 | +5 - letnich 5-letnich | |
155 | +5 - letniego 5-letniego | |
156 | +5 - minutowy 5-minutowy | |
157 | +5 - osobową 5-osobową | |
158 | +5 - skrzydłowy 5-skrzydłowy | |
159 | +50 - metrowej 50-metrowej | |
160 | +50 - metrowy 50-metrowy | |
161 | +50 - osobowe 50-osobowe | |
162 | +50 - procentowego 50-procentowego | |
163 | +50 - tysięczna 50-tysięczna | |
164 | +500 - kilometrową 500-kilometrową | |
165 | +500 - złotowy 500-złotowy | |
166 | +52 - letniego 52-letniego | |
167 | +53 - letni 53-letni | |
168 | +53 - letnia 53-letnia | |
169 | +53 - letniego 53-letniego | |
170 | +54 - letnia 54-letnia | |
171 | +55 - letni 55-letni | |
172 | +56 - letni 56-letni | |
173 | +56 - letniego 56-letniego | |
174 | +59 - letni 59-letni | |
175 | +59 - letnią 59-letnią | |
176 | +6 - cylindrowy 6-cylindrowy | |
177 | +6 - godzinne 6-godzinne | |
178 | +6 - letni 6-letni | |
179 | +6 - letnią 6-letnią | |
180 | +6 - osobowej 6-osobowej | |
181 | +6,5 - godzinnej 6,5-godzinnej | |
182 | +6,5 - metrowa 6,5-metrowa | |
183 | +6-9 - letni 6-9-letni | |
184 | +60 - letnie 60-letnie | |
185 | +60 - letniego 60-letniego | |
186 | +60 - minutowej 60-minutowej | |
187 | +600 - litrowe 600-litrowe | |
188 | +61 - letnia 61-letnia | |
189 | +62 - letni 62-letni | |
190 | +62 - letniego 62-letniego | |
191 | +63 - letni 63-letni | |
192 | +63 - letnia 63-letnia | |
193 | +64 - letni 64-letni | |
194 | +65 - tysięcznej 65-tysięcznej | |
195 | +67 - letni 67-letni | |
196 | +68 - letni 68-letni | |
197 | +7 - kondygnacyjnego 7-kondygnacyjnego | |
198 | +7 - krotnie 7-krotnie | |
199 | +7 - letniego 7-letniego | |
200 | +7 - letniej 7-letniej | |
201 | +7 - procentowy 7-procentowy | |
202 | +70 - letnia 70-letnia | |
203 | +70 - letniej 70-letniej | |
204 | +74 - letni 74-letni | |
205 | +77 - letni 77-letni | |
206 | +8 - godzinnych 8-godzinnych | |
207 | +8 - kilometrowego 8-kilometrowego | |
208 | +8,5 - procentową 8,5-procentową | |
209 | +80 - kilogramowego 80-kilogramowego | |
210 | +80 - letnia 80-letnia | |
211 | +80 - tonowe 80-tonowe | |
212 | +81 - letnia 81-letnia | |
213 | +82 - letni 82-letni | |
214 | +83 - letnia 83-letnia | |
215 | +84 - letniej 84-letniej | |
216 | +9 - hektarowy 9-hektarowy | |
217 | +9 - letnia 9-letnia | |
218 | +9 - letniego 9-letniego | |
219 | +9 - letniej 9-letniej | |
220 | +9 - letniemu 9-letniemu | |
221 | +9 - miesięczna 9-miesięczna | |
222 | +9 - miesięczny 9-miesięczny | |
223 | +90 - letniej 90-letniej | |
224 | +900 - osobowej 900-osobowej | |
225 | +12 - letni 12-letni | |
226 | +14 - letni 14-letni | |
227 | +14 - letniego 14-letniego | |
228 | +15 - letniego 15-letniego | |
229 | +16 - letnia 16-letnia | |
230 | +16 - letniej 16-letniej | |
231 | +17 - letnia 17-letnia | |
232 | +18 - letni 18-letni | |
233 | +19 - letni 19-letni | |
234 | +20 - letni 20-letni | |
235 | +20 - letnia 20-letnia | |
236 | +22 - letniego 22-letniego | |
237 | +22 - letniej 22-letniej | |
238 | +24 - letni 24-letni | |
239 | +26 - letniego 26-letniego | |
240 | +28 - letni 28-letni | |
241 | +28 - letnią 28-letnią | |
242 | +3 - letniego 3-letniego | |
243 | +3 - letniej 3-letniej | |
244 | +30 - letnia 30-letnia | |
245 | +35 - letni 35-letni | |
246 | +36 - letniej 36-letniej | |
247 | +37 - letni 37-letni | |
248 | +37 - letnia 37-letnia | |
249 | +4 - krotnie 4-krotnie | |
250 | +40 - letni 40-letni | |
251 | +40 - letniej 40-letniej | |
252 | +43 - letni 43-letni | |
253 | +43 - letnia 43-letnia | |
254 | +45 - osobowa 45-osobowa | |
255 | +49 - letniego 49-letniego | |
256 | +5 - letniej 5-letniej | |
257 | +50 - letniego 50-letniego | |
258 | +52 - letni 52-letni | |
259 | +60 - letni 60-letni | |
260 | +65 - letniego 65-letniego | |
261 | +80 - letni 80-letni | |
262 | +87 - letni 87-letni | |
263 | +15 - letnia 15-letnia | |
264 | +17 - letniego 17-letniego | |
265 | +19 - letniego 19-letniego | |
266 | +25 - letniego 25-letniego | |
267 | +30 - letni 30-letni | |
268 | +34 - letni 34-letni | |
269 | +36 - letniego 36-letniego | |
270 | +38 - letni 38-letni | |
271 | +5 - letnia 5-letnia | |
272 | +54 - letni 54-letni | |
273 | +76 - letni 76-letni | |
274 | +15 - letni 15-letni | |
275 | +17 - letni 17-letni | |
276 | +19 - letnia 19-letnia | |
277 | +22 - letnia 22-letnia | |
278 | +26 - letni 26-letni | |
279 | +30 - letniego 30-letniego | |
280 | +48 - letni 48-letni | |
281 | +10 - minutowych 10-minutowych | |
282 | +21 - letni 21-letni | |
283 | +22 - letni 22-letni | |
284 | +23 - letni 23-letni | |
285 | +32 - letni 32-letni | |
286 | +25 - letni 25-letni | |
287 | +27 - letni 27-letni | |
288 | +33 - letni 33-letni | |
289 | +5 - minutowych 5-minutowych | |
290 | +XIX - wiecznego XIX-wiecznego | |
291 | +XIX - wieczny XIX-wieczny | |
292 | +XVII - wiecznych XVII-wiecznych | |
293 | +7 - procentowym 7-procentowym | |
294 | +XVI - wieczne XVI-wieczne | |
295 | +XVI - wieczny XVI-wieczny | |
296 | +XVII - wieczna XVII-wieczna | |
297 | +XVII - wieczny XVII-wieczny | |
298 | +XVIII - wieczna XVIII-wieczna | |
299 | +XVIII - wieczny XVIII-wieczny | |
300 | +XIII - wiecznym XIII-wiecznym | |
301 | +XIII - wiecznych XIII-wiecznych | |
302 | +XIV - wiecznego XIV-wiecznego | |
303 | +XIX - wiecznych XIX-wiecznych | |
304 | +XV - wieczny XV-wieczny | |
305 | +IV - ligowej IV-ligowej | |
306 | +III - ligowym III-ligowym | |
307 | +III - ligową III-ligową | |
308 | +II - ligowego II-ligowego | |
309 | +II - ligowemu II-ligowemu | |
310 | +I - ligowej I-ligowej | |
311 | +60 - stopniowym 60-stopniowym | |
312 | +435 - litrowym 435-litrowym | |
313 | +41 - letni 41-letni | |
314 | +40 - godzinnym 40-godzinnym | |
315 | +40 - tysięcznym 40-tysięcznym | |
316 | +3,5 - tonowym 3,5-tonowym | |
317 | +24 - karatowym 24-karatowym | |
318 | +24 - osobowym 24-osobowym | |
319 | +22 - procentowym 22-procentowym | |
320 | +20 - tomowym 20-tomowym | |
321 | +20 - złotowym 20-złotowym | |
322 | +2 - godzinnym 2-godzinnym | |
323 | +17 - letnim 17-letnim | |
324 | +16 - letnim 16-letnim | |
325 | +15 - letnim 15-letnim | |
326 | +150 - tysięcznym 150-tysięcznym | |
327 | +150 - procentowego 150-procentowego | |
328 | +100 - tysięcznym 100-tysięcznym | |
329 | +10 - letnim 10-letnim | |
330 | +72 - letniego 72-letniego | |
331 | +11 - letnim 11-letnim | |
332 | +12 - letnim 12-letnim | |
333 | +8 - osobowych 8-osobowych | |
334 | +800 - gramowy 800-gramowy | |
335 | +100 - ml 100-ml | |
336 | +12 - cyfrowy 12-cyfrowy | |
337 | +185 - osobową 185-osobową | |
338 | +27 - Ietni 27-Ietni | |
339 | +4.4 - litrowy 4.4-litrowy | |
340 | +44 - krotne 44-krotne | |
341 | +6 - minutowego 6-minutowego | |
... | ... |
corpora/data/lexicon.dic
0 → 100644
1 | +@PHRASE_NAMES | |
2 | + infp np prepnp adjp ip cp ncp prepncp advp padvp colonp mp intp conj-np | |
3 | + adja prepadjp compar measure num aglt aux-fut | |
4 | + aux-past aux-imp qub interj sinterj hyphen | |
5 | + rparen rparen2 rquot rquot2 rquot3 inclusion | |
6 | + day-interval day-lex day-month-interval date-interval | |
7 | + month-lex month-interval year-interval roman roman-interval | |
8 | + hour-minute-interval hour-interval obj-id match-result | |
9 | + url email day-month day year date hour hour-minute lex | |
10 | + się nie roku to by s <root> <conll_root> or or2 <colon> <speaker> <speaker-end> <squery> <sentence> <paragraph> | |
11 | + <subst> <depr> <ppron12> <ppron3> <siebie> <prep> <num> <numcomp> <intnum> | |
12 | + <realnum> <intnum-interval> <realnum-interval> <symbol> <ordnum> | |
13 | + <date> <date-interval> <hour-minute> <hour> <hour-minute-interval> | |
14 | + <hour-interval> <year> <year-interval> <day> <day-interval> <day-month> | |
15 | + <day-month-interval> <month-interval> <roman> <roman-interval> <roman-ordnum> | |
16 | + <match-result> <url> <email> <phone-number> <postal-code> <obj-id> <list-item> <fixed> <adj> <apron> <adjc> <adjp> <adja> | |
17 | + <adv> <ger> <pact> <ppas> <fin> <bedzie> <praet> <winien> <impt> | |
18 | + <imps> <pred> <aglt> <inf> <pcon> <pant> <qub> <comp> <compar> <conj> <interj> | |
19 | + <sinterj> <burk> <interp> <part> <unk> <building-number> jak czy za do od o w na z u dla przeciwko location time link miesiąc pod niż w_celu | |
20 | + title title-end token inclusion inclusion-end comparp jako quot-end | |
21 | + Time GenericDescription | |
22 | + Location Street StreetName Town TownName | |
23 | + Payment Person Profession ProfessionParam | |
24 | + Division OrganizationName OrganizationType OrganizationTypeParam | |
25 | + Service ServiceParam SericeEffect | |
26 | + Instance Issue Quarter Price Name Confirmation Email Telephone PostalCode | |
27 | + HouseNumber Geolocus Measure Rating OpAdNum Y Animal State Interrogative | |
28 | + Action Attitude PriceDescription RateDescription ServiceParamDescription | |
29 | + null Apoz PHas CORE Has Attr Compar PApoz Merge Count Thme Manr Lemma Arg Time | |
30 | + sem nosem | |
31 | + | |
32 | +@WEIGHTS | |
33 | +symbol_weight=1 | |
34 | +measure_weight=1 | |
35 | + | |
36 | +@LEXICON | |
37 | + | |
38 | +pos=adj,cat=HourNumber: | |
39 | + QUANT[person=ter] | |
40 | + np*number*case*gender*person*coerced*role*node{schema}{local-schema}; | |
41 | + | |
42 | +pos=adj,cat=DayNumber: | |
43 | + QUANT[person=ter] | |
44 | + np*number*case*gender*person*coerced*role*node{schema}{local-schema}; | |
45 | + | |
46 | + | |
47 | +lemma=</sentence>,pos=interp,node=relations,phrase=s: BRACKET | |
48 | + QUANT[role=0] | |
49 | + s*role*node | |
50 | + \?(ip*T*T*T*T*null*sit+cp*int*T*T*null*sit+cp*sub*T*T*null*sit+mp*T*null*sit+intp*T*null*sit+interj*T*null*sit+sinterj*T*null*sit+np*T*nom*T*ter*Location*null*sit); | |
51 | +lemma=</sentence>,pos=interp,node=relations,phrase=s: BRACKET | |
52 | + QUANT[role=0] | |
53 | + s*role*node | |
54 | + \?(mp*State*null*sit+intp*State*null*sit+np*T*T*T*ter*State*null*sit+xp*State*null*sit+interj*T*null*sit+sinterj*T*null*sit); | |
55 | + | |
56 | + | |
... | ... |
corpora/data/mwe.tab
0 → 100644
1 | +Y Y subst:sg:_:_ | |
2 | +nad ranem nad ranem adv | |
3 | +jak daleko jak daleko adv | |
4 | +o ile o_ile comp | |
5 | +w celu w celu prep:gen | |
6 | +wraz z wraz z prep:inst | |
7 | +w górę w_górę qub | |
8 | +dzień dobry dzień dobry interj | |
9 | +Dzień Dobry dzień dobry interj | |
10 | +do widzenia do widzenia interj | |
11 | +do zobaczenia do zobaczenia interj | |
12 | +do następnego razu do następnego razu interj | |
13 | +luz luz interj | |
14 | +yes yes interj | |
15 | +yhym yhym interj | |
16 | +yeap yeap interj | |
17 | +aj aj interj | |
18 | +niech będzie niech będzie interj | |
19 | +wszystko jasne wszystko jasne interj | |
20 | +to ważne to ważne interj | |
21 | +trochę słabo trochę słabo interj | |
22 | +to znaczy to znaczy interj | |
23 | +w porządku w porządku interj | |
24 | +na przykład na przykład interj | |
25 | + | |
... | ... |
corpora/data/mwe2.tab
0 → 100644
corpora/data/ne.tab
0 → 100644
corpora/data/nkjp-correct.tab
0 → 100644
1 | +( C ) Maćka (C)Maćka | |
2 | +-21 - latków - 21-latków | |
3 | +. pl .pl | |
4 | +... ewski ...ewski | |
5 | +0-20 -21 -22 0-20-21-22 | |
6 | +0-46 855 -45 -26 ) . 0-46 855-45-26 ) . | |
7 | +0-800 -20 -150 0-800-20-150 | |
8 | +0603 / 166-367 0603/166-367 | |
9 | +1 ) 1) | |
10 | +1 , 115.000 1,115.000 | |
11 | +1 a . 1a. | |
12 | +1 d 1d | |
13 | +1 f 1f | |
14 | +1004 - A 1004-A | |
15 | +109 P 4 109P4 | |
16 | +11 b 11b | |
17 | +11 c 11c | |
18 | +12 - b 12-b | |
19 | +12 E 12E | |
20 | +12 a 12a | |
21 | +12 b 12b | |
22 | +13 . 13. | |
23 | +13 b 13b | |
24 | +14 a 14a | |
25 | +146 A 146A | |
26 | +146 B 146B | |
27 | +147 h 147h | |
28 | +147 j 147j | |
29 | +158 - Pound 158-Pound | |
30 | +16 b 16b | |
31 | +18 a 18a | |
32 | +18 b 18b | |
33 | +18 c 18c | |
34 | +1912-199 ? 1912 - 199? | |
35 | +1:100 000 . 1 : 100 000 . | |
36 | +2 + 2+ | |
37 | +2 - 2- | |
38 | +2 - 000 - 001 2-000-001 | |
39 | +2 A 4 2A4 | |
40 | +2 b 2b | |
41 | +2 c 2c | |
42 | +2 d 2d | |
43 | +2 e 2e | |
44 | +2 p 2p | |
45 | +24 A 24A | |
46 | +24 d 24d | |
47 | +248 Z 248Z | |
48 | +25 a 25a | |
49 | +25 h 25h | |
50 | +251 a 251a | |
51 | +3 . c 3.c | |
52 | +3 b 3b | |
53 | +3 d 3d | |
54 | +3 mamy 3mamy | |
55 | +35 ´ 35´ | |
56 | +4 . a . 4.a. | |
57 | +4 . b 4.b | |
58 | +4 . c . 4.c. | |
59 | +4 media 4media | |
60 | +40 - 045 40-045 | |
61 | +40 - krotność 40-krotność | |
62 | +40 b 40b | |
63 | +41 ai 41a i | |
64 | +418-26 -88 418-26-88 | |
65 | +44 m -7 44m-7 | |
66 | +455 - A 455-A | |
67 | +467-89 -45 467-89-45 | |
68 | +492 A 492A | |
69 | +492 B 492B | |
70 | +5 a 5a | |
71 | +5 d 5d | |
72 | +5 x 5x | |
73 | +50 - krotność 50-krotność | |
74 | +50 - metre 50-metre | |
75 | +50 b 50b | |
76 | +510 256 732 . 510 256 732 . | |
77 | +515 - A 515-A | |
78 | +52 a 52a | |
79 | +520-59 -50 520-59-50 | |
80 | +56 e 56e | |
81 | +56 zf 56zf | |
82 | +58 / fin / 50 58/fin/50 | |
83 | +6 . c 6.c | |
84 | +6 . d . 6.d. | |
85 | +6 ABX 2 6ABX2 | |
86 | +6 LX 2 6LX2 | |
87 | +60.22.11 - 00.00 60.22.11-00.00 | |
88 | +635-25 - 00 635-25-00 | |
89 | +642-65 -85 642-65-85 | |
90 | +650-91 -58 650-91-58 | |
91 | +654-66 -91 654-66-91 | |
92 | +6667 / 88 / IV 6667/88/IV | |
93 | +7 . a . 7.a. | |
94 | +7 . b 7.b | |
95 | +7 . d 7.d | |
96 | +7 . f 7.f | |
97 | +70 . 70. | |
98 | +71 a 71a | |
99 | +71 c 71c | |
100 | +73-65 -32 73-65-32 | |
101 | +735 ie 735ie | |
102 | +8 ) ) ) ) 8)))) | |
103 | +8 ) ) ) ) ) ) ) ) ) 8))))))))) | |
104 | +8 - ) 8-) | |
105 | +8 X 8X | |
106 | +8 a 8a | |
107 | +8.1 . 8.1. | |
108 | +80 ' 80' | |
109 | +81 A 81A | |
110 | +825-44 - 02 825-44-02 | |
111 | +825-53 -94 825-53-94 | |
112 | +9 . b 9.b | |
113 | +9 b 9b | |
114 | +9 c 9c | |
115 | +9 g 9g | |
116 | +9 x 9x | |
117 | +90 a 90a | |
118 | +96 ' 96' | |
119 | +: - D :-D | |
120 | +: D :D | |
121 | +: O :O | |
122 | +: O ( :O( | |
123 | +: P ) ) ) ) ) ) ) ) :P)))))))) | |
124 | +:P ) ) ) ) ) ) ) ) :P)))))))) | |
125 | +:P lany : Plany | |
126 | +: o ) ) :o)) | |
127 | +: o ) ) ) ) :o)))) | |
128 | +:))) ) ) ) ) :))))))) | |
129 | +:))) ) ) ) ) ) ) :))))))))) | |
130 | +:))) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) :))))))))))))))))))) | |
131 | +; P ) ) ) ;P))) | |
132 | +;P ) ) ) ;P))) | |
133 | +; d ;d | |
134 | +; ddd ;ddd | |
135 | +; p ;p | |
136 | +A -1 A-1 | |
137 | +A . A. | |
138 | +A . C A.C | |
139 | +A . D A.D | |
140 | +A . D . D . A . M A.D.D.A.M | |
141 | +A . K . M A.K.M | |
142 | +A 3 A3 | |
143 | +A 320 A320 | |
144 | +AC + 79 AC+79 | |
145 | +ADP - RYBOLIZACJA ADP-RYBOLIZACJA | |
146 | +AG -23 AG-23 | |
147 | +AGENTA 17 AGENTA17 | |
148 | +AN -124 AN-124 | |
149 | +AWS -2002 AWS-2002 | |
150 | +Abu - Tor Abu-Tor | |
151 | +Al - Mansur Al-Mansur | |
152 | +Al - Mashru Al-Mashru | |
153 | +Al - Qaidy Al-Qaidy | |
154 | +Ali - ili Ali-ili | |
155 | +Aujourd ' hui Aujourd'hui | |
156 | +Aulnay - Bois Aulnay-Bois | |
157 | +Avallac ' h Avallac'h | |
158 | +B ) B) | |
159 | +B + R B+R | |
160 | +B - 06250 B-06250 | |
161 | +B -1 B B-1B | |
162 | +B -14 B-14 | |
163 | +B -17 B-17 | |
164 | +B -2 B-2 | |
165 | +B -24 B-24 | |
166 | +B -29 B-29 | |
167 | +B -52 B-52 | |
168 | +B . B. | |
169 | +B 10 M B10M | |
170 | +B 3 B3 | |
171 | +Bailly - Salins Bailly-Salins | |
172 | +Barr ` es Barr`es | |
173 | +Bianco - neri Bianco-neri | |
174 | +Brockley - Lewisham Brockley-Lewisham | |
175 | +C ' est C'est | |
176 | +C - c C-c | |
177 | +C . C. | |
178 | +C 18 H 28 O 2 C18H28O2 | |
179 | +C 18 H 30 O 2 C18H30O2 | |
180 | +C 18 H 34 O 2 C18H34O2 | |
181 | +C 2 H 5 OH C2H5OH | |
182 | +C 70 C70 | |
183 | +CO 2 CO2 | |
184 | +Canal + Canal+ | |
185 | +Canale + Canale+ | |
186 | +Cassino - Orbis Cassino-Orbis | |
187 | +Clarion - Clipperton Clarion-Clipperton | |
188 | +Coca - Cola Coca-Cola | |
189 | +Coca - Coli Coca-Coli | |
190 | +Cosmo - Worlds Cosmo-Worlds | |
191 | +Czechowic - Dziedzic Czechowic-Dziedzic | |
192 | +Czou - kou - tien Czou-kou-tien | |
193 | +D ' Arc D'Arc | |
194 | +D ' Viosion D'Viosion | |
195 | +D - dur D-dur | |
196 | +D . D. | |
197 | +D : D: | |
198 | +DDR 400 DDR400 | |
199 | +DVD - XV 10 DVD-XV10 | |
200 | +DW 592 DW592 | |
201 | +DW 594 DW594 | |
202 | +Daszewo -1 Daszewo-1 | |
203 | +Do - centa Do-centa | |
204 | +DŁUGOPOLE - ZDRÓJ DŁUGOPOLE-ZDRÓJ | |
205 | +E 100 E100 | |
206 | +E 18 E18 | |
207 | +E 2 E2 | |
208 | +E 261 E261 | |
209 | +E 3 E3 | |
210 | +E : E: | |
211 | +EP -3 EP-3 | |
212 | +Eishockey - Liga Eishockey-Liga | |
213 | +En - laia En-laia | |
214 | +Era - Art Era-Art | |
215 | +Express - AM 22 Express-AM22 | |
216 | +F - dur F-dur | |
217 | +F - ka F-ka | |
218 | +F -1 F-1 | |
219 | +F -18 F-18 | |
220 | +F -8 F-8 | |
221 | +F 3 F3 | |
222 | +F 5 F5 | |
223 | +F 7 F7 | |
224 | +Ferte - sous - Jouarre Ferte-sous-Jouarre | |
225 | +Fool - X Fool-X | |
226 | +Frieda - K Frieda-K | |
227 | +Fu - Czu Fu-Czu | |
228 | +G -14 G-14 | |
229 | +G -7 G-7 | |
230 | +G -8 G-8 | |
231 | +G 11 G11 | |
232 | +G 12 G12 | |
233 | +Gekij _ o Gekij_o | |
234 | +Gheorghiu - Dej Gheorghiu-Dej | |
235 | +Győr - Moson - Sopron Győr-Moson-Sopron | |
236 | +H - K H-K | |
237 | +H 5 N 1 H5N1 | |
238 | +HIV - dodatnich HIV-dodatnich | |
239 | +HSW - Trading HSW-Trading | |
240 | +Ha - Nocri Ha-Nocri | |
241 | +Halla - li Halla-li | |
242 | +Har - Homa Har-Homa | |
243 | +Head - Up Head-Up | |
244 | +Hewlett - Packard Hewlett-Packard | |
245 | +Home - Fleet Home-Fleet | |
246 | +Humięcino - Klary Humięcino-Klary | |
247 | +I ' ll I'll | |
248 | +I ' ve I've | |
249 | +I - szy I-szy | |
250 | +I -22 I-22 | |
251 | +I . I. | |
252 | +II . 16 II.16 | |
253 | +ITU - T ITU-T | |
254 | +IV . 5 IV.5 | |
255 | +Idol 92 Idol92 | |
256 | +Invest - Consult Invest-Consult | |
257 | +Invest - Euro Invest-Euro | |
258 | +Iran - Sarbas Iran-Sarbas | |
259 | +Iwano - Frankowsku Iwano-Frankowsku | |
260 | +J @ zz J@zz | |
261 | +JAS - MOS JAS-MOS | |
262 | +Ja ' raia Ja'raia | |
263 | +Ja 96 Ja96 | |
264 | +Jak - mu - tam Jak-mu-tam | |
265 | +Jangi - Julu Jangi-Julu | |
266 | +Jar -32 Jar-32 | |
267 | +Jean - Romainem Jean-Romainem | |
268 | +Jeana - Luca Jeana-Luca | |
269 | +Jeana - Paula Jeana-Paula | |
270 | +Jie - jie Jie-jie | |
271 | +K - International K-International | |
272 | +K -2 K-2 | |
273 | +K 30 K30 | |
274 | +K 6 K6 | |
275 | +K 8 K8 | |
276 | +KC -135 KC-135 | |
277 | +KJ 2 KJ2 | |
278 | +KPN - OP KPN-OP | |
279 | +KRL - D KRL-D | |
280 | +KT 400 KT400 | |
281 | +Karl - Heinz Karl-Heinz | |
282 | +Kogel - mogel Kogel-mogel | |
283 | +Koni - Art Koni-Art | |
284 | +Konstancina - Jeziorny Konstancina-Jeziorny | |
285 | +Konstancinie - Jeziornie Konstancinie-Jeziornie | |
286 | +Kołomyi - Czerniowcach Kołomyi-Czerniowcach | |
287 | +Ku - ku - ku - ku Ku-ku-ku-ku | |
288 | +Kórniku - Bninie Kórniku-Bninie | |
289 | +L ' Authentique L'Authentique | |
290 | +L ' Oreal L'Oreal | |
291 | +L ' odeur L'odeur | |
292 | +L -4 L-4 | |
293 | +L 4 L4 | |
294 | +L 7 L7 | |
295 | +LI _ I _ I LI_I_I | |
296 | +M - A M-A | |
297 | +M -2 M-2 | |
298 | +M -28 M-28 | |
299 | +M -80 M-80 | |
300 | +M . A . S . H M.A.S.H | |
301 | +M . P M.P | |
302 | +M 12 M12 | |
303 | +M 14 M14 | |
304 | +M 28 M28 | |
305 | +M 5 M5 | |
306 | +M 6 M6 | |
307 | +M 7 M7 | |
308 | +MATIKI ' S MATIKI'S | |
309 | +MD -80 MD-80 | |
310 | +MI -17 MI-17 | |
311 | +MI -2 MI-2 | |
312 | +MI 5 MI5 | |
313 | +MNP -20 M MNP-20M | |
314 | +MP 5 KA 4 MP5KA4 | |
315 | +MPEG -1 MPEG-1 | |
316 | +MPEG -4 MPEG-4 | |
317 | +Magdasf 1 Magdasf1 | |
318 | +Mawsu ' at Mawsu'at | |
319 | +Mc - danie Mc-danie | |
320 | +Medix - Force Medix-Force | |
321 | +Mi -8 Mi-8 | |
322 | +Montesquiou - Fezensac Montesquiou-Fezensac | |
323 | +Mołczat ' Mołczat' | |
324 | +N . T N.T | |
325 | +N 2 N2 | |
326 | +N 33 N33 | |
327 | +NSZ - NOW NSZ-NOW | |
328 | +NSZ - ZJ NSZ-ZJ | |
329 | +Na + Na+ | |
330 | +Nag – czu Nag–czu | |
331 | +Niwka - Modrzejów Niwka-Modrzejów | |
332 | +Non - Stop Non-Stop | |
333 | +O - Polskich O-Polskich | |
334 | +O _ o O_o | |
335 | +OPZZ / p / 630 / 2000 OPZZ/p/630/2000 | |
336 | +Olszewo - Reszki Olszewo-Reszki | |
337 | +On - the - Loose On-the-Loose | |
338 | +Orzeł - Łysiak Orzeł-Łysiak | |
339 | +Ostrowiec - Osada Ostrowiec-Osada | |
340 | +Ouest - France Ouest-France | |
341 | +P ! nk P!nk | |
342 | +P - ski P-ski | |
343 | +P -64 P-64 | |
344 | +P . E . N . P.E.N. | |
345 | +P. O. N . R . P . IM . MJR . H . D . H P.O.N.R.P.IM.MJR.H.D.H | |
346 | +P 22 P22 | |
347 | +PIT -37 PIT-37 | |
348 | +PN -55 PN-55 | |
349 | +PN -92 / T -20091 PN-92/T-20091 | |
350 | +PR 3 PR3 | |
351 | +PRZERZECZYN - ZDRÓJ PRZERZECZYN-ZDRÓJ | |
352 | +PZL -104 PZL-104 | |
353 | +Pen - Press Pen-Press | |
354 | +PiS - uarami PiS-uarami | |
355 | +Pick - up Pick-up | |
356 | +Pif-paf Pif - paf | |
357 | +Pol - Mot Pol-Mot | |
358 | +Pol - Orsa Pol-Orsa | |
359 | +Polanicy - Zdroju Polanicy-Zdroju | |
360 | +Poor - Know Poor-Know | |
361 | +Port - Bau Port-Bau | |
362 | +Prefabet - Lisów Prefabet-Lisów | |
363 | +Przycisk 2 _ 1 Przycisk2_1 | |
364 | +Prêt - à - Porter Prêt-à-Porter | |
365 | +Q 1 Q1 | |
366 | +R - XXI R-XXI | |
367 | +R . E . M . R.E.M. | |
368 | +R 1 R1 | |
369 | +R 2 CH 2 R2CH2 | |
370 | +R 2 D 2 R2D2 | |
371 | +R 3 CH R3CH | |
372 | +R ’ N ’ B R’N’B | |
373 | +RCH 3 RCH3 | |
374 | +RS 2 RS2 | |
375 | +RS 4 RS4 | |
376 | +RTL 7 RTL7 | |
377 | +Raciborzu - Markowicach Raciborzu-Markowicach | |
378 | +Randez - Vous Randez-Vous | |
379 | +Rock ’ N ’ Roll Rock’N’Roll | |
380 | +Rojek – Decor Rojek–Decor | |
381 | +S - Rejestrów S-Rejestrów | |
382 | +S - Video S-Video | |
383 | +S - ka S-ka | |
384 | +S -400 S-400 | |
385 | +S . T . W . S.T.W. | |
386 | +SAAB -9000 SAAB- 9000 | |
387 | +SETI @ Home SETI@Home | |
388 | +SLD - PSL SLD-PSL | |
389 | +SP 5 SP5 | |
390 | +SQ 25 SQ25 | |
391 | +SS - Standartenführera SS-Standartenführera | |
392 | +SS - Standartenführerem SS-Standartenführerem | |
393 | +Sado - maso Sado-maso | |
394 | +Saint - Germain Saint-Germain | |
395 | +Saint - Leu Saint-Leu | |
396 | +Saint - Leu - la - Forêt Saint-Leu-la-Forêt | |
397 | +Saint - Loup Saint-Loup | |
398 | +Saint - Sulpice Saint-Sulpice | |
399 | +Sainte - Jeanne - de - Chantal Sainte-Jeanne-de-Chantal | |
400 | +Saltykov - Shchedrin Saltykov-Shchedrin | |
401 | +Sankt - Leningrad Sankt-Leningrad | |
402 | +Sat - Kom Sat-Kom | |
403 | +Sep - Sin Sep-Sin | |
404 | +Shi - king Shi-king | |
405 | +Ship - Service Ship-Service | |
406 | +Shu - king Shu-king | |
407 | +Siczek - Zalewska Siczek-Zalewska | |
408 | +SnCl 2 SnCl2 | |
409 | +Su -22 Su-22 | |
410 | +Super - Nova Super-Nova | |
411 | +Szin - Bet Szin-Bet | |
412 | +T - Shirt T-Shirt | |
413 | +T . Love T.Love | |
414 | +T 5 T5 | |
415 | +TV 4 TV4 | |
416 | +TVP 1 TVP1 | |
417 | +Tele - Energo Tele-Energo | |
418 | +Top -10 Top-10 | |
419 | +Trata - ta - ta Trata-ta-ta | |
420 | +UOP - ki UOP-ki | |
421 | +UST -110 UST-110 | |
422 | +UTT - SUB UTT-SUB | |
423 | +Uecker - Randow Uecker-Randow | |
424 | +V 1 V1 | |
425 | +V 2 V2 | |
426 | +VC -25 A VC-25A | |
427 | +VDC -300 VDC-300 | |
428 | +Valg - Podhala Valg-Podhala | |
429 | +Valg - Podhale Valg-Podhale | |
430 | +Vigée - Lebrun Vigée-Lebrun | |
431 | +W - Ł - W W-Ł-W | |
432 | +Writers ' Writers' | |
433 | +Wu - El Wu-El | |
434 | +X - lecia X-lecia | |
435 | +X - tiny X-tiny | |
436 | +X -100 X-100 | |
437 | +XF 11 XF11 | |
438 | +XII . 1 XII.1 | |
439 | +Y - usiu Y-usiu | |
440 | +Zosia 12 Zosia12 | |
441 | +AGM -86 AGM-86 | |
442 | +BGM -109 BGM-109 | |
443 | +CM 6000 CM6000 | |
444 | +Każe - duba Każe-duba | |
445 | +Lem _ ura Lem_ura | |
446 | +Okocimiem -2 Okocimiem-2 | |
447 | +Rucianem - Nidzie Rucianem-Nidzie | |
448 | +SDM - M 61 SDM-M61 | |
449 | +SKM - ek SKM-ek | |
450 | +fiksum - dyrdum fiksum-dyrdum | |
451 | +al - Azhar al-Azhar | |
452 | +al - Jihad al-Jihad | |
453 | +al - Mihraniego al-Mihraniego | |
454 | +al - Qaida al-Qaida | |
455 | +al - Qaidy al-Qaidy | |
456 | +al - Quaidy al-Quaidy | |
457 | +al - Sadra al-Sadra | |
458 | +al - Zawahiriego al-Zawahiriego | |
459 | +all ' amatricane all'amatricane | |
460 | +anty ( radio ) aktywność anty(radio)aktywność | |
461 | +aren ' t aren't | |
462 | +can ' t can't | |
463 | +coctail - barze coctail-barze | |
464 | +color - power color-power | |
465 | +e " S " mańskim e"S"mańskim | |
466 | +e " S " manami e"S"manami | |
467 | +e - poczta e-poczta | |
468 | +el - Bahari el-Bahari | |
469 | +fast - foodu fast-foodu | |
470 | +fast - foodów fast-foodów | |
471 | +fin - de - siecle fin-de-siecle | |
472 | +fin - de-siecle'u fin-de-siecle'u | |
473 | +h 2 - b 8 h2-b8 | |
474 | +handy - size handy-size | |
475 | +happy - endem happy-endem | |
476 | +http://www.georgehart.com/pavilion.html, http://www.georgehart.com/pavilion.html , | |
477 | +http://www.li.net/~george/pavilion.html, http://www.li.net/~george/pavilion.html , | |
478 | +http://wyborcza.pl/1,75478,7072995,Na_ZUS_nie_plac... http://wyborcza.pl/1,75478,7072995,Na_ZUS_nie_plac . . . | |
479 | +hydro - powietrznego hydro-powietrznego | |
480 | +i - necie i-necie | |
481 | +isn ' t isn't | |
482 | +iugatio - capitatio iugatio-capitatio | |
483 | +jumbo - jeta jumbo-jeta | |
484 | +językowo - stylistycznego językowo-stylistycznego | |
485 | +kan ' iak kan'iak | |
486 | +koagulazo - dodatnich koagulazo-dodatnich | |
487 | +korned - bify korned-bify | |
488 | +latina - pl latina-pl | |
489 | +lkj 908 ż 7654 fds lkj908ż7654fds | |
490 | +maine - coony maine-coony | |
491 | +multi - master multi-master | |
492 | +n ' est n'est | |
493 | +nasza - klasa.pl nasza-klasa.pl | |
494 | +news : 9 ntb 5 p $ jst $ 1 @ h 1 . uw.edu.pl news:9ntb5p$jst$1@h1.uw.edu.pl | |
495 | +news : pl . news . czytniki news:pl.news.czytniki | |
496 | +news : pl . sci . psychologia news:pl.sci.psychologia | |
497 | +nocz ' nocz' | |
498 | +non - stop non-stop | |
499 | +o - da - da - da o-da-da-da | |
500 | +pl . hum . teatr pl.hum.teatr | |
501 | +pl . sci . filozofia pl.sci.filozofia | |
502 | +pl . soc . cośtam pl.soc.cośtam | |
503 | +pl . soc . religia pl.soc.religia | |
504 | +political - corect political-corect | |
505 | +polsko - polsko- | |
506 | +pop - rocku pop-rocku | |
507 | +punk - rocka punk-rocka | |
508 | +quasi - jednorodnych quasi-jednorodnych | |
509 | +quasi - mocarstwowej quasi-mocarstwowej | |
510 | +ry ( d ) zykować ry(d)zykować | |
511 | +s ... s... | |
512 | +sado - maso sado-maso | |
513 | +samuraj - ko samuraj-ko | |
514 | +science - fiction science-fiction | |
515 | +second - handach second-handach | |
516 | +seks - maniaka seks-maniaka | |
517 | +serdako - suknię serdako-suknię | |
518 | +t - shirtach t-shirtach | |
519 | +tic - taki tic-taki | |
520 | +www.gadki.lublin.pl/east / z / index . html www.gadki.lublin.pl/east/z/index.html | |
521 | +zwisający - mi zwisający-mi | |
522 | +Łódzko - Dymaczewskie Łódzko-Dymaczewskie | |
523 | +Świeradowa - Zdroju Świeradowa-Zdroju | |
524 | +1 b 1b | |
525 | +1 n 1n | |
526 | +11 a 11a | |
527 | +125 p 125p | |
528 | +2 n 2n | |
529 | +2000 + 2000+ | |
530 | +50 . 50. | |
531 | +75 d 75d | |
532 | +94 a 94a | |
533 | +95 ' 95' | |
534 | +: O ) :O) | |
535 | +: P :P | |
536 | +; P ;P | |
537 | +A / H 1 N 1 A/H1N1 | |
538 | +AN / SPY -1 AN/SPY-1 | |
539 | +Ab - Rama Ab-Rama | |
540 | +Al - Dżazira Al-Dżazira | |
541 | +B - B B-B | |
542 | +Binowo - Park Binowo-Park | |
543 | +C + + C++ | |
544 | +C - dur C-dur | |
545 | +C 1 C1 | |
546 | +C 60 C60 | |
547 | +Cobellex - Pol Cobellex-Pol | |
548 | +F 2 F2 | |
549 | +F 4 F4 | |
550 | +F 8 F8 | |
551 | +G 3 G3 | |
552 | +Kujawsko - Pomorskie Kujawsko-Pomorskie | |
553 | +L 3 L3 | |
554 | +Lépanges - sur - Vologne Lépanges-sur-Vologne | |
555 | +M - skiego M-skiego | |
556 | +M 15 M15 | |
557 | +MP 5 MP5 | |
558 | +Mi -2 Mi-2 | |
559 | +MiG -21 MiG-21 | |
560 | +NATURA - TUR NATURA-TUR | |
561 | +Nord - Pas-de-Calais Nord-Pas-de-Calais | |
562 | +S - check S-check | |
563 | +S 3 S3 | |
564 | +T -72 T-72 | |
565 | +U 2 U2 | |
566 | +WIG 20 WIG20 | |
567 | +Y 2 K Y2K | |
568 | +Z / SI 444 SKAK 33883 AQASA Z/SI444SKAK33883AQASA | |
569 | +chłopka - roztropka chłopka-roztropka | |
570 | +news : pl . rec . kuchnia news:pl.rec.kuchnia | |
571 | +3 c 3c | |
572 | +4 a 4a | |
573 | +CD - R CD-R | |
574 | +F 1 F1 | |
575 | +K -202 K-202 | |
576 | +Okocimia -2 Okocimia-2 | |
577 | +e " S " manów e"S"manów | |
578 | +126 p 126p | |
579 | +3 D 3D | |
580 | +3 a 3a | |
581 | +A 2 A2 | |
582 | +Ab - Ram Ab-Ram | |
583 | +MP 3 MP3 | |
584 | +2 a 2a | |
585 | +A 4 A4 | |
586 | +Ś - W Ś-W | |
587 | +F -16 F-16 | |
588 | +1 a 1a | |
589 | +mp 3 mp3 | |
590 | +XM 1 XM1 | |
591 | +( : (: | |
592 | +60546 . fizjks@iftia.univ.gda.pl 60546.fizjks@iftia.univ.gda.pl | |
593 | +9 b 16 v 3 $ pmh $ 1 @ news.onet.pl 9b16v3$pmh$1@news.onet.pl | |
594 | +;))) ) ) ) ) ) ) ) ) ) ) ) ) ;))))))))))))))) | |
595 | +bart @ 1 under.com bart@1under.com | |
596 | +bburski @ friko 6 . onet.pl bburski@friko6.onet.pl | |
597 | +goury @ o 2 . pl goury@o2.pl | |
598 | +izaa 83 @ interia.pl izaa83@interia.pl | |
599 | +latina - pl - subskrypcja@hydepark.pl latina-pl-subskrypcja@hydepark.pl | |
600 | +latina - pl@hydepark.pl latina-pl@hydepark.pl | |
601 | +lebron 26 @ vp.pl lebron26@vp.pl | |
602 | +marcoos _ 87 @ wp.pl marcoos_87@wp.pl | |
603 | +ona _ _ _ @ op.pl ona___@op.pl | |
604 | +pl . soc . [ cośtam | cośta m ] pl.soc.cośtam | |
605 | +rad _ rez@poczta.onet.pl rad_rez@poczta.onet.pl | |
606 | +slawek . sa@gazeta.pl slawek.sa@gazeta.pl | |
607 | +sonka _ 86 @ wp.pl sonka_86@wp.pl | |
608 | +synapse @ friko 2 . onet.pl synapse@friko2.onet.pl | |
609 | +wer 23451 erdfghjklłlpoiujzhzbdfghuz 786543 ihgfcym wer23451erdfghjklłlpoiujzhzbdfghuz786543ihgfcym | |
610 | +BZT 5 BZT5 | |
611 | +- 0.8 -0.8 | |
612 | +- kami -kami | |
613 | +- ski -ski | |
614 | +-- ) --) | |
615 | +-26 - 26 | |
616 | +-29 - 29 | |
617 | +-651 - 651 | |
618 | +-9 - 9 | |
619 | +1-2 -3 1-2-3 | |
620 | +16-6 16 - 6 | |
621 | +23-2 23 - 2 | |
622 | +28-1 28 - 1 | |
623 | +3 x 16 3x16 | |
624 | +421-30 421 - 30 | |
625 | +429-56 429 - 56 | |
626 | +: o ) :o) | |
627 | +B P BP | |
628 | +C D CD | |
629 | +O . K O.K | |
630 | +SG 2 SG2 | |
631 | +SG 3 SG3 | |
632 | +Znaszli Znasz li | |
633 | +` cause `cause | |
634 | +Iże eś I że eś | |
635 | +Kurważe ż Kurwa że ż | |
636 | +Tyże ś Ty że ś | |
637 | +- 0.3 -0.3 | |
638 | +2 - 000-001 2-000-001 | |
639 | +B M - ki BM-ki | |
640 | +P W 5 PW5 | |
641 | +atom 2004 meister@gmail.com atom2004meister@gmail.com | |
642 | +temperatury-i temperatury - i | |
643 | +włast ' włast' | |
644 | +x ) x) | |
645 | +y @ z y@z | |
646 | +zy ( g ) zakiem zy(g)zakiem | |
647 | +—16 — 16 | |
648 | +e - e- | |
649 | +IV - ligowcy IV-ligowcy | |
650 | +byśta by śta | |
651 | +koszernieV koszernie V | |
652 | +AN ] SPY -1 AN]SPY-1 | |
653 | +PM 63 PM63 | |
654 | +1 B 1B | |
655 | +1 C 210 1C210 | |
656 | +CYP 1 A 2 CYP1A2 | |
657 | +CYP 2 C 19 CYP2C19 | |
658 | +CYP 2 C 8 CYP2C8 | |
659 | +CYP 2 C 9 CYP2C9 | |
660 | +CYP 3 A 4 CYP3A4 | |
661 | +D 4 D4 | |
662 | +D 5 D5 | |
663 | +D 7 D7 | |
664 | +FMR 1 FMR1 | |
665 | +Eija - Riitta Eija-Riitta | |
666 | +FRiB - EXu FRiB-EXu | |
667 | +Fuego - Falmana Fuego-Falmana | |
668 | +G - Star G-Star | |
669 | +G 20 G20 | |
670 | +G 8 G8 | |
671 | +H 5 H5 | |
672 | +HA - Il HA-Il | |
673 | +III -1 III-1 | |
674 | +III -2 III-2 | |
675 | +JD 1 JD1 | |
676 | +N 3 N3 | |
677 | +R 2 R2 | |
678 | +Remo - Bud Remo-Bud | |
679 | +SIRT 1 SIRT1 | |
680 | +UGTA 1 UGTA1 | |
681 | +UGTA 3 UGTA3 | |
682 | +V 12 V12 | |
683 | +Win 32 Win32 | |
684 | +ex 68151010 ex68151010 | |
685 | +ex 68159990 ex68159990 | |
686 | +ex 69021000 ex69021000 | |
687 | +ex 69039020 ex69039020 | |
688 | +T -38 T-38 | |
689 | +SRT 1720 SRT1720 | |
690 | +SFMI - Chronopost SFMI-Chronopost | |
691 | +PPE - DE PPE-DE | |
692 | +żezostanie że zostanie | |
693 | +półgodziny pół godziny | |
694 | +musiałabybyć musiała by być | |
695 | +ilepodziwu ile podziwu | |
696 | +i 2010 i2010 | |
697 | +dorana do rana | |
698 | +Spodchmurykapelusza Spod chmury kapelusza | |
699 | +RYBOŁÓWSTW 0 RYBOŁÓWSTW0 | |
700 | +JEDNEG 0 JEDNEG0 | |
701 | +Ideal - ist Ideal-ist | |
702 | +H - L H-L | |
703 | +George , a George,a | |
704 | +Beth - ann Beth-ann | |
705 | +Al - Kaidy Al-Kaidy | |
706 | +Żołądzie m Żołądziem | |
707 | +przyszłe m przyszł em | |
708 | +opuszki em opuszkiem | |
709 | +gruchnie m gruchniem | |
710 | +Remie ń Remień | |
711 | +Red em Redem | |
712 | +Mik em Mikem | |
713 | +Komorze m Komorzem | |
714 | +Ki - jung Ki-jung | |
715 | +Kai m Kaim | |
716 | +Jiang - qing Jiang-qing | |
717 | +Grabo ś Graboś | |
718 | +Dingi em Dingiem | |
719 | +Den by Denby | |
720 | +Ciasto ń Ciastoń | |
721 | +Chore ń Choreń | |
722 | +C -12 C-12 | |
723 | +C -11 C-11 | |
724 | +e - sporcie e-sporcie | |
725 | +non - profit non-profit | |
726 | +metylu -1 - ol metylu-1-ol | |
727 | +eks - szef eks-szef | |
728 | +e - wydania e-wydania | |
729 | +blue - box blue-box | |
730 | +bio - obrazowania bio-obrazowania | |
731 | +art . - rocka art.-rocka | |
732 | +Yabu - san Yabu-san | |
733 | +TEN - T TEN-T | |
734 | +alergogennymskładnikiem alergogennym składnikiem | |
735 | +PNCRM 17 PNCRM17 | |
736 | +Nanim Na nim | |
737 | +6 - ciu 6-ciu | |
738 | +walijskie m walijskiem | |
739 | +będzie m będziem | |
740 | +2 n -1 2n-1 | |
741 | +4 x 4 4x4 | |
742 | +F M FM | |
743 | +Gu ` erinem Gu`erinem | |
744 | +LN M LNM | |
745 | +PL M PLM | |
746 | +UM Ś UMŚ | |
747 | +Mile m Milem | |
748 | +Zachodniopomorskie m Zachodniopomorskiem | |
749 | +Zombie m Zombiem | |
750 | +f m fm | |
751 | +hm m hmm | |
752 | +in ż inż | |
753 | +n m nm | |
754 | +pp m ppm | |
755 | +–3 – 3 | |
756 | +- wymiarowej -wymiarowej | |
757 | +B r. Br . | |
758 | +Mowiłem Mowił em | |
759 | +Poszłem Poszł em | |
760 | +doszłem doszł em | |
761 | +wytuszowałam wytuszowała m | |
762 | +wzielibyście wzieli by ście | |
... | ... |
corpora/data/ontology.dic
0 → 100644
1 | +Time Has Day | |
2 | +#dnia 23 | |
3 | +Time Has Month | |
4 | +Time Has Year | |
5 | +Time Has WeekDay | |
6 | +Time Has Hour | |
7 | +Time Has TimeOfDay | |
8 | + | |
9 | +Hour Prtc Instance | |
10 | +Hour Attr GenericDescription | |
11 | + | |
12 | +#miasto Warszawa | |
13 | +Location Has Town | |
14 | +Location Has Quarter | |
15 | +Location Has Street | |
16 | +Location Has HouseNumber | |
17 | + | |
18 | +Location Prtc Service | |
19 | +Location Attr GenericDescription | |
20 | +Location Attr Attitude | |
21 | + | |
22 | +Division Has OrganizationType | |
23 | +Division Has OrganizationName | |
24 | +Division Has Location | |
25 | +Division Has Rating | |
26 | + | |
27 | +Person Attr GenericDescription | |
28 | +Person Attr PersonDescription | |
29 | +Person Has Profession | |
30 | +#zawód hydraulik | |
31 | +Person Has FirstName | |
32 | +#imię Jan | |
33 | +Person Has LastName | |
34 | +#nazwisko Kowalski | |
35 | +Profession Has ProfessionParam | |
36 | +ProfessionParam Attr ProfessionParamDescription | |
37 | + | |
38 | +Name Prtc Person | |
39 | + | |
40 | +Service Doer Person | |
41 | +Service Doer Division | |
42 | +Service Param ServiceParam | |
43 | +Service Param Animal | |
44 | +Service Goal Service | |
45 | +Service Has Time | |
46 | +Service Has Location | |
47 | +Service SideAction Service | |
48 | +Service Attr ServiceDescription | |
49 | +Service Has Confirmation | |
50 | +Service Has Price | |
51 | + | |
52 | +ServiceParam Prtc Service | |
53 | +ServiceParam Attr ServiceParamDescription | |
54 | +ServiceParam Attr GenericDescription | |
55 | +ServiceParam Prtc Instance | |
56 | + | |
57 | +Animal Attr GenericDescription | |
58 | + | |
59 | +OrganizationType Attr OrganizationTypeDescription | |
60 | +OrganizationType Param OrganizationTypeParam | |
61 | + | |
62 | +Instance Has Service | |
63 | +Instance Doer Person | |
64 | +Instance Doer Division | |
65 | +Instance Has Time | |
66 | +Instance Has Location | |
67 | +Instance Has Price | |
68 | +Instance Client Name | |
69 | +Instance Has Confirmation | |
70 | +Instance Status InstanceStatus | |
71 | +Instance Attr GenericDescription | |
72 | + | |
73 | +#FIXME: czy nie lepiej byłoby: Price Measure Amount | |
74 | +Price Count Number | |
75 | +Price Attr RateDescription | |
76 | + | |
77 | +Rating Prtc Person | |
78 | +#FIXME: w poniższym trzeba zmienić relację lub typ | |
79 | +Rating Measure Measure | |
80 | +Rating Attr GenericDescription | |
81 | + | |
82 | +Confirmation Thme Instance | |
83 | +Confirmation Attr ConfirmationDescription | |
84 | + | |
85 | +Reminder Thme Instance | |
86 | + | |
87 | +Action Agnt Person | |
88 | +Action Doer Person | |
89 | +Action Client Person | |
90 | +Action Client Animal | |
91 | +Action Thme Instance | |
92 | +Action Thme Service | |
93 | +Action Thme Confirmation | |
94 | +Action Doer Division | |
95 | +Action Has Time | |
96 | +Action Has Location | |
97 | +Action Thme Location | |
98 | +Action Thme Time | |
99 | +Action Thme ServiceParam | |
100 | +Action Thme Division | |
101 | +Action Thme Name | |
102 | +Action Thme Rating | |
103 | +Action Thme Email | |
104 | +Action Thme Reminder | |
105 | +Action Goal Instance | |
106 | + | |
107 | + Service | |
108 | + | |
109 | +Attitude Thme Situation | |
110 | +#FIXME: poniższe wymaga przemyślena użycia kontekstów | |
111 | +Attitude Thme Action | |
112 | +Attitude Thme Instance | |
113 | +Attitude Thme Confirmation | |
114 | +Attitude Thme Action | |
115 | +Attitude Agnt Person | |
116 | +Attitude Agnt2 Person | |
117 | +Attitude Manr GenericDescription | |
118 | + | |
119 | +MentalState Expr Person | |
120 | +MentalState Thme Time | |
121 | +MentalState Thme Instance | |
122 | +MentalState Thme Rating | |
123 | +MentalState Thme ServiceParam | |
124 | + | |
125 | +State Agnt Person | |
126 | +State Thme Instance | |
127 | +State Attr GenericDescription | |
128 | +State Thme ServiceParam | |
129 | +State Thme Division | |
130 | + | |
131 | +Situation Next Situation | |
... | ... |
corpora/data/pair_patterns.tab
0 → 100644
1 | +np:$n:$c:$g _:subst:$n:$c:$g -> nummod -> _:num:$n:$c:$g:congr | |
2 | +np:$n:$c:$g _:subst:$n:gen:$g -> nummod -> _:num:$n:$c:$g:rec | |
3 | +adjp:$n:$c:$g _:adj:$n:$c:$g -> nummod -> _:num:$n:$c:$g:congr | |
4 | +adjp:$n:$c:$g _:adj:$n:gen:$g -> nummod -> _:num:$n:$c:$g:rec | |
5 | +np:$n:$c:$g _:brev -> nummod -> _:num:$n:$c:$g | |
6 | + | |
7 | +pp _:subst.ger.num.ppron12.ppron3.adj:_:$c:_ -> case -> _:prep:$c | |
8 | +pp _:siebie:$c -> case -> _:prep:$c | |
9 | +pp _:brev -> case -> _:prep | |
10 | +pp na:prep -> fixed -> _:adv | |
11 | +pp _:adv -> case -> na:prep | |
12 | +comprep:gen _:prep:$c -> fixed -> _:subst:_:$c:_ | |
13 | +pp _:subst.ger.num.ppron12.ppron3.adj:_:$c:_ -> case -> comprep:$c | |
14 | + | |
15 | +ip _:subst:_:nom:_ -> cop -> to:pred | |
16 | +ip _:adj:$n:nom:_ -> cop -> być:fin.bedzie:$n:$p | |
17 | +ip _:adj:$n:nom:$g -> cop -> być:praet:$n:$g | |
18 | +infp _:adj:_:nom:_ -> cop -> być:inf | |
19 | +ip _:subst:$n:inst:_ -> cop -> być:fin.praet.bedzie:$n:_ | |
20 | +infp _:subst:$n:inst:_ -> cop -> być:inf | |
21 | +ip _:subst:$n:nom:_ -> cop -> [ _:pred: -> aux -> _:fin:$n:ter ] | |
22 | +ip _:subst:$n:nom:$g -> cop -> [ _:pred: -> aux -> _:praet:$n:$g ] | |
23 | + | |
24 | +ip _:pred -> aux -> _:fin.praet.bedzie | |
25 | +ip _:inf -> aux -> _:bedzie | |
26 | +ip _:praet:$n:_ -> aux -> _:bedzie:$n:_ | |
27 | +ip _:ppas:$n:nom:_ -> aux:pass -> _:fin.bedzie:$n:_ | |
28 | +ip _:ppas:$n:nom:$g -> aux:pass -> _:praet:$n:$g | |
29 | +ip _:ppas:$n:nom:$g -> aux -> _:praet:$n:$g | |
30 | +infp _:ppas:_:nom:_ -> aux:pass -> _:inf | |
31 | + | |
32 | +cp _:praet.winien.fin.inf.bedzie.imps.pred -> mark -> _:comp | |
33 | +cp [ _:_ -> cop -> _:praet.fin ] -> mark -> _:comp: | |
34 | +cp [ _:_ -> aux:pass -> _:praet.fin.bedzie ] -> mark -> _:comp: | |
35 | +#jako.jak.niż:prep:nom -> rev_mark -> _:subst.adj:_:nom:_ | |
36 | +sent _:subst.adj:_:nom:_ -> mark -> _:prep:nom | |
37 | +sent _:adv:pos -> mark -> _:comp: | |
38 | + | |
39 | +conjp jak:conj -> fixed -> i:conj | |
40 | +comprep:$c wraz:adv -> fixed -> z:prep:$c | |
41 | + | |
42 | +#sent _:fin.praet.winien.imps -> punct -> .:interp | |
43 | +#sent _:fin.praet.winien.imps -> punct -> ?:interp | |
44 | +#sent [ _:_ -> cop -> _:fin.praet.winien ] -> punct -> .:interp | |
45 | +#sent [ _:_ -> cop -> _:fin.praet.winien ] -> punct -> ?:interp | |
46 | + | |
... | ... |
corpora/data/patterns.tab
0 → 100644
1 | +np:$n:$c:$g -> amod -> adjp:$n:$c:$g | |
2 | +COORD np:_:$c:_ -> amod -> adjp:_:$c:_ | |
3 | +_:subst.ger.adj:$n:$c:$g -> det -> _:adj:$n:$c:$g | |
4 | +_:siebie:$c -> amod -> _:adjp:_:$c:_ | |
5 | +_:brev -> amod -> _:adjp:_:_:_ | |
6 | +który:adj:$n:$c:$g -> amod -> adjp:$n:$c:$g | |
7 | + | |
8 | +_:subst.adj:$n:$c:$g -> nummod -> jeden:adj:$n:$c:$g | |
9 | + | |
10 | +_:subst:$n:$c:$g -> flat -> _:adj:$n:$c:$g | |
11 | +_:subst.adj:_:_:_ -> flat -> _:subst:_:gen:_ | |
12 | +_:brev -> flat -> _:subst.adj.brev | |
13 | +_:subst:$n:$c:$g -> flat -> _:subst:$n:$c:$g | |
14 | +_:subst:_:_:_ -> flat -> _:brev | |
15 | + | |
16 | +_:subst.ger:_:_:_ -> nmod -> pp | |
17 | +_:subst:_:_:_ -> nmod -> np:_:gen:_ | |
18 | +_:subst:_:_:_ -> nmod:arg -> np:_:gen:_ | |
19 | +_:subst:$n:$c:$g -> nmod -> _:adj:$n:$c:$g | |
20 | +_:subst:_:_:_ -> nmod -> _:dig | |
21 | +_:subst:_:_:_ -> nmod -> _:subst.num:_:nom:_ | |
22 | +_:subst:_:_:_ -> nmod -> [ _:subst.brev -> nummod -> _:num:_:nom:_ ] | |
23 | +_:brev -> nmod -> _:dig | |
24 | +_:brev -> nmod -> _:brev | |
25 | +_:brev -> nmod -> _:subst:_:gen:_ | |
26 | +_:brev -> nmod:arg -> _:brev | |
27 | +_:brev -> nmod:arg -> _:subst:_:gen:_ | |
28 | +_:subst:_:_:_ -> nmod -> _:brev | |
29 | +_:subst:_:_:_ -> nmod:title -> _:subst:_:nom:_ | |
30 | + | |
31 | +_:subst:_:$c:_ -> appos -> _:subst:_:$c:_ | |
32 | +_:brev -> appos -> _:subst | |
33 | +_:subst -> appos -> _:brev | |
34 | +_:subst:_:_:_ -> appos -> _:subst:_:nom:_ | |
35 | + | |
36 | +_:ppas -> obl:agent -> [ _:subst -> case -> przez:prep:acc ] | |
37 | + | |
38 | + | |
39 | +_:fin.praet.pred -> discourse:comment -> _:qub | |
40 | + | |
41 | +_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie -> cc -> conjp | |
42 | + | |
43 | + | |
44 | + | |
45 | +#jako.niż:conj -> rev_nummod -> _:subst:_:acc:_ | |
46 | + | |
47 | +_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie.ppas -> advmod -> _:qub | |
48 | +_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie.ppas -> advmod -> _:adv | |
49 | +_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie.ppas -> advmod -> [ na:prep -> fixed -> _:adv ] | |
50 | +_:adv.adj.subst.prep.comp.num.qub -> advmod -> _:qub | |
51 | +_:adv.adj -> advmod -> _:adv | |
52 | + | |
53 | +_:fin:$n:ter -> nsubj -> _:subst.ger.adj:$n:nom:_ | |
54 | +COORD _:fin:_:ter -> nsubj -> _:subst.ger.adj:_:nom:_ | |
55 | +_:fin:$n:ter -> nsubj -> [ _:subst.brev -> nummod -> _:num:$n:nom:_ ] | |
56 | +COORD _:fin:_:ter -> nsubj -> [ _:subst.brev -> nummod -> _:num:_:nom:_ ] | |
57 | +_:fin:$n:$p -> nsubj -> _:ppron12.ppron3:$n:nom:_:$p | |
58 | +_:praet.winien:$n:$g -> nsubj -> _:subst.ger.num.adj.ppron12.ppron3:$n:nom:$g | |
59 | +_:praet.winien:$n:$g -> nsubj -> [ _:subst.brev -> nummod -> _:num:$n:nom:$g ] | |
60 | +[ _:_ -> cop -> _:praet.winien:$n:$g ] -> nsubj -> _:subst.ger.num.adj.ppron12.ppron3:$n:nom:$g | |
61 | +[ _:adj:$n:nom:$g -> cop -> _:fin:$n:ter ] -> nsubj -> _:subst.ger.num.adj.ppron3:$n:nom:$g | |
62 | +_:fin:sg:ter -> nsubj -> [ _:subst.brev -> nummod -> _:num:_:nom:_:rec ] | |
63 | +_:praet.winien:sg:n -> nsubj -> [ _:subst.brev -> nummod -> _:num:_:nom:_:rec ] | |
64 | +_:fin.praet.winien:_:_ -> nsubj -> _:brev | |
65 | +[ _:ppas:$n:nom:$g -> aux:pass -> _:praet:$n:$g ] -> nsubj:pass -> np:$n:nom:$g | |
66 | +[ _:ppas:$n:nom:$g -> aux:pass -> _:fin:$n:_ ] -> nsubj:pass -> np:$n:nom:$g | |
67 | +[ _:ppas:$n:nom:$g -> aux:pass -> _:praet:$n:$g ] -> nsubj:pass -> adjp:$n:nom:$g | |
68 | +[ _:ppas:$n:nom:$g -> aux:pass -> _:fin:$n:_ ] -> nsubj:pass -> adjp:$n:nom:$g | |
69 | + | |
70 | +_:fin.praet.winien.inf.pcon.pact.pant.impt.imps.ger -> obj -> np:_:gen.acc:_ | |
71 | +_:fin.praet.winien.inf.pcon.pact.pant.impt.imps.ger -> obj -> adjp:_:gen.acc:_ | |
72 | +_:fin.praet.inf.pcon.pact.pant.impt.imps.ger.ppas -> ccomp:obj -> _:comp | |
73 | +_:fin.praet.inf.pcon.pact.impt.imps.pred.ger.ppas -> iobj -> np:_:nom.gen.dat.acc.inst:_ | |
74 | +_:fin.praet.inf.pcon.pact.impt.imps.pred.ger.ppas -> iobj -> adjp:_:nom.gen.dat.acc.inst:_ | |
75 | +_:fin.praet.inf.pcon.pact.impt.imps.pred.ger.ppas -> iobj -> [ _:subst.brev -> nummod -> _:num:_:gen.acc:_ ] | |
76 | +_:fin.praet.inf.pcon.pact.impt.imps.pred.ger.ppas -> iobj -> _:siebie:gen.dat.acc.inst | |
77 | + | |
78 | +_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie.ger.ppas.adj -> obl -> pp | |
79 | +_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie.ger.ppas.subst -> obl:arg -> pp | |
80 | +_:fin.praet.inf.pcon.pact.impt.imps.pred.ger.ppas -> obl -> np:_:inst:_ | |
81 | +daleko:adv:com -> obl -> _:subst:_:nom:_ | |
82 | +niedaleko:adv:pos -> obl:arg -> np:_:gen:_ | |
83 | + | |
84 | +_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie -> xcomp -> infp | |
85 | +trudno.dobrze.zbyt.łatwo.tyle:adv -> xcomp -> infp | |
86 | +to:subst:sg:_:n -> xcomp -> aby.żeby.by:comp | |
87 | +_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie -> xcomp -> aby.żeby.by.że:comp | |
88 | + | |
89 | +_:fin.praet.inf.pcon.pact.pant.impt.imps.pred.ger.ppas -> ccomp -> _:comp | |
90 | +_:adv.adj.subst -> ccomp -> _:comp | |
91 | +_:fin.praet.inf.impt.pred -> ccomp -> ip | |
92 | + | |
93 | +_:fin.praet.winien.inf.pred -> advcl -> _:pcon.pant | |
94 | +_:fin.praet.winien.inf.impt.pred.ppas.bedzie -> advcl -> _:comp | |
95 | +_:fin -> advcl -> _:fin | |
96 | +_:praet -> advcl -> _:praet | |
97 | +_:adv.adj.subst -> advcl -> _:comp | |
98 | + | |
99 | +_:praet.winien:$n:_ -> compound:aglt -> być:aglt:$n:_ | |
100 | +by:qub -> compound:aglt -> być:aglt | |
101 | +by.gdyby.jakby:comp -> compound:aglt -> być:aglt | |
102 | +_:praet.winien:_:_ -> compound:cnd -> by:qub | |
103 | +_:fin.praet.winien.inf.pcon.pact.ger.pant.impt.imps.pred -> expl:impers -> się:qub | |
104 | +_:fin.praet.winien.inf.pcon.pact.ger.pant.impt.imps.pred -> expl:impers -> siebie:siebie:dat | |
105 | + | |
106 | +#<conll_root>:interp -> root -> sent | |
107 | +<conll_root>:interp -> root -> ip | |
108 | +<conll_root>:interp -> root -> cp | |
109 | +#<conll_root>:interp -> root -> [ _:_ -> cop -> _:praet.winien:$n:$g | punct -> _:interp ] | |
110 | +<conll_root>:interp -> root -> np:_:nom:_ | |
111 | +<conll_root>:interp -> root -> _:brev | |
112 | +#<conll_root>:interp -> root -> _:prep:_ | |
113 | +<conll_root>:interp -> root -> trudno:adv:pos | |
114 | +<conll_root>:interp -> root -> dobrze:adv:com | |
115 | +<conll_root>:interp -> root -> tyle:adv:pos | |
116 | +<conll_root>:interp -> root -> tak:adv:pos | |
117 | +<conll_root>:interp -> root -> dobrze:adv:pos | |
118 | +<conll_root>:interp -> root -> oto:adv:pos | |
119 | +<conll_root>:interp -> root -> łatwo:adv:com | |
120 | +<conll_root>:interp -> root -> wtedy:adv:pos | |
121 | +<conll_root>:interp -> root -> trudno:adv:com | |
122 | +<conll_root>:interp -> root -> przykro:adv:pos | |
123 | +<conll_root>:interp -> root -> podobnie:adv:pos | |
124 | +<conll_root>:interp -> root -> dlatego:adv:pos | |
125 | +<conll_root>:interp -> root -> zwłaszcza:qub | |
126 | +<conll_root>:interp -> root -> notyfikować:ppas:_:nom:_ | |
127 | +<conll_root>:interp -> root -> wykluczyć:ppas:_:nom:_ | |
128 | +<conll_root>:interp -> root -> zmieniać:pact:_:nom:_ | |
129 | +<conll_root>:interp -> root -> uwzględniać:pcon:imperf | |
130 | +<conll_root>:interp -> root -> mieć:pcon:imperf | |
131 | +<conll_root>:interp -> root -> pragnąć:pcon:imperf | |
132 | +<conll_root>:interp -> root -> mówić:pcon:imperf | |
133 | +<conll_root>:interp -> root -> mieć:pcon:imperf | |
134 | + | |
135 | +#_:adj.pcon.fin.comp.impl.ppas -> punct -> ,:interp | |
136 | +_:_ -> punct -> _:interp | |
137 | + | |
138 | +#_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie.ger.ppas.adj -> obl -> [ który:adj -> case -> _:prep ] | |
139 | +#_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie.ger.ppas.subst -> obl:arg -> [ który:adj -> case -> _:prep ] | |
140 | + | |
141 | +#tu by trzeba dodać analizę "który" | |
142 | +_:subst -> acl:relcl -> ip | |
143 | + | |
144 | +[ _:adj -> cop -> _:bedzie ] -> cc -> _:conj | |
145 | +[ _:bedzie -> mark -> _:comp ] -> nsubj -> _:subst | |
146 | +cp -> ccomp -> cp | |
147 | +[ _:inf -> aux -> _:bedzie ] -> nsubj -> _:adj | |
148 | +cp -> ccomp -> cp | |
149 | +pp -> cop -> _:fin | |
150 | +pp -> cop -> _:praet | |
151 | +[ _:subst -> cop -> _:praet ] -> cc -> _:conj | |
152 | +_:adj -> advcl -> cp | |
153 | +_:adj -> obl -> _:subst | |
154 | +_:adv -> aux -> _:praet | |
155 | +_:adv -> obl -> [ _:subst -> nummod -> _:num ] | |
156 | +_:fin -> cc:preconj -> _:conj | |
157 | +_:fin -> ccomp -> [ _:adj -> mark -> _:comp | cop -> _:fin ] | |
158 | +_:fin -> xcomp -> [ _:inf -> mark -> _:comp ] | |
159 | +_:impt -> mark -> _:comp | |
160 | +_:inf -> discourse:comment -> [ _:prep -> fixed -> _:subst ] | |
161 | +_:inf -> obl -> [ _:subst -> mark -> _:prep ] | |
162 | +_:ppas -> obl:agent -> [ _:siebie -> case -> _:prep ] | |
163 | +_:ppron12 -> appos -> _:subst | |
164 | +_:praet -> obl:comp -> cp | |
165 | +_:subst -> compound:aglt -> _:aglt | |
166 | +_:subst -> det -> _:adj | |
167 | +_:subst -> discourse:comment -> _:subst | |
168 | +cp -> advcl -> [ _:inf -> mark -> _:comp ] | |
169 | +pp -> nmod:arg -> _:adj | |
170 | +pp -> nsubj -> _:subst | |
171 | +#[ _:subst -> cop -> [ _:pred -> aux -> _:praet ] ] -> mark -> _:comp | |
172 | +_:adv -> xcomp -> [ _:inf -> mark -> _:comp ] | |
173 | +_:conj -> nmod -> _:subst | |
174 | +_:dig -> case -> _:prep | |
175 | +_:ger -> list -> _:dig | |
176 | +_:ger -> nmod -> _:subst | |
177 | +_:pant -> obl -> pp | |
178 | +_:pant -> obl:arg -> pp | |
179 | +_:ppas -> aux -> _:inf | |
180 | +_:praet -> csubj -> cp | |
181 | +_:subst -> advcl -> cp | |
182 | +#[ _:adj -> mark -> _:comp | cop -> _:fin ] -> obl:arg -> pp | |
183 | +[ _:ppas -> aux -> _:praet ] -> nsubj:pass -> _:adj | |
184 | +cp -> advcl -> cp | |
185 | +pp -> fixed -> _:subst | |
186 | +pp -> nmod -> _:adj | |
187 | +_:adj -> discourse:comment -> _:adv | |
188 | +_:adj -> iobj -> _:subst | |
189 | +_:adj -> obl:comp -> [ _:adj -> mark -> _:prep ] | |
190 | +_:adv -> mark -> _:comp | |
191 | +_:fin -> acl:relcl -> _:fin | |
192 | +_:fin -> discourse:comment -> _:subst | |
193 | +_:fin -> iobj -> _:num | |
194 | +_:interp -> case -> _:prep | |
195 | +_:ppas -> cop -> _:fin | |
196 | +_:ppron12 -> mark -> _:prep | |
197 | +_:praet -> discourse:intj -> _:interj | |
198 | +_:praet -> mark -> _:conj | |
199 | +_:praet -> parataxis -> _:adv | |
200 | +_:subst -> appos -> _:adj | |
201 | +_:subst -> discourse:comment -> _:adv | |
202 | +_:subst -> mark -> _:prep | |
203 | +_:adj -> case -> _:prep | |
204 | +_:fin -> advcl -> [ _:ppas -> mark -> _:comp | aux:pass -> _:fin ] | |
205 | +_:fin -> obl:comp -> [ _:subst -> mark -> _:prep ] | |
206 | +_:ger -> mark -> _:prep | |
207 | +_:inf -> advmod -> [ _:prep -> fixed -> _:subst ] | |
208 | +_:inf -> mark -> _:conj | |
209 | +_:ppas -> cop -> _:praet | |
210 | +_:ppas -> nsubj -> _:subst | |
211 | +_:ppas -> obl:agent -> [ _:ppron3 -> case -> _:prep ] | |
212 | +_:ppron3 -> nummod -> _:num | |
213 | +_:praet -> discourse:comment -> _:subst | |
214 | +_:subst -> advcl -> cp | |
215 | +_:subst -> cop -> [ _:pred -> aux -> _:bedzie ] | |
216 | +_:subst -> nsubj -> _:ppron3 | |
217 | +[ _:adj -> cop -> _:bedzie ] -> mark -> _:comp | |
218 | +[ _:adj -> cop -> _:praet ] -> compound:cnd -> _:qub | |
219 | +pp -> amod -> _:adj | |
220 | +#[ _:subst -> cop -> [ _:pred -> aux -> _:fin ] ] -> cc -> _:conj | |
221 | +_:adj -> discourse:comment -> _:qub | |
222 | +_:adv -> discourse:comment -> _:qub | |
223 | +_:adv -> obl -> _:subst | |
224 | +_:burk -> case -> _:prep | |
225 | +_:conj -> nsubj -> _:subst | |
226 | +_:interp -> cc -> _:conj | |
227 | +_:interp -> fixed -> _:num | |
228 | +_:interp -> root -> pp | |
229 | +_:interp -> root -> _:subst | |
230 | +_:num -> fixed -> _:num | |
231 | +_:num -> nmod -> [ _:ppron3 -> case -> _:prep ] | |
232 | +_:pact -> case -> _:prep | |
233 | +_:pcon -> ccomp -> cp | |
234 | +_:ppas -> case -> _:prep | |
235 | +_:ppas -> obl:agent -> [ _:ppron12 -> case -> _:prep ] | |
236 | +_:praet -> advmod -> _:adj | |
237 | +_:prep -> discourse:comment -> _:adv | |
238 | +_:subst -> nmod -> [ _:subst -> mark -> _:prep ] | |
239 | +[ _:inf -> mark -> _:comp ] -> nsubj -> _:subst | |
240 | +[ _:subst -> cop -> _:pred ] -> obl -> pp | |
241 | +_:adj -> nmod -> _:subst | |
242 | +_:fin -> csubj -> cp | |
243 | +_:inf -> advcl -> cp | |
244 | +_:inf -> ccomp -> cp | |
245 | +_:ppas -> mark -> _:comp | |
246 | +_:prep -> obl -> [ _:subst -> nummod -> _:num ] | |
247 | +_:subst -> obl -> pp | |
248 | +_:subst -> xcomp -> _:inf | |
249 | +[ _:adj -> cop -> _:praet ] -> compound:aglt -> _:aglt | |
250 | +[ _:ppas -> aux:pass -> _:praet ] -> cc -> _:conj | |
251 | +pp -> cc:preconj -> _:conj | |
252 | +pp -> flat -> _:adj | |
253 | +pp -> flat -> _:subst | |
254 | +_:adv -> ccomp -> _:fin | |
255 | +_:brev -> nmod -> pp | |
256 | +_:fin -> mark -> _:conj | |
257 | +_:praet -> mark -> _:adv | |
258 | +_:qub -> cc -> _:conj | |
259 | +_:subst -> discourse:intj -> _:interj | |
260 | +_:subst -> nmod:arg -> _:brev | |
261 | +_:adj -> obl:arg -> [ _:ger -> case -> _:prep ] | |
262 | +_:adv -> fixed -> _:comp | |
263 | +_:adv -> obl:arg -> _:subst | |
264 | +_:brev -> case -> [ _:prep -> fixed -> _:subst ] | |
265 | +_:fin -> ccomp:obj -> cp | |
266 | +_:fin -> discourse:comment -> _:brev | |
267 | +_:fin -> obl -> [ _:subst -> nummod -> _:num ] | |
268 | +_:ger -> nmod:arg -> _:subst | |
269 | +_:inf -> nsubj -> _:subst | |
270 | +_:interp -> nmod:arg -> _:subst | |
271 | +_:interp -> nsubj -> _:subst | |
272 | +_:subst -> acl:relcl -> _:subst | |
273 | +_:subst -> flat -> _:adja | |
274 | +_:subst -> xcomp -> [ _:inf -> mark -> _:comp ] | |
275 | +[ _:pred -> mark -> _:comp ] -> aux -> _:praet | |
276 | +_:fin -> advmod -> _:adj | |
277 | +_:fin -> obl -> [ _:subst -> mark -> _:prep ] | |
278 | +_:subst -> nsubj -> _:adj | |
279 | +[ _:adj -> cop -> _:praet ] -> cc -> _:conj | |
280 | +[ _:ger -> case -> _:prep ] -> obl:agent -> pp | |
281 | +[ _:ppas -> aux:pass -> _:fin ] -> cc -> _:conj | |
282 | +[ _:subst -> cop -> _:fin ] -> cc -> _:conj | |
283 | +[ _:subst -> cop -> _:fin ] -> obl -> pp | |
284 | +_:adj -> acl:relcl -> _:fin | |
285 | +_:adv -> obl:arg -> pp | |
286 | +_:fin -> nsubj -> _:adj | |
287 | +_:inf -> ccomp:obj -> cp | |
288 | +_:inf -> fixed -> _:qub | |
289 | +_:praet -> advcl -> cp | |
290 | +_:prep -> case -> [ _:prep -> fixed -> _:subst ] | |
291 | +[ _:adj -> case -> _:prep ] -> fixed -> _:dig | |
292 | +[ _:subst -> cop -> _:fin ] -> nsubj -> _:ger | |
293 | +[ _:subst -> cop -> _:praet ] -> obl -> pp | |
294 | +_:conj -> advmod -> _:qub | |
295 | +_:fin -> advmod:arg -> _:adv | |
296 | +_:fin -> list -> _:dig | |
297 | +_:impt -> advcl -> cp | |
298 | +_:subst -> fixed -> _:adj | |
299 | +[ _:subst -> cop -> _:praet ] -> advmod -> _:adv | |
300 | +_:fin -> discourse:intj -> _:interj | |
301 | +_:fin -> obj -> _:subst | |
302 | +_:ger -> advmod -> _:qub | |
303 | +_:subst -> flat -> _:dig | |
304 | +cp -> advcl -> cp | |
305 | +cp -> nsubj -> _:subst | |
306 | +_:adj -> acl:relcl -> _:praet | |
307 | +_:inf -> advcl -> [ _:inf -> mark -> _:comp ] | |
308 | +_:inf -> ccomp -> cp | |
309 | +_:praet -> ccomp:obj -> cp | |
310 | +_:praet -> parataxis -> _:impt | |
311 | +_:praet -> parataxis -> _:subst | |
312 | +_:prep -> obl -> pp | |
313 | +_:subst -> case -> _:prep | |
314 | +cp -> nsubj -> _:subst | |
315 | +_:ppas -> aux -> _:fin | |
316 | +pp -> xcomp -> [ _:inf -> mark -> _:comp ] | |
317 | +_:adj -> amod -> _:adj | |
318 | +_:adj -> cop -> _:inf | |
319 | +_:num -> cc -> _:conj | |
320 | +_:num -> obl -> _:brev | |
321 | +_:praet -> obl:comp -> [ _:subst -> mark -> _:prep ] | |
322 | +_:prep -> flat -> _:subst | |
323 | +_:subst -> nmod:arg -> _:subst | |
324 | +[ _:adj -> cop -> _:fin ] -> obl:arg -> pp | |
325 | +_:adv -> mark -> _:conj | |
326 | +_:fin -> mark -> _:adv | |
327 | +_:interp -> fixed -> _:dig | |
328 | +_:subst -> parataxis -> _:fin | |
329 | +#[ _:subst -> cop -> [ _:pred -> aux -> _:fin ] ] -> mark -> _:comp | |
330 | +[ _:subst -> nummod -> _:num ] -> cc -> _:conj | |
331 | +_:adj -> cop -> _:pred | |
332 | +_:adv -> ccomp -> cp | |
333 | +_:adv -> mark -> _:adv | |
334 | +_:prep -> discourse:comment -> _:brev | |
335 | +_:adv -> case -> _:prep | |
336 | +_:ppas -> nsubj:pass -> _:subst | |
337 | +_:subst -> discourse:comment -> pp | |
338 | +pp -> advmod -> _:adv | |
339 | +[ _:subst -> mark -> _:prep ] -> cc -> _:conj | |
340 | +_:adj -> fixed -> _:dig | |
341 | +[ _:ger -> case -> _:prep ] -> cc -> _:conj | |
342 | +[ _:subst -> cop -> _:bedzie ] -> nsubj -> _:subst | |
343 | +_:prep -> fixed -> _:adj | |
344 | +_:subst -> cop -> _:inf | |
345 | +_:subst -> list -> _:dig | |
346 | +_:praet -> discourse:comment -> pp | |
347 | +_:praet -> vocative -> _:subst | |
348 | +_:subst -> ccomp -> cp | |
349 | +_:subst -> nmod:arg -> _:adj | |
350 | +[ _:inf -> mark -> _:comp ] -> aux -> _:bedzie | |
351 | +pp -> nmod -> _:subst | |
352 | +_:adv -> acl:relcl -> _:praet | |
353 | +_:praet -> obl -> [ _:subst -> nummod -> _:num ] | |
354 | +pp -> ccomp -> cp | |
355 | +_:adj -> mark -> _:conj | |
356 | +_:adv -> acl:relcl -> _:fin | |
357 | +_:fin -> advmod -> [ _:prep -> fixed -> _:subst ] | |
358 | +_:qub -> fixed -> _:adv | |
359 | +[ _:ppas -> aux:pass -> _:bedzie ] -> nsubj:pass -> _:subst | |
360 | +[ _:subst -> cop -> _:pred ] -> cc -> _:conj | |
361 | +_:bedzie -> nsubj -> _:subst | |
362 | +_:fin -> discourse:comment -> pp | |
363 | +_:fin -> ccomp:obj -> cp | |
364 | +_:ppron3 -> advmod -> _:qub | |
365 | +_:prep -> cc -> _:conj | |
366 | +pp -> cc -> _:conj | |
367 | +#[ _:subst -> mark -> _:comp | cop -> _:fin ] -> nsubj -> _:subst | |
368 | +_:fin -> compound:imp -> _:qub | |
369 | +_:brev -> amod -> _:adj | |
370 | +_:ger -> obl:agent -> pp | |
371 | +_:num -> nmod -> pp | |
372 | +_:praet -> obl -> [ _:subst -> mark -> _:prep ] | |
373 | +_:subst -> fixed -> _:subst | |
374 | +[ _:ger -> case -> _:prep ] -> nsubj -> _:subst | |
375 | +_:adv -> fixed -> _:qub | |
376 | +_:praet -> advmod -> [ _:prep -> fixed -> _:subst ] | |
377 | +_:praet -> discourse:comment -> [ _:prep -> fixed -> _:subst ] | |
378 | +_:subst -> cop -> _:pact | |
379 | +_:ppron12 -> advmod -> _:qub | |
380 | +_:prep -> discourse:comment -> [ _:prep -> fixed -> _:subst ] | |
381 | +cp -> advcl -> cp | |
382 | +_:adv -> ccomp -> cp | |
383 | +_:adv -> obl:comp -> [ _:subst -> mark -> _:prep ] | |
384 | +pp -> mark -> _:conj | |
385 | +_:interp -> fixed -> _:subst | |
386 | +pp -> ccomp -> cp | |
387 | +pp -> obl -> pp | |
388 | +[ _:subst -> cop -> _:pred ] -> mark -> _:comp | |
389 | +_:adj -> nmod:arg -> _:subst | |
390 | +pp -> mark -> _:comp | |
391 | +_:fin -> obl -> _:subst | |
392 | +_:adv -> obl -> pp | |
393 | +[ _:adj -> cop -> _:bedzie ] -> nsubj -> _:subst | |
394 | +_:fin -> advcl -> cp | |
395 | +_:fin -> discourse:comment -> _:adv | |
396 | +_:interp -> nummod -> _:num | |
397 | +_:subst -> discourse:comment -> [ _:prep -> fixed -> _:subst ] | |
398 | +_:subst -> nmod -> _:adj | |
399 | +[ _:inf -> aux -> _:bedzie ] -> nsubj -> _:subst | |
400 | +[ _:brev -> case -> _:prep ] -> amod -> _:adj | |
401 | +_:ppas -> cc -> _:conj | |
402 | +_:subst -> cc:preconj -> _:conj | |
403 | +[ _:brev -> case -> _:prep ] -> nummod -> _:num | |
404 | +_:praet -> advcl -> [ _:inf -> mark -> _:comp ] | |
405 | +_:subst -> ccomp -> cp | |
406 | +_:subst -> nmod -> _:subst | |
407 | +_:num -> fixed -> _:subst | |
408 | +[ _:adj -> cop -> _:fin ] -> cc -> _:conj | |
409 | +_:adjp -> case -> _:prep | |
410 | +_:adv -> fixed -> _:adv | |
411 | +_:impt -> vocative -> _:subst | |
412 | +_:conj -> fixed -> _:qub | |
413 | +_:qub -> fixed -> _:qub | |
414 | +pp -> nummod -> _:num | |
415 | +_:burk -> flat -> _:subst | |
416 | +cp -> aux -> _:bedzie | |
417 | +_:subst -> flat -> _:subst | |
418 | +_:subst -> amod -> _:adj | |
419 | +pp -> amod -> _:adja | |
420 | +_:fin -> advcl -> [ _:inf -> mark -> _:comp ] | |
421 | +_:fin -> discourse:comment -> [ _:prep -> fixed -> _:subst ] | |
422 | +_:praet -> parataxis -> _:praet | |
423 | +_:subst -> discourse:comment -> _:brev | |
424 | +_:adj -> cop -> _:praet | |
425 | +_:praet -> ccomp -> cp | |
426 | +_:adj -> mark -> _:comp | |
427 | +_:praet -> ccomp -> cp | |
428 | +_:subst -> cop -> _:praet | |
429 | +_:num -> advmod -> _:adv | |
430 | +pp -> mark -> _:adv | |
431 | +_:adj -> obl:comp -> [ _:subst -> mark -> _:prep ] | |
432 | +_:praet -> ccomp:obj -> cp | |
433 | +_:ger -> cc -> _:conj | |
434 | +_:praet -> discourse:comment -> _:adv | |
435 | +_:praet -> obl -> _:subst | |
436 | +_:adj -> cop -> _:fin | |
437 | +_:fin -> nsubj -> _:subst | |
438 | +_:fin -> parataxis -> _:praet | |
439 | +_:adv -> cc -> _:conj | |
440 | +_:subst -> nummod -> _:num | |
441 | +_:fin -> vocative -> _:subst | |
442 | +_:fin -> parataxis -> _:fin | |
443 | +_:interp -> root -> _:inf | |
444 | +_:subst -> obl -> pp | |
445 | +_:adj -> obl:arg -> pp | |
446 | +_:adj -> fixed -> _:adj | |
447 | +_:conj -> fixed -> _:conj | |
448 | +_:subst -> cop -> _:fin | |
449 | +_:prep -> fixed -> _:adjp | |
450 | +_:prep -> fixed -> _:adv | |
451 | +_:fin -> ccomp -> cp | |
452 | +_:subst -> amod -> _:adja | |
453 | +_:subst -> mark -> _:adv | |
454 | +_:praet -> nsubj -> _:subst | |
455 | +_:subst -> nsubj -> _:subst | |
456 | +[ _:subst -> cop -> _:pred ] -> nsubj -> _:subst | |
457 | +_:num -> fixed -> _:brev | |
458 | +_:subst -> mark -> _:comp | |
459 | +_:subst -> advmod -> _:adv | |
460 | +_:praet -> advcl -> cp | |
461 | +_:ger -> nsubj -> _:subst | |
462 | +_:prep -> advmod -> _:adv | |
463 | +_:adj -> nsubj -> _:subst | |
464 | +_:fin -> ccomp -> cp | |
465 | +_:praet -> parataxis -> _:fin | |
466 | +_:fin -> advcl -> cp | |
467 | +[ _:subst -> cop -> _:fin ] -> nsubj -> _:subst | |
468 | +_:adv -> fixed -> _:prep | |
469 | +_:adj -> cc -> _:conj | |
470 | +_:subst -> mark -> _:conj | |
471 | +[ _:subst -> nummod -> _:num ] -> case -> _:prep | |
472 | +_:subst -> fixed -> _:prep | |
473 | +pp -> cc -> _:conj | |
474 | +pp -> nummod -> _:num | |
475 | +_:subst -> cc -> _:conj | |
476 | + | |
... | ... |
corpora/data/proper_cats.tab
0 → 100644
1 | +rok Year | |
2 | +miesiąc Month | |
3 | +dzień Day | |
4 | +dzień tygodnia WeekDay | |
5 | +godzina Hour | |
6 | +miasto stołeczne, na prawach powiatu Town | |
7 | +gmina miejska, miasto stołeczne Town | |
8 | +miasto Town | |
9 | +wieś Town | |
10 | +przysiółek Town | |
11 | +osada leśna Town | |
12 | +osada Town | |
13 | +kolonia Town | |
14 | +dzielnica Quarter | |
15 | +część miejscowości Quarter | |
16 | +część miasta Quarter | |
17 | +aleje Street | |
18 | +aleja Street | |
19 | +ulica Street | |
20 | +most Street | |
21 | +plac Street | |
22 | +rondo Street | |
23 | +wiadukt Street | |
24 | +szosa Street | |
25 | +trakt Street | |
26 | +trasa Street | |
27 | +building-number HouseNumber | |
28 | +geograficzna | |
29 | +obj-id | |
30 | +TimeOfDay TimeOfDay | |
31 | +data Day | |
32 | +nazwisko LastName | |
33 | +imię FirstName | |
34 | + | |
35 | + | |
36 | + | |
... | ... |
corpora/data/sem-lexicon.dic
0 → 100644
1 | + | |
2 | +np: | |
3 | + /(1+np*Hour*number*case*gender*person):Apoz | |
4 | + |(1+np*Hour*sg*nom*f*ter+prepnp*Hour*sem*T*T):PHas | |
5 | + /(1+np*TimeOfDay*sg*inst*T*ter+np*TimeOfDay*sg*loc*T*ter+prepnp*TimeOfDay*sem*T*T+advp*TimeOfDay*T):PHas | |
6 | + /(1+np*Day*number*case*gender*person):Apoz | |
7 | +# |(1+np*Day*number*case*gender*person+adj*number*case*gender):unk | |
8 | + |(1+np*Day*sg*gen*m3*ter):PHas | |
9 | + /(1+np*Month*sg*gen*T*T):PHas | |
10 | + |(1+np*Year*number*case*gender*person):Apoz | |
11 | + /(1+np*Year*sg*gen*T*T):PHas | |
12 | + |(1+np*HouseNumber*sg*nom*T*T):PHas | |
13 | + |(1+np*Street*T*nom*T*ter+prepnp*Street*sem*T*T):PHas | |
14 | + |(1+np*Quarter*T*nom*T*ter+prepnp*Quarter*sem*T*T):PHas | |
15 | + /(1+np*Person*number*case*T*person):PApoz | |
16 | +# |(1+np*FirstName*number*case*gender*person):unk | |
17 | + \(1+num*number*case*gender*person*congr*nsem+num*number*case*gender*person*rec*nsem):Count \(1+qub):adjunct /(1+inclusion):adjunct | |
18 | + \(1+measure*unumber*ucase*ugender*uperson):Measure | |
19 | + /(1+date+day+day-month+date-interval+day-interval+day-month-interval+year+year-interval+hour+hour-minute+hour-interval+hour-minute-interval+roman+roman-interval):Coref | |
20 | + |(1+year):unk /(1+obj-id):Coref | |
21 | + |(1+adjp*X*number*case*gender):unk | |
22 | + \(1+nie+advp*X*T):unk | |
23 | + /(1+np*Currency*number*gen*gender*person):Has | |
24 | + /(1+np*Currency*number*case*gender*person):Has | |
25 | + /(1+np*cat*number*case*gender*person):Apoz | |
26 | + /(1+np*cat*T*case*T*T):adjunct | |
27 | + \(1+np*cat*T*case*T*T):adjunct | |
28 | + \(1+np*Measure*unumber*ucase*ugender*uperson):Measure | |
29 | + |(1+adjp*TimeDescription*number*case*gender):unk | |
30 | + |(1+adjp*GenericDescription*number*case*gender):unk | |
31 | + |(1+advp*GenericDescription*T):unk | |
32 | + |(1+adjp*ServiceDescription*number*case*gender):unk | |
33 | + |(1+qub*T):unk | |
34 | + |(1+qub*OpAdNum):unk; | |
35 | + | |
36 | +num: \(1+qub):adjunct /(1+inclusion):adjunct; | |
37 | + | |
38 | +measure: | |
39 | + \(1+num*number*case*gender*person*congr*count+num*number*case*gender*person*rec*count):adjunct \(1+qub):adjunct /(1+inclusion):adjunct; | |
40 | + | |
41 | +#prepnp: \(1+advp*T):adjunct /(np*T*case*T*T+day-month+day+year+date+hour+hour-minute):unk \(1+qub):adjunct /(1+inclusion):adjunct; | |
42 | +#prepadjp: \(1+advp*T):adjunct /(adjp*T*case*T+adjp*sg*dat*m1+adjp*T*postp*T+adjp*sg*nom*f+advp*T):unk \(1+qub):adjunct /(1+inclusion):adjunct; | |
43 | +#compar: \(1+advp*T):adjunct /(np*T*case*T*T+prepnp*T*T+prepadjp*T*T):unk \(1+qub):adjunct /(1+inclusion):adjunct; | |
44 | +prepnp: | |
45 | + /(1+prepnp*coerced*sem*T*T):Merge | |
46 | + \(1+nie+advp*GenericDescription*T):adjunct | |
47 | + \(1+advp*Attitude*T):adjunct | |
48 | + \(1+qub):adjunct /(1+inclusion):adjunct | |
49 | + /(1+np*cat*T*case*T*T):CORE | |
50 | + \(1+prepnp*Time*sem*plemma*case):unk | |
51 | + /(1+advp*cat*T):CORE; | |
52 | + | |
53 | +prepadjp: \(1+advp*T*T):adjunct \(1+qub):adjunct /(1+inclusion):adjunct; | |
54 | +compar: | |
55 | + /(1+prepnp*coerced*sem*T*T):Merge | |
56 | + \(1+advp*T*T):adjunct | |
57 | + \(1+qub):adjunct | |
58 | + /(1+inclusion):adjunct; | |
59 | + | |
60 | +colonp: /(np*cat*T*nom*T*ter+prepnp*cat*sem*T*T+advp*cat*T):CORE; | |
61 | + | |
62 | +adjp: | |
63 | + \(1+qub):adjunct | |
64 | + /(1+inclusion):adjunct | |
65 | + \(1+adja):unk | |
66 | + /(1+adjp*cat*number*case*gender):adjunct | |
67 | + \(1+adjp*cat*number*case*gender):adjunct | |
68 | + \(1+jak):unk; | |
69 | + | |
70 | +adja: /hyphen:nosem; | |
71 | + | |
72 | +advp: | |
73 | + \(1+qub):adjunct | |
74 | + /(1+inclusion):adjunct | |
75 | + \(1+adja):unk | |
76 | + |(1+np*Hour*sg*nom*f*ter+prepnp*Hour*sem*T*T):unk | |
77 | + |(1+np*TimeOfDay*sg*inst*T*ter+np*TimeOfDay*sg*loc*T*ter+prepnp*TimeOfDay*sem*T*T+advp*TimeOfDay*T):unk | |
78 | + /(1+compar*coerced*niż*T):unk | |
79 | + \(1+nie+advp*X*T):unk | |
80 | + \(1+prepnp*cat*sem*T*T):adjunct | |
81 | + /(1+prepnp*cat*sem*T*T):adjunct | |
82 | + \(1+advp*cat*T):adjunct | |
83 | + /(1+advp*cat*T):adjunct | |
84 | + \(1+jak):unk; | |
85 | + | |
86 | +#FIXME: check whether this should be 'or' or 'or2' | |
87 | +ip: | |
88 | + |(1+adjp*Interrogative*number*nom*gender):unk | |
89 | + |(1+np*Location*number*nom*gender*person):unk | |
90 | + /(1+int):unk | |
91 | + \(1+qub*GenericDescription):adjunct | |
92 | + \(1+qub*T):adjunct | |
93 | + /(1+inclusion):adjunct | |
94 | + \(1+nie):nosem | |
95 | + |(1+aux-imp):nosem | |
96 | + |(1+aux-fut*number*gender*person+aux-past*number*gender*person):nosem | |
97 | + |(1+aglt*number*person):nosem | |
98 | + |(1+by):nosem | |
99 | + /(1+or):adjunct | |
100 | + \(1+ip*cat*number*gender*person):unk; | |
101 | + | |
102 | +aux-fut: null; | |
103 | +aux-past: null; | |
104 | +aglt: null; | |
105 | + | |
106 | +infp: | |
107 | + /(1+int):unk | |
108 | + \(1+qub*GenericDescription):adjunct | |
109 | + \(1+qub*T):adjunct | |
110 | + /(1+inclusion):adjunct | |
111 | + \(1+nie):nosem | |
112 | + /(1+infp*cat*T):adjunct | |
113 | + \(1+infp*cat*T):adjunct; | |
114 | +padvp: \(1+qub):adjunct /(1+inclusion):adjunct \(1+nie):nosem; | |
115 | + | |
116 | +cp: | |
117 | + /(1+ip*T*T*T*T+infp*T*T):CORE | |
118 | + /(1+ip*cat*T*T*T+infp*cat*T):CORE | |
119 | + /(1+ip*Attitude*T*T*T):CORE | |
120 | + /(1+ip*State*number*gender*T):CORE | |
121 | + /(1+ip*Attitude*number*gender*T):CORE | |
122 | + /(1+np*ncat*number*case*gender*nperson):CORE | |
123 | + \(1+cp*int*plemma):unk; | |
124 | + | |
125 | +ncp: | |
126 | + \(1+qub):adjunct | |
127 | + /(1+inclusion):adjunct | |
128 | + /cp*cat*ctype*plemma:unk; | |
129 | + | |
130 | +prepncp: | |
131 | + /ncp*cat*T*case*T*T*ctype*plemma:CORE; | |
132 | + | |
133 | +#lemma=i|lub|czy|bądź,pos=conj: | |
134 | +# QUANT[number=all_numbers,gender=all_genders,person=all_persons] | |
135 | +# (ip*number*gender*person/ip*T*T*T)\ip*T*T*T; | |
136 | +#lemma=,|i|lub|czy|bądź,pos=conj: (advp*mod/prepnp*T*T)\prepnp*T*T; | |
137 | +#lemma=,|i|lub|czy|bądź,pos=conj: QUANT[mode=0] (advp*mode/advp*mode)\prepnp*T*T; | |
138 | +#lemma=,|i|lub|czy|bądź,pos=conj: QUANT[mode=0] (advp*mode/prepnp*T*T)\advp*mode; | |
139 | +#lemma=,|i|lub|czy|bądź,pos=conj: (advp*mod/advp*T)\advp*T; #FIXME: a variant that preserves mode would be useful | |
140 | +#lemma=,|i|lub|czy|bądź,pos=conj: | |
141 | +# QUANT[plemma=0,case=all_cases] | |
142 | +# (prepnp*plemma*case/prepnp*plemma*case)\prepnp*plemma*case; | |
143 | +#lemma=,|i|lub|czy|bądź,pos=conj: | |
144 | +# QUANT[number=all_numbers,case=all_cases,gender=all_genders,person=all_persons] | |
145 | +# (np*number*case*gender*person/np*T*case*T*T)\np*T*case*T*T; | |
146 | +#lemma=,|i|lub|czy|bądź,pos=conj: | |
147 | +# QUANT[number=all_numbers,case=all_cases,gender=all_genders] | |
148 | +# (adjp*number*case*gender/adjp*number*case*gender)\adjp*number*case*gender; | |
149 | + | |
150 | +jak: null; | |
151 | + | |
152 | +lex-się-qub: null; | |
153 | +nie: null; | |
154 | +by: null; | |
155 | +aux-imp: null; | |
156 | +qub: null; | |
157 | +interj: null; | |
158 | +sinterj: null; | |
159 | +hyphen: null; | |
160 | +int: null; | |
161 | +#lemma=„,pos=interp: QUANT[number=0,case=0,gender=0,person=0] (np*number*case*gender*person/rquot)/np*number*case*gender*person; #SetAttr("QUOT",Val "+",Var "x" | |
162 | +#lemma=«,pos=interp: QUANT[number=0,case=0,gender=0,person=0] (np*number*case*gender*person/rquot2)/np*number*case*gender*person; #SetAttr("QUOT",Val "+",Var "x" | |
163 | +#lemma=»,pos=interp: QUANT[number=0,case=0,gender=0,person=0] (np*number*case*gender*person/rquot3)/np*number*case*gender*person; #SetAttr("QUOT",Val "+",Var "x" | |
164 | +rquot: null; | |
165 | +rquot2: null; | |
166 | +rquot3: null; | |
167 | +#lemma=(,pos=interp: (inclusion/rparen)/(np*T*T*T*T+ip*T*T*T+adjp*T*T*T+prepnp*T*T); #SetAttr("INCLUSION",Val "+", | |
168 | +#lemma=[,pos=interp: (inclusion/rparen2)/(np*T*T*T*T+ip*T*T*T+adjp*T*T*T+prepnp*T*T); #SetAttr("INCLUSION",Val "+", | |
169 | +rparen: null; | |
170 | +rparen2: null; | |
171 | + | |
172 | +intp: | |
173 | + \(ip*cat*T*T*T+np*cat*T*nom*T*T+cp*cat*int*T):CORE; | |
174 | + | |
175 | +mp: | |
176 | + /(1+ip*cat*T*T*T+cp*cat*int*T+intp*cat+interj*cat):unk | |
177 | + /(1+np*cat*T*nom*T*T+intp*cat):unk; | |
178 | + | |
179 | + | |
180 | +s: | |
181 | + \?(ip*T*T*T*T+cp*T*int*T+cp*T*sub*T+mp*T+intp*T+interj*T+sinterj*T):null | |
182 | + \?(mp*Time*T*T+intp*Time+np*Time*sg*T*T*ter+prepnp*Time*sem*T*T+advp*Time*T+interj*T+sinterj*T):adjunct | |
183 | + \(1+prepnp*Time*nosem*na*acc):adjunct | |
184 | + \?(mp*Telephone*T*T+intp*Telephone+np*Telephone*sg*nom*T*ter+np*Email*sg*nom*T*ter):adjunct | |
185 | + \?(mp*Price*T*T+intp*Price+np*Price*sg*nom*T*ter+prepnp*Price*sem*T*T+interj*T+sinterj*T):adjunct | |
186 | + \?(mp*Rating*T*T+intp*Rating+np*Rating*T*nom*T*ter+prepnp*Rating*sem*T*T+interj*T+sinterj*T):adjunct | |
187 | + \(1+mp*Person*T*T+intp*Person+np*Person*sg*nom*T*ter):adjunct | |
188 | + \?(mp*Location*T*T+intp*Location+np*Location*T*T*T*ter+prepnp*Location*sem*T*T+advp*Location*T+interj*T+sinterj*T):adjunct | |
189 | + \(1+mp*Service*T*T+intp*Service+np*Service*T*nom*T*ter):adjunct | |
190 | + \(1+prepnp*Name*nosem*na*acc):adjunct | |
191 | + \(1+mp*Animal*T*T+intp*Animal+np*Animal*sg*nom*T*ter+prepnp*Animal*nosem*z*inst):adjunct | |
192 | + \(1+advp*Confirmation*T):adjunct | |
193 | + \(1+adjp*RateDescription*T*T*T):adjunct; | |
194 | +# |?(fragment):null; | |
195 | + | |
196 | +<sentence>: /?s:unk; | |
197 | +<paragraph>: \?<sentence>:unk; | |
198 | + | |
199 | +<root>: /(1+<paragraph>):unk /(1+s):unk /(1+<speaker-end>):unk /(1+or):unk /(1+np*T*nom*T*T):unk /(1+ip*T*T*T):unk; | |
200 | +<merge>: |?(fragment):unk; | |
201 | +<conll_root>: /(ip*T*T*T+cp*int*T+np*sg*voc*T*T+interj):unk; | |
202 | + | |
203 | +pro: null; | |
204 | + | |
205 | +or: null; | |
206 | +<colon>: \<speaker>:unk /(1+<squery>):unk; | |
207 | +or2: \?(ip*T*T*T*T+cp*int*T+np*sg*voc*T*T+interj):unk; | |
208 | +<speaker-end>: null | |
... | ... |