Commit 7b958d71f96e5566a8099ce5afa9b62f3d325e28

Authored by Wojciech Jaworski
1 parent d6fcfbc6

analiza banku drzew zależnościowych

Too many changes to show.

To preserve performance only 23 of 53 files are displayed.

LCGparser/ENIAM_LCGchart.ml
... ... @@ -250,8 +250,9 @@ let is_dep_parsed = function
250 250 [] -> false
251 251 | [Bracket(false,false,Tensor[Atom "<conll_root>"]),_] -> true
252 252 | [Bracket(false,false,Imp(Tensor[Atom("<conll_root>")],Forward,Maybe _)),sem]-> true
253   - | [t,_] -> print_endline @@ ENIAM_LCGstringOf.grammar_symbol_prime t; failwith "is_dep_parsed"
254   - | l -> failwith ("is_dep_parsed " ^ (String.concat " " @@ List.map (fun x -> ENIAM_LCGstringOf.grammar_symbol 0 @@ fst x) l))
  253 + | _ -> false
  254 + (* | [t,_] -> print_endline @@ ENIAM_LCGstringOf.grammar_symbol_prime t; failwith "is_dep_parsed"
  255 + | l -> failwith ("is_dep_parsed " ^ (String.concat " " @@ List.map (fun x -> ENIAM_LCGstringOf.grammar_symbol 0 @@ fst x) l)) *)
255 256 (* | l -> failwith ("is_dep_parsed " ^ (string_of_int @@ List.length l))
256 257 *)
257 258 let get_parsed_term chart =
... ...
compile.sh
1 1 #!/bin/bash
2 2  
3   -cd xt
4   -make clean
5   -sudo make install
6   -make clean
7   -cd ..
8   -
9 3 cd tokenizer
10 4 make clean
11 5 sudo make install
... ... @@ -45,6 +39,12 @@ sudo make install
45 39 make clean
46 40 cd ..
47 41  
  42 +cd xt
  43 +make clean
  44 +sudo make install
  45 +make clean
  46 +cd ..
  47 +
48 48 cd lexSemantics
49 49 make clean
50 50 sudo make install
... ...
corpora/CONLL2.ml 0 → 100644
  1 +(*
  2 + * ENIAMcorpora is a library that integrates ENIAM with corpora in CONLL format
  3 + * Copyright (C) 2016 Daniel Oklesinski <oklesinski dot daniel atSPAMfree gmail dot com>
  4 + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  5 + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  6 + *
  7 + * This library is free software: you can redistribute it and/or modify
  8 + * it under the terms of the GNU Lesser General Public License as published by
  9 + * the Free Software Foundation, either version 3 of the License, or
  10 + * (at your option) any later version.
  11 + *
  12 + * This library is distributed in the hope that it will be useful,
  13 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 + * GNU Lesser General Public License for more details.
  16 + *
  17 + * You should have received a copy of the GNU Lesser General Public License
  18 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
  19 + *)
  20 +
  21 +open Xstd
  22 +open ENIAMsubsyntaxTypes
  23 +open ENIAMtokenizerTypes
  24 +
  25 +exception Comment_line
  26 +exception Empty_line
  27 +exception Empty_sentence
  28 +exception Sent_id of string
  29 +exception Raw_text of string
  30 +exception Orig of string
  31 +exception Interval_id of int
  32 +
  33 +let load_token beg compound in_channel =
  34 + let n_token id orth beg lemma interp sl sem sp =
  35 + let sp = match sp with
  36 + "_" -> if compound > 0 then 0 else 100
  37 + | "SpaceAfter=No" -> 0
  38 + | _ -> failwith ("load_token sp: " ^ sp) in
  39 + let sem = match sem with
  40 + "_" -> ""
  41 + | _ -> sem in
  42 + let len = (Xlist.size (Xunicode.utf8_chars_of_utf8_string orth)) * 100 in
  43 + let next = beg+len+sp in
  44 + let id = try int_of_string id with _ ->
  45 + let len = match Xstring.split "-" id with
  46 + [a;b] -> (try int_of_string b - int_of_string a with _ -> failwith "load_token: interval id")
  47 + | _ -> failwith "load_token: interval id" in
  48 + raise (Interval_id len) in
  49 + let pos,tags = match ENIAMtokens.parse_postags interp with [x] -> x | _ -> failwith "n_token" in
  50 + {empty_token_env with orth = orth; beg=beg; len=len; next=next;
  51 + token = Lemma(lemma,pos,[tags])}, next, id, sl, sem in
  52 + let line = input_line in_channel in
  53 + if line = ""
  54 + then raise Empty_line
  55 + else if line.[0] = '#'
  56 + then
  57 + if Xstring.check_prefix "# sent_id = " line then
  58 + raise (Sent_id(Xstring.cut_prefix "# sent_id = " line)) else
  59 + if Xstring.check_prefix "# text = " line then
  60 + raise (Raw_text(Xstring.cut_prefix "# text = " line)) else
  61 + if Xstring.check_prefix "# orig_file_sentence = " line then
  62 + raise (Orig(Xstring.cut_prefix "# orig_file_sentence = " line)) else
  63 + raise Comment_line
  64 + else
  65 + match Xstring.split "\t" line with
  66 + [id; orth; lemma; ucat; interp; uinterp; super; label; "_"; sp] ->
  67 + let super = if super = "_" then 0 else try int_of_string super with _ -> failwith ("load_token super: " ^ super) in
  68 + n_token id orth beg lemma interp [super,label] "_" sp
  69 + | [id; orth; lemma; ucat; interp; uinterp; super; label; sl; sp; sem] ->
  70 + let sl = match sl with
  71 + "_:_" -> []
  72 + | _ -> Xlist.map (Xstring.split "|" sl) (fun s ->
  73 + match Xstring.split ":" s with
  74 + super :: l -> (try int_of_string super, String.concat ":" l with _ -> failwith ("load_token sl: " ^ sl))
  75 + | _ -> failwith ("load_token sl: " ^ sl)) in
  76 + n_token id orth beg lemma interp sl sem sp
  77 + | _ -> failwith ("load_token: " ^ line)
  78 +
  (* substract_next tokens rev_paths takes the id stored in the first element of
     rev_paths, lowers the next field of that token in the tokens array by 100
     (in place), and returns rev_paths unchanged.
     load_sentence conses new tokens onto rev_paths, so the first element is the
     token read last. Presumably this removes the space counted after the final
     token of a sentence -- TODO confirm.
     Raises Failure substract_next when rev_paths is empty. *)
  79 +let substract_next tokens = function
  80 + ((id,_,_) :: _) as rev_paths ->
  81 + let t = ExtArray.get tokens id in
  82 + ExtArray.set tokens id {t with next=t.next-100};
  83 + rev_paths
  84 + | _ -> failwith "substract_next"
  85 +
  (* load_sentence in_channel reads one sentence (lines up to an empty line or
     end of file) and returns (sentence_record, text, orig, tokens).

     tokens is a fresh ExtArray whose first added entry is the artificial
     conll_root token; every loaded token is appended after it and the index
     returned by ExtArray.add must equal the id read from the file, otherwise
     Failure is raised.

     The inner loop pom threads:
     - rev_paths: (id, heads, sem) triples in reverse reading order,
     - next: start position passed to load_token for the following token (starts at 100),
     - compound: counter set from Interval_id and decreased by one per token, never below 0,
     - sent_id, text, orig: values taken from the corresponding comment lines.

     A Comment_line that is none of the recognised headers aborts with Failure.
     End_of_file is re-raised when no token was read, so callers can detect the
     end of the corpus; otherwise it ends the sentence like an empty line.
     The returned record has beg, len and next set to -1 and wraps the paths,
     prefixed with an entry for index 0, in DepSentence. *)
  86 +let load_sentence in_channel =
  87 + let tokens = ExtArray.make 100 empty_token_env in
  88 + let _ = ExtArray.add tokens {empty_token_env with token = Interp "<conll_root>"} in
  89 + let rec pom rev_paths next compound sent_id text orig =
  90 + try
  91 + let token, next, conll_id, sl, sem = load_token next compound in_channel in
  92 + let id_a = ExtArray.add tokens token in
  93 + if id_a <> conll_id then failwith "load_sentence: different ids" else
  94 + pom ((id_a,sl,sem) :: rev_paths) next (max 0 (compound-1)) sent_id text orig
  95 + with
  96 + Sent_id sent_id -> pom rev_paths next compound sent_id text orig
  97 + | Raw_text text -> pom rev_paths next compound sent_id text orig
  98 + | Orig orig -> pom rev_paths next compound sent_id text orig
  99 + | Comment_line -> failwith "load_sentence: Comment_line"
  100 + | Interval_id len -> (*print_endline line;*) pom rev_paths next len sent_id text orig
  101 + | Empty_line -> substract_next tokens rev_paths, sent_id, text, orig
  102 + | End_of_file -> if rev_paths = []
  103 + then raise End_of_file
  104 + else substract_next tokens rev_paths, sent_id, text, orig in
  105 + let rev_paths, sent_id, text, orig = pom [] 100 0 "" "" "" in
  106 + {id = sent_id; beg = -1; len = -1; next = -1; file_prefix = ""; sentence = DepSentence[Array.of_list ((0,[],"") :: List.rev rev_paths)]}, text, orig, tokens
  107 +(* {s_id = id; s_text = ""; s_paths = (List.rev rev_paths)} *)
  108 +
  (* load_corpus in_channel calls load_sentence repeatedly until it raises
     End_of_file and returns the collected (sentence, text, orig, tokens)
     tuples in reading order (they are consed and reversed at the end).
     Any other exception raised while loading propagates to the caller. *)
  109 +let load_corpus in_channel =
  110 + let rec pom res =
  111 + try
  112 + let conll_sentence, text, orig, tokens = load_sentence in_channel in
  113 + pom ((conll_sentence, text, orig, tokens) :: res)
  114 + with End_of_file -> res
  115 + (*| e -> prerr_endline (Printexc.to_string e); res*) in
  116 + List.rev @@ pom []
  117 +
  (* substring a beg len concatenates the strings a.(beg) .. a.(beg+len-1) of a
     string array (one element per character in verify_lengths).
     Assumes Int.fold visits both bounds inclusively in increasing order -- TODO confirm.
     Array indexing is unchecked here, so an out-of-range beg or len raises
     Invalid_argument from the array access. *)
  118 +let substring a beg len =
  119 + String.concat "" (List.rev (Int.fold beg (beg+len-1) [] (fun l i ->
  120 + a.(i) :: l)))
  121 +
  (* verify_lengths corpus checks, for every sentence, that the token positions
     agree with the raw sentence text and prints a diagnostic line to stdout
     for each mismatch.

     Positions are stored multiplied by 100 and start at 100, so they are
     divided by 100 and shifted by one to get indices into the character array.
     For each token from index 1 on:
     - the text slice at (beg, len) must equal orth,
     - next must be either directly after the token, or one character later
       with that character being a space; anything else is reported as a next problem.
     NOTE(review): no bounds checks are done, so a text shorter than the token
     positions imply will raise from the array access in substring. *)
  122 +let verify_lengths corpus =
  123 + Xlist.iter corpus (fun (conll_sentence, text, orig, tokens) ->
  124 + let text = Array.of_list (Xunicode.utf8_chars_of_utf8_string text) in
  125 + Int.iter 1 (ExtArray.size tokens - 1) (fun i ->
  126 + let t = ExtArray.get tokens i in
  127 + let beg = t.beg/100 - 1 in
  128 + let len = t.len/100 in
  129 + let next = t.next/100 - 1 in
  130 + let s = substring text beg len in
  131 + if s <> t.orth then Printf.printf "%s: %s %s\n" conll_sentence.id s t.orth;
  132 + if beg + len = next then () else
  133 + if beg + len + 1 = next then
  134 + if substring text (next-1) 1 = " " then () else Printf.printf "%s: space problem\n" conll_sentence.id else
  135 + Printf.printf "%s: next problem\n" conll_sentence.id))
  136 +
  (* get_tagset corpus folds over all tokens (index 1 onwards, skipping the
     root entry) of all sentences and adds, for each Lemma token, the key
     cat:interps to a StringQMap, which is returned.
     Raises Failure get_tagset if a token is not a Lemma. *)
  137 +let get_tagset corpus =
  138 + Xlist.fold corpus StringQMap.empty (fun qmap (conll_sentence, text, orig, tokens) ->
  139 + Int.fold 1 (ExtArray.size tokens - 1) qmap (fun qmap i ->
  140 + let t = ExtArray.get tokens i in
  141 + match t.token with
  142 + Lemma(lemma,cat,interp) -> StringQMap.add qmap (cat ^ ":" ^ ENIAMtokens.string_of_interps interp)
  143 + | _ -> failwith "get_tagset"))
  144 +
  (* Sets of tag values accepted by convert_n, convert_c, convert_g and
     convert_d respectively: grammatical number, case, gender and degree. *)
  145 +let numbers = StringSet.of_list ["sg";"pl"]
  146 +let cases = StringSet.of_list ["nom";"gen";"dat";"acc";"inst";"loc";"voc"]
  147 +let genders = StringSet.of_list ["m1";"m2";"m3";"n";"f"]
  148 +let degrees = StringSet.of_list ["pos";"com";"sup"]
  149 +
  (* convert_n n returns n unchanged when it is a member of numbers and raises
     Failure with the offending value otherwise. *)
  150 +let convert_n n =
  151 + if StringSet.mem numbers n then n else failwith ("convert_n: " ^ n)
  152 +
  (* convert_c c returns c unchanged when it is a member of cases and raises
     Failure with the offending value otherwise. *)
  153 +let convert_c c =
  154 + if StringSet.mem cases c then c else failwith ("convert_c: " ^ c)
  155 +
  (* convert_g g maps a gender tag onto the set genders: n1, n2 and p2 become n,
     p1 becomes m1, a value already in genders is returned unchanged, and any
     other value raises Failure with the offending value. *)
  156 +let convert_g = function
  157 + "n1" -> "n"
  158 + | "n2" -> "n"
  159 + | "p1" -> "m1"
  160 + | "p2" -> "n"
  161 + | g -> if StringSet.mem genders g then g else failwith ("convert_g: " ^ g)
  162 +
  (* convert_d d returns d unchanged when it is a member of degrees and raises
     Failure with the offending value otherwise. *)
  163 +let convert_d d =
  164 + if StringSet.mem degrees d then d else failwith ("convert_d: " ^ d)
  165 +
  (* convert_tagset_token id token rewrites the category and interpretation of a
     Lemma token into a normalised tagset; id is used only in the diagnostic
     message of the final case.

     Cases are tried top to bottom, so the order of overlapping patterns matters
     (for example the two literal depr cases precede the generic depr cases, and
     the fully specified num and subst cases precede the ones with a wildcard or
     with fewer fields). In outline:
     - number, case, gender and degree values are validated through
       convert_n, convert_c, convert_g and convert_d, which raise Failure on
       unknown values,
     - depr with gender m1, m2, m3 or f is turned into subst, except the two
       literal pl m2 forms which are kept,
     - genders n1, n2, p1, p2 of num and subst are replaced by n or m1 together
       with an extra col, ncol or pt field,
     - adv without a degree gets the degree pos; burk loses its interpretation,
     - many categories are passed through unchanged,
     - a fixed list of individual lemmas tagged ign is given hand-written tags.
     Any token matching no case is printed to stdout and returned unchanged. *)
  166 +let convert_tagset_token id = function
  167 + Lemma(lemma,"adj",[[[n];[c];[g];[d]]]) -> Lemma(lemma,"adj",[[[convert_n n];[convert_c c];[convert_g g];[convert_d d]]])
  168 + | Lemma(lemma,"adja",[[]]) as t -> t
  169 + | Lemma(lemma,"adjc",[[]]) as t -> t
  170 + | Lemma(lemma,"adjp",[[]]) as t -> t
  171 + | Lemma(lemma,"adv",[[]]) -> Lemma(lemma,"adv",[[["pos"]]])
  172 + | Lemma(lemma,"adv",[[[d]]]) -> Lemma(lemma,"adv",[[[convert_d d]]])
  173 + | Lemma(lemma,"aglt",_) as t -> t
  174 + | Lemma(lemma,"bedzie",_) as t -> t
  175 + | Lemma(lemma,"brev",_) as t -> t
  176 + | Lemma(lemma,"burk",[[]]) as t -> t
  177 + | Lemma(lemma,"burk",[_]) -> Lemma(lemma,"burk",[[]])
  178 + | Lemma(lemma,"comp",[[]]) as t -> t
  179 + | Lemma(lemma,"conj",[[]]) as t -> t
  180 + | Lemma(lemma,"depr",[[["pl"];["nom"];["m2"]]]) as t -> t
  181 + | Lemma(lemma,"depr",[[["pl"];["voc"];["m2"]]]) as t -> t
  182 + | Lemma(lemma,"depr",[[[n];[c];["m1"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["m1"]]])
  183 + | Lemma(lemma,"depr",[[[n];[c];["m2"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["m2"]]])
  184 + | Lemma(lemma,"depr",[[[n];[c];["m3"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["m3"]]])
  185 + | Lemma(lemma,"depr",[[[n];[c];["f"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["f"]]])
  186 + | Lemma(lemma,"dig",[[]]) as t -> t
  187 + | Lemma(lemma,"emo",[[]]) as t -> t
  188 + | Lemma(lemma,"fin",_) as t -> t
  189 + | Lemma(lemma,"ger",[[[n];[c];[g];[a];[neg]]]) -> Lemma(lemma,"ger",[[[convert_n n];[convert_c c];[convert_g g];[a];[neg]]])
  190 + | Lemma(lemma,"imps",_) as t -> t
  191 + | Lemma(lemma,"impt",_) as t -> t
  192 + | Lemma(lemma,"inf",_) as t -> t
  193 + | Lemma(lemma,"interj",[[]]) as t -> t
  194 + | Lemma(lemma,"interp",[[]]) as t -> t
  195 + | Lemma(lemma,"num",[[[n];[c];["m1"];[acm]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["m1"];[acm]]])
  196 + | Lemma(lemma,"num",[[[n];[c];["m2"];[acm]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["m2"];[acm]]])
  197 + | Lemma(lemma,"num",[[[n];[c];["m3"];[acm]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["m3"];[acm]]])
  198 + | Lemma(lemma,"num",[[[n];[c];["f"];[acm]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["f"];[acm]]])
  199 + | Lemma(lemma,"num",[[[n];[c];["n"];[acm];["col"]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["n"];[acm];["col"]]])
  200 + | Lemma(lemma,"num",[[[n];[c];["n"];[acm];["ncol"]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["n"];[acm];["ncol"]]])
  201 + | Lemma(lemma,"num",[[[n];[c];["n1"];[acm]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["n"];[acm];["col"]]])
  202 + | Lemma(lemma,"num",[[[n];[c];["n2"];[acm]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["n"];[acm];["ncol"]]])
  203 + | Lemma(lemma,"num",[[[n];[c];["p1"];[acm]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["m1"];[acm]]])
  204 + | Lemma(lemma,"num",[[[n];[c];["p2"];[acm]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["n"];[acm];["col";"ncol"]]])
  205 + | Lemma(lemma,"num",[[[n];[c];["m1"];[acm];[_]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["m1"];[acm]]])
  206 + | Lemma(lemma,"num",[[[n];[c];["m2"];[acm];[_]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["m2"];[acm]]])
  207 + | Lemma(lemma,"num",[[[n];[c];["m3"];[acm];[_]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["m3"];[acm]]])
  208 + | Lemma(lemma,"num",[[[n];[c];["f"];[acm];[_]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["f"];[acm]]])
  209 + | Lemma(lemma,"num",[[[n];[c];["n"];[acm]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["n"];[acm];["col";"ncol"]]])
  210 + | Lemma(lemma,"num",[[[n];[c];["m3"]]]) -> Lemma(lemma,"num",[[[convert_n n];[convert_c c];["m3"];["congr";"rec"]]])
  211 + | Lemma(lemma,"pact",[[[n];[c];[g];[a];[neg]]]) -> Lemma(lemma,"pact",[[[convert_n n];[convert_c c];[convert_g g];[a];[neg]]])
  212 + | Lemma(lemma,"pant",_) as t -> t
  213 + | Lemma(lemma,"pcon",_) as t -> t
  214 + | Lemma(lemma,"ppas",[[[n];[c];[g];[a];[neg]]]) -> Lemma(lemma,"ppas",[[[convert_n n];[convert_c c];[convert_g g];[a];[neg]]])
  215 + | Lemma(lemma,"ppron12",[[[n];[c];[g];[p]]]) -> Lemma(lemma,"ppron12",[[[convert_n n];[convert_c c];[convert_g g];[p]]])
  216 + | Lemma(lemma,"ppron12",[[[n];[c];[g];[p];[akc]]]) -> Lemma(lemma,"ppron12",[[[convert_n n];[convert_c c];[convert_g g];[p];[akc]]])
  217 + | Lemma(lemma,"ppron3",[[[n];[c];[g];[p];[akc];[praep]]]) -> Lemma(lemma,"ppron3",[[[convert_n n];[convert_c c];[convert_g g];[p];[akc];[praep]]])
  218 + | Lemma(lemma,"praet",[[[n];[g];[a]]]) -> Lemma(lemma,"praet",[[[convert_n n];[convert_g g];[a]]])
  219 + | Lemma(lemma,"praet",[[[n];[g];[a];[agl]]]) -> Lemma(lemma,"praet",[[[convert_n n];[convert_g g];[a];[agl]]])
  220 + | Lemma(lemma,"pred",[[]]) as t -> t
  221 + | Lemma(lemma,"prep",_) as t -> t
  222 + | Lemma(lemma,"qub",_) as t -> t
  223 + | Lemma(lemma,"romandig",[[]]) as t -> t
  224 + | Lemma(lemma,"siebie",_) as t -> t
  225 + | Lemma(lemma,"subst",[[[n];[c];["m1"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["m1"]]])
  226 + | Lemma(lemma,"subst",[[[n];[c];["m2"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["m2"]]])
  227 + | Lemma(lemma,"subst",[[[n];[c];["m3"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["m3"]]])
  228 + | Lemma(lemma,"subst",[[[n];[c];["f"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["f"]]])
  229 + | Lemma(lemma,"subst",[[[n];[c];["n"];["col"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["n"];["col"]]])
  230 + | Lemma(lemma,"subst",[[[n];[c];["n"];["ncol"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["n"];["ncol"]]])
  231 + | Lemma(lemma,"subst",[[[n];[c];["m1"];["pt"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["m1"];["pt"]]])
  232 + | Lemma(lemma,"subst",[[[n];[c];["n"];["pt"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["n"];["pt"]]])
  233 + | Lemma(lemma,"subst",[[[n];[c];["n1"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["n"];["col"]]])
  234 + | Lemma(lemma,"subst",[[[n];[c];["n2"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["n"];["ncol"]]])
  235 + | Lemma(lemma,"subst",[[[n];[c];["p1"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["m1"];["pt"]]])
  236 + | Lemma(lemma,"subst",[[[n];[c];["p2"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["n"];["pt"]]])
  237 + | Lemma(lemma,"subst",[[[n];[c];["m3"];[_]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["m3"]]])
  238 + | Lemma(lemma,"subst",[[[n];[c];["n"]]]) -> Lemma(lemma,"subst",[[[convert_n n];[convert_c c];["n"];["ncol"]]])
  239 + | Lemma(lemma,"winien",[[[n];[g];[a]]]) -> Lemma(lemma,"winien",[[[convert_n n];[convert_g g];[a]]])
  240 + | Lemma("Crimeboys" as lemma,"ign",[[]]) -> Lemma(lemma,"subst",[[["pl"];["nom"];["m1"]]])
  241 + | Lemma("109P4" as lemma,"ign",[[]]) -> Lemma(lemma,"subst",[[["sg"];["nom"];["m2"]]])
  242 + | Lemma("1a." as lemma,"ign",[[]]) -> Lemma(lemma,"list-item",[[]])
  243 + | Lemma("orfano" as lemma,"ign",[[]]) -> Lemma(lemma,"xxx",[[]])
  244 + | Lemma("650-91-58" as lemma,"ign",[[]]) -> Lemma(lemma,"phone-number",[[]])
  245 + | Lemma("654-66-91" as lemma,"ign",[[]]) -> Lemma(lemma,"phone-number",[[]])
  246 + | Lemma("U2" as lemma,"ign",[[]]) -> Lemma(lemma,"subst",[[["sg"];["gen"];["m1"]]])
  247 + | Lemma("Uudenkaupungin" as lemma,"ign",[[]]) -> Lemma(lemma,"subst",[[["sg"];["gen"];["m1"]]])
  248 + | Lemma("kaupunki" as lemma,"ign",[[]]) -> Lemma(lemma,"subst",[[["sg"];["gen"];["m1"]]])
  249 + | Lemma("AKP" as lemma,"ign",[[]]) -> Lemma(lemma,"subst",[[[""];[""];[""]]])
  250 + | Lemma("Beginning" as lemma,"ign",[[]]) -> Lemma(lemma,"xxx",[[]])
  251 + | Lemma("with" as lemma,"ign",[[]]) -> Lemma(lemma,"xxx",[[]])
  252 + | Lemma("my" as lemma,"ign",[[]]) -> Lemma(lemma,"xxx",[[]])
  253 + | Lemma("streets" as lemma,"ign",[[]]) -> Lemma(lemma,"xxx",[[]])
  254 + | t -> print_endline ("convert_tagset_token: " ^ id ^ " " ^ ENIAMtokens.string_of_token t);t
  255 +
  (* convert_tagset corpus applies convert_tagset_token to every token (index 1
     onwards) of every sentence, overwriting the entries of each tokens array
     in place, and then returns the same corpus value. The sentence id is passed
     along for the diagnostics printed by convert_tagset_token. *)
  256 +let convert_tagset corpus =
  257 + Xlist.iter corpus (fun (conll_sentence, text, orig, tokens) ->
  258 + Int.iter 1 (ExtArray.size tokens - 1) (fun i ->
  259 + let t = ExtArray.get tokens i in
  260 + let token = convert_tagset_token conll_sentence.id t.token in
  261 + ExtArray.set tokens i {t with token=token}));
  262 + corpus
  263 +
  264 +(*let string_of_depencency = function
  265 + (* Lemma(lemma1,cat1,interp1),"punct",Lemma(lemma2,"interp",_) -> cat1 ^ " -> punct -> " ^ lemma2 ^ ":interp"
  266 + | Interp "<conll_root>","root",Lemma(lemma2,cat2,_) -> "<conll_root> -> root -> " ^ cat2 *)
  267 + | _,"nsubj",_ -> "nsubj"
  268 + | _,"amod",_ -> "amod"
  269 + | _,"root",_ -> "root"
  270 + | _,"punct",_ -> "punct"
  271 + | _,"advmod",_ -> "advmod"
  272 + | _,"expl:impers",_ -> "expl:impers"
  273 + | _,"mark",_ -> "mark" (* ??? *)
  274 + | _,"cc",_ -> "cc"
  275 + | _,"conj",_ -> "conj"
  276 + | _,"compound:aglt",_ -> "compound:aglt"
  277 + | _,"case",_ -> "case"
  278 + | _,"advcl",_ -> "advcl"
  279 + | _,"obj",_ -> "obj"
  280 + | _,"iobj",_ -> "iobj"
  281 + | _,"obl",_ -> "obl"
  282 + | _,"obl:arg",_ -> "obl:arg"
  283 + | _,"appos",_ -> "appos"
  284 + | _,"xcomp",_ -> "xcomp"
  285 + | _,"flat",_ -> "flat"
  286 + | _,"fixed",_ -> "fixed"
  287 + | _,"nmod",_ -> "nmod"
  288 + | _,"nmod:arg",_ -> "nmod:arg"
  289 + | _,"nummod",_ -> "nummod"
  290 + | _,"cop",_ -> "cop"
  291 + | _,"det",_ -> "det"
  292 + | _,"nsubj:pass",_ -> "nsubj:pass"
  293 + | _,"aux",_ -> "aux"
  294 + | _,"aux:pass",_ -> "aux:pass"
  295 + | _,"compound:cnd",_ -> "compound:cnd"
  296 + | _,"parataxis",_ -> "parataxis"
  297 + | _,"ccomp",_ -> "ccomp"
  298 + | _,"acl:relcl",_ -> "acl:relcl"
  299 + | _,"discourse:comment",_ -> "discourse:comment"
  300 + | _,"list",_ -> "list"
  301 + | _,"ccomp:obj",_ -> "ccomp:obj"
  302 + | _,"vocative",_ -> "vocative"
  303 + | _,"csubj",_ -> "csubj"
  304 + | _,"advmod:arg",_ -> "advmod:arg"
  305 + | _,"compound:imp",_ -> "compound:imp"
  306 + | _,"obl:comp",_ -> "obl:comp"
  307 + | _,"cc:preconj",_ -> "cc:preconj"
  308 + | _,"discourse:intj",_ -> "discourse:intj"
  309 + | _,"acl:attrib",_ -> "acl:attrib"
  310 + | _,"nmod:title",_ -> "nmod:title"
  311 + | _,"obl:agent",_ -> "obl:agent"
  312 + | _,"orphan",_ -> "orphan"
  313 + | _,"nmod:subj",_ -> "nmod:subj"
  314 + | _,"obl:pass",_ -> "obl:pass"
  315 + | _,"discourse:emo",_ -> "discourse:emo"
  316 + | (Lemma(lemma1,"subst",[[_] :: [c1] :: _]) as s),"case",(Lemma(lemma2,"prep",[[c2] :: _]) as t) ->
  317 + if c1 = c2 then "subst" ^ " -> case -> " ^ "prep" else ENIAMtokens.string_of_token s ^ " -> " ^ "case" ^ " -> " ^ ENIAMtokens.string_of_token t
  318 + | Lemma(lemma1,cat1,interp1),"case",Lemma(lemma2,"adv",interp2) -> cat1 ^ ":" ^ ENIAMtokens.string_of_interps interp1 ^ " -> case -> " ^ lemma2 ^ ":" ^ "adv" ^ ":" ^ ENIAMtokens.string_of_interps interp2
  319 + | Lemma(lemma1,cat1,interp1),label,Lemma(lemma2,cat2,interp2) ->
  320 + cat1 ^ ":" ^ ENIAMtokens.string_of_interps interp1 ^ " -> " ^ label ^ " -> " ^ cat2 ^ ":" ^ ENIAMtokens.string_of_interps interp2
  321 + | s,label,t -> ENIAMtokens.string_of_token s ^ " -> " ^ label ^ " -> " ^ ENIAMtokens.string_of_token t
  322 +
  323 +let list_dependencies corpus =
  324 + Xlist.fold corpus StringQMap.empty (fun qmap (conll_sentence, text, orig, tokens) ->
  325 + let a = match conll_sentence.sentence with
  326 + DepSentence[a] -> a
  327 + | _ -> failwith "list_dependencies" in
  328 + Int.fold 1 (Array.length a - 1) qmap (fun qmap i ->
  329 + let id,sl,sem = a.(i) in
  330 + Xlist.fold sl qmap (fun qmap (super,label) ->
  331 + let super_id,_,_ = a.(super) in
  332 + let t = ExtArray.get tokens id in
  333 + let s = ExtArray.get tokens super_id in
  334 + StringQMap.add qmap (string_of_depencency (s.token,label,t.token)))))*)
  335 +
  (* Dependency tree representation.
     dep describes one node: id is the position in the sentence array and tid the
     index in the token array (see make_tree); lemma, cat and interp come from
     the token; label is the relation to the parent; sem is the semantic column;
     sons are the dependents; is_shared is set by make_tree when the node has
     more than one head after clean_coord_deps.
     tree is a plain node (Dep), a Cluster, or a Coordination carrying a label,
     a sem value and two lists of subtrees (built by process_coordination). *)
  336 +type dep =
  337 + {id: int; tid: int; lemma: string; cat: string; interp: string list list list;
  338 + label: string; sem: string; sons: tree list; is_shared: bool}
  339 +
  340 +and tree =
  341 + Dep of dep
  342 + | Cluster of (string * string list list list) * dep * tree list (* phrase name * components * dependents *)
  343 + | Coordination of string * string * tree list * tree list
  344 +
  (* Default dep record: ids set to -1, empty strings and lists, not shared.
     Intended as a base for record update expressions. *)
  345 +let empty_dep = {id=(-1); tid=(-1); lemma=""; cat=""; interp=[]; label=""; sem=""; sons=[]; is_shared=false}
  346 +
  (* string_of_sem sem returns the empty string for an empty sem and sem wrapped
     in square brackets otherwise. *)
  347 +let string_of_sem sem =
  348 + if sem = "" then "" else "[" ^ sem ^ "]"
  349 +
  (* string_of_lci d renders the lemma, category and interpretation of d as
     lemma,cat or, when the interpretation string is non-empty, lemma,cat:interp. *)
  350 +let string_of_lci d =
  351 + let interp = ENIAMtokens.string_of_interps d.interp in
  352 + if interp = "" then Printf.sprintf "%s,%s" d.lemma d.cat
  353 + else Printf.sprintf "%s,%s:%s" d.lemma d.cat interp
  354 +
  (* string_of_phrase (phrase, interp) renders a phrase name, followed by a colon
     and the interpretation string when that string is non-empty. *)
  355 +let string_of_phrase (phrase,interp) =
  356 + let interp = ENIAMtokens.string_of_interps interp in
  357 + if interp = "" then phrase
  358 + else Printf.sprintf "%s:%s" phrase interp
  359 +
  (* string_of_tree spaces tree pretty-prints a tree as text. spaces is the
     indentation prefix of the current node; each nesting level prepends one
     more space for the children, which are printed one per line.
     - Dep prints id, lemma/category/interpretation, label and sem; the word
       Shared is prefixed when is_shared is set; sons follow in square brackets.
     - Cluster additionally prints the phrase description, the sons of its dep
       in curly brackets and its own sons in square brackets.
     - Coordination prints label and sem, then the first subtree list and, only
       when it is non-empty, the second subtree list. *)
  360 +let rec string_of_tree spaces = function
  361 + Dep d ->
  362 + if d.sons = [] then Printf.sprintf "%s%sDep(%d,%s,%s%s)" spaces (if d.is_shared then "Shared" else "") d.id (string_of_lci d) d.label (string_of_sem d.sem)
  363 + else Printf.sprintf "%s%sDep(%d,%s,%s%s,[\n%s])" spaces (if d.is_shared then "Shared" else "") d.id (string_of_lci d) d.label (string_of_sem d.sem)
  364 + (String.concat "\n" (Xlist.map d.sons (string_of_tree (" " ^ spaces))))
  365 + | Cluster((phrase,interp),d,sons) ->
  366 + let dsons = if d.sons = [] then "" else
  367 + ",{\n" ^ String.concat "\n" (Xlist.map d.sons (string_of_tree (" " ^ spaces))) ^ "}" in
  368 + let sons = if sons = [] then "" else
  369 + ",[\n" ^ String.concat "\n" (Xlist.map sons (string_of_tree (" " ^ spaces))) ^ "]" in
  370 + Printf.sprintf "%s%sCluster(%d,%s,%s,%s%s%s%s)" spaces (if d.is_shared then "Shared" else "")
  371 + d.id (string_of_phrase (phrase,interp)) (string_of_lci d) d.label (string_of_sem d.sem) dsons sons
  372 + (* | PairDep(d,d2) ->
  373 + if d.sons = [] then Printf.sprintf "%s%sPairDep(%d,%s,%s%s,%s)" spaces (if d.is_shared then "Shared" else "") d.id (string_of_lci d) d.label (string_of_sem d.sem) (string_of_lci d2)
  374 + else Printf.sprintf "%s%sPairDep(%d,%s,%s%s,%s,[\n%s])" spaces (if d.is_shared then "Shared" else "") d.id (string_of_lci d) d.label (string_of_sem d.sem) (string_of_lci d2)
  375 + (String.concat "\n" (Xlist.map d.sons (string_of_tree (" " ^ spaces)))) *)
  376 + | Coordination(label,sem,sons,[]) -> Printf.sprintf "%sCoordination(%s%s,[\n%s])" spaces label (string_of_sem sem)
  377 + (String.concat "\n" (Xlist.map sons (string_of_tree (" " ^ spaces))))
  378 + | Coordination(label,sem,sons,coords) -> Printf.sprintf "%sCoordination(%s%s,[\n%s],[\n%s])" spaces label (string_of_sem sem)
  379 + (String.concat "\n" (Xlist.map sons (string_of_tree (" " ^ spaces))))
  380 + (String.concat "\n" (Xlist.map coords (string_of_tree (" " ^ spaces))))
  381 +
  382 +let rec get_tree_node_id = function
  383 + Dep d -> d.id
  384 + | Coordination(label,sem,sons,coord) -> get_tree_node_id (List.hd sons)
  385 + | _ -> failwith "get_tree_node_id"
  386 +
  (* sort_dependents l sorts a list of trees by increasing get_tree_node_id.
     Raises whatever get_tree_node_id raises for trees it cannot handle. *)
  387 +let sort_dependents l =
  388 + Xlist.sort l (fun x y -> compare (get_tree_node_id x) (get_tree_node_id y))
  389 +
  (* make_tree_rec tokens id tid label b sem sons builds the Dep node for the
     sentence position id and, recursively, for all of its dependents.
     sons maps a head id to a list of (id, tid, label, is_shared, sem) tuples;
     a missing key means the node has no dependents. Children are consed during
     the fold, so they come out in the reverse of their order in the map entry.
     lemma, cat and interp are read from the token at index tid; an Interp token
     is rendered with category interp. Any other token kind raises Failure.
     NOTE(review): there is no cycle check, so a cyclic head structure would
     recurse without end -- confirm the input is guaranteed to be acyclic. *)
  390 +let rec make_tree_rec tokens id tid label b sem sons =
  391 + let l = try IntMap.find sons id with Not_found -> [] in
  392 + let l = Xlist.fold l [] (fun l (id,tid,label,b,sem) ->
  393 + make_tree_rec tokens id tid label b sem sons :: l) in
  394 + let lemma,cat,interp = match (ExtArray.get tokens tid).token with
  395 + Lemma(lemma,cat,interp) -> lemma,cat,interp
  396 + | Interp s -> s,"interp",[[]]
  397 + | _ -> failwith "make_tree_rec" in
  398 + Dep{id=id; tid=tid; lemma=lemma; cat=cat; interp=interp; label=label; sem=sem; sons=l; is_shared=b}
  399 +
  (* clean_coord_deps heads simplifies the list of (head id, label) pairs of a token.
     When there are exactly two heads and one of them has the label conj, only
     the conj head is kept (the first one if both are conj). Empty and
     single-element lists are returned as they are.
     NOTE(review): in the final case both branches of the conditional return the
     input list unchanged; the test for all labels being equal only guarded a
     debug print that is now commented out. *)
  400 +let clean_coord_deps = function
  401 + [] -> []
  402 + | [i,s] -> [i,s]
  403 + | [i1,"conj";i2,s2] -> [i1,"conj"]
  404 + | [i1,s1;i2,"conj"] -> [i2,"conj"]
  405 + | (i,s) :: l ->
  406 + if Xlist.fold l true (fun b (_,t) -> if t = s then b else false) then (i,s) :: l
  407 + else ((*print_endline ("clean_coord_deps: " ^ (String.concat " " (Xlist.map ((i,s) :: l) snd)));*) (i,s) :: l)
  408 +
  (* make_tree tokens a builds the dependency tree of a sentence.
     a is the array of (token index, heads, sem) entries; entry 0 is skipped
     when collecting edges and serves as the root. For every other position i
     the heads are first cleaned with clean_coord_deps; the node is flagged as
     shared when more than one head remains, and it is registered under each of
     its heads in an IntMap from head position to dependents.
     The tree is then built from position 0 and token index 0 with an empty
     label and sem. A node registered under several heads appears under each. *)
  409 +let make_tree tokens a =
  410 + let sons = Int.fold 1 (Array.length a - 1) IntMap.empty (fun sons i ->
  411 + let tid,sl,sem = a.(i) in
  412 + let sl = clean_coord_deps sl in
  413 + let b = Xlist.size sl > 1 in
  414 + Xlist.fold sl sons (fun sons (super,label) ->
  415 + IntMap.add_inc sons super [i,tid,label,b,sem] (fun l -> (i,tid,label,b,sem) :: l))) in
  416 + make_tree_rec tokens 0 0 "" false "" sons
  417 +
  (* split_sons pat sel rev l partitions the trees in l: Dep nodes whose label
     equals pat are pushed onto sel, everything else (including non-Dep trees)
     onto rev. Returns (sel, rev). Both accumulators are built by consing, so
     each result is in the reverse of the input order. *)
  418 +let rec split_sons pat sel rev = function
  419 + (Dep d as t) :: l ->
  420 + if pat = d.label then split_sons pat (t :: sel) rev l
  421 + else split_sons pat sel (t :: rev) l
  422 + | t :: l -> split_sons pat sel (t :: rev) l
  423 + | [] -> sel,rev
  424 +
  (* extract_sons pat tree removes from a Dep node the direct sons labelled pat
     and returns (removed sons, node with the remaining sons). Both lists are
     in reversed order, as produced by split_sons.
     Raises Failure extract_sons for any tree that is not a Dep. *)
  425 +let extract_sons pat = function
  426 + Dep d ->
  427 + let sel,sons = split_sons pat [] [] d.sons in
  428 + sel,Dep{d with sons=sons}
  429 + | _ -> failwith "extract_sons"
  430 +
  (* get_label tree returns the label of a Dep or Coordination node and raises
     Failure get_label for a Cluster. *)
  431 +let get_label = function
  432 + Dep d -> d.label
  433 + | Coordination(label,sem,sons,coord) -> label
  434 + | _ -> failwith "get_label"
  435 +
  (* get_sorted_sons tree returns the sons of a Dep node, sorted with
     sort_dependents, each paired with its label as (label, son).
     Raises Failure get_sorted_sons for any tree that is not a Dep. *)
  436 +let get_sorted_sons = function
  437 + Dep d ->
  438 + List.rev (Xlist.rev_map (sort_dependents d.sons) (fun t -> get_label t, t))
  439 + | _ -> failwith "get_sorted_sons"
  440 +
  (* set_sons sons tree returns a copy of a Dep node with its sons replaced.
     Raises Failure set_sons for any tree that is not a Dep. *)
  441 +let set_sons sons = function
  442 + Dep d -> Dep{d with sons=sons}
  443 + | _ -> failwith "set_sons"
  444 +
  (* extract_cc l is currently a stub: it extracts nothing and returns the empty
     list paired with l unchanged. Earlier, more elaborate versions are kept
     commented out below. *)
  445 +let extract_cc l = [],l
  446 +
  447 +(*let extract_cc l =
  448 + let first,rest =
  449 + match sort_dependents l with
  450 + first :: rest -> first,rest
  451 + | _ -> failwith "extract_cc" in
  452 + let cc_preconj,first =
  453 + match get_sorted_sons first with
  454 + ("cc:preconj",t) :: l -> [t],set_sons (Xlist.map l snd) first
  455 + | ("punct",t1) :: ("cc:preconj",t2) :: l -> [t1;t2],set_sons (Xlist.map l snd) first
  456 + (* | ("cc",t) :: l ->
  457 + print_endline (string_of_tree "" first);
  458 + failwith ("extract_cc: " ^ (String.concat " " (Xlist.map (("cc",t) :: l) fst))) *)
  459 + | ("punct",t) :: l ->
  460 + print_endline (string_of_tree "" first);
  461 + failwith ("extract_cc: " ^ (String.concat " " (Xlist.map (("punct",t) :: l) fst)))
  462 + | l -> [],first in
  463 + cc_preconj, first :: rest*)
  464 +
  465 +
  466 +(*let extract_cc l =
  467 + let first,middle,last =
  468 + match sort_dependents l with
  469 + [first;last] -> first,[],last
  470 + | first :: l ->
  471 + (match List.rev l with
  472 + last :: rev_middle -> first,List.rev rev_middle,last
  473 + | _ -> failwith "extract_cc")
  474 + | _ -> failwith "extract_cc" in
  475 + let cc_preconj,first = extract_sons "cc:preconj" first in
  476 + if Xlist.size cc_preconj > 1 then failwith "extract_cc: cc:preconj" else
  477 + let cc,last = extract_sons "cc" last in
  478 + let punct,last = extract_sons "punct" last in
  479 + if Xlist.size cc > 1 then failwith "extract_cc: cc" else
  480 + if Xlist.size punct > 1 then failwith "extract_cc: punct 1" else
  481 + let puncts,middle =
  482 + Xlist.fold middle ([],[]) (fun (puncts,middle) t ->
  483 + let punct,t = extract_sons "punct" t in
  484 + if Xlist.size punct > 1 then failwith "extract_cc: punct 2" else
  485 + punct @ puncts, t :: middle) in
  486 + sort_dependents (cc_preconj @ cc @ punct @ puncts),
  487 + [first] @ (List.rev middle) @ [last]*)
  488 +
  (* process_coordination tree rewrites coordinated structures bottom-up.
     The sons of a Dep node are processed first; those that are Dep nodes
     labelled conj are then split off. Without such sons the node is returned
     with its processed sons. Otherwise the node (without its conj sons) and the
     conj sons are passed to extract_cc and wrapped in a Coordination that
     carries the label and sem of the original node; the node itself keeps its
     label inside the Coordination.
     With the current stub of extract_cc the last component of the Coordination
     is always empty.
     Raises Failure process_coordination when applied to a tree that is not a Dep. *)
  489 +let rec process_coordination = function
  490 + Dep d ->
  491 + let sons = Xlist.rev_map d.sons process_coordination in
  492 + let coord,sons = split_sons "conj" [] [] sons in
  493 + if coord = [] then Dep{d with sons=sons} else
  494 + let coord,sons = extract_cc (Dep{d with sons=sons} :: coord) in
  495 + Coordination(d.label,d.sem,sons,coord)
  496 + | _ -> failwith "process_coordination"
  497 +
  498 +(*let rec shift_case = function
  499 + Dep(id,tid,lci,label,sem,sons,is_shared) as t ->
  500 + let case,sons = split_sons "case" [] [] sons in
  501 + (match case with
  502 + [] -> Dep(id,tid,lci,label,sem,Xlist.rev_map sons shift_case,is_shared)
  503 + | [Dep(id2,tid2,lci2,label2,sem2,sons2,is_shared2)] ->
  504 + Dep(id2,tid2,lci2,label,sem,Dep(id,tid,lci,"rev_case",sem2,Xlist.rev_map sons shift_case,is_shared2) :: sons2,is_shared)
  505 + | [Dep(id2,tid2,lci2,label2,sem2,sons2,is_shared2);t2] ->
  506 + Dep(id2,tid2,lci2,label,sem,Dep(id,tid,lci,"rev_case",sem2,Xlist.rev_map (t2 :: sons) shift_case,is_shared2) :: sons2,is_shared)
  507 + | _ -> print_endline (string_of_tree "" t); failwith "shift_case")
  508 + | Coordination(label,sem,sons,coords) -> Coordination(label,sem,Xlist.rev_map sons shift_case,coords)
  509 +
  510 +let rec shift_nummod = function
  511 + Dep(id,tid,lci,label,sem,sons,is_shared) as t ->
  512 + let nummod,sons = split_sons "nummod" [] [] sons in
  513 + (match nummod with
  514 + [] -> Dep(id,tid,lci,label,sem,Xlist.rev_map sons shift_nummod,is_shared)
  515 + | [Dep(id2,tid2,lci2,label2,sem2,sons2,is_shared2)] ->
  516 + Dep(id2,tid2,lci2,label,sem,Dep(id,tid,lci,"rev_nummod",sem2,Xlist.rev_map sons shift_nummod,is_shared2) :: sons2,is_shared)
  517 + | _ -> print_endline (string_of_tree "" t); failwith "shift_nummod")
  518 + | Coordination(label,sem,sons,coords) -> Coordination(label,sem,Xlist.rev_map sons shift_nummod,coords)
  519 +
  520 +let rec shift_mark = function
  521 + Dep(id,tid,lci,label,sem,sons,is_shared) as t ->
  522 + let mark,sons = split_sons "mark" [] [] sons in
  523 + (match sort_dependents mark with
  524 + [] -> Dep(id,tid,lci,label,sem,Xlist.rev_map sons shift_mark,is_shared)
  525 + | [Dep(id2,tid2,lci2,label2,sem2,sons2,is_shared2)] ->
  526 + Dep(id2,tid2,lci2,label,sem,Dep(id,tid,lci,"rev_mark",sem2,Xlist.rev_map sons shift_mark,is_shared2) :: sons2,is_shared)
  527 + | [Dep(id2,tid2,lci2,label2,sem2,sons2,is_shared2);t2] ->
  528 + Dep(id2,tid2,lci2,label,sem,Dep(id,tid,lci,"rev_mark",sem2,Xlist.rev_map (t2 :: sons) shift_mark,is_shared2) :: sons2,is_shared)
  529 + (* | [Dep(_,_,(lem,_,_),_,_,_,_);Dep(_,_,(lem2,_,_),_,_,_,_)] -> print_endline (string_of_tree "" t); failwith ("shift_mark: " ^ lem ^ " " ^ lem2) *)
  530 + | _ -> print_endline (string_of_tree "" t); failwith "shift_mark")
  531 + | Coordination(label,sem,sons,coords) -> Coordination(label,sem,Xlist.rev_map sons shift_mark,coords)
  532 +
  533 +let rec shift_cop = function
  534 + Dep(id,tid,lci,label,sem,sons,is_shared) as t ->
  535 + let cop,sons = split_sons "cop" [] [] sons in
  536 + (match cop with
  537 + [] -> Dep(id,tid,lci,label,sem,Xlist.rev_map sons shift_cop,is_shared)
  538 + | [Dep(id2,tid2,lci2,label2,sem2,sons2,is_shared2)] ->
  539 + Dep(id2,tid2,lci2,label,sem,Dep(id,tid,lci,"rev_cop",sem2,Xlist.rev_map sons shift_cop,is_shared2) :: sons2,is_shared)
  540 + | [Dep(id2,tid2,lci2,label2,sem2,sons2,is_shared2);t2] ->
  541 + Dep(id2,tid2,lci2,label,sem,Dep(id,tid,lci,"rev_cop",sem2,Xlist.rev_map (t2 :: sons) shift_cop,is_shared2) :: sons2,is_shared)
  542 + | _ -> print_endline (string_of_tree "" t); failwith "shift_cop")
  543 + | Coordination(label,sem,sons,coords) -> Coordination(label,sem,Xlist.rev_map sons shift_cop,coords)
  544 +
  545 +let rec shift_aux_pass = function
  546 + Dep(id,tid,lci,label,sem,sons,is_shared) as t ->
  547 + let aux_pass,sons = split_sons "aux:pass" [] [] sons in
  548 + (match aux_pass with
  549 + [] -> Dep(id,tid,lci,label,sem,Xlist.rev_map sons shift_aux_pass,is_shared)
  550 + | [Dep(id2,tid2,lci2,label2,sem2,sons2,is_shared2)] ->
  551 + Dep(id2,tid2,lci2,label,sem2,Dep(id,tid,lci,"rev_aux:pass",sem,Xlist.rev_map sons shift_aux_pass,is_shared2) :: sons2,is_shared)
  552 + | _ -> print_endline (string_of_tree "" t); failwith "shift_aux_pass")
  553 + | Coordination(label,sem,sons,coords) -> Coordination(label,sem,Xlist.rev_map sons shift_aux_pass,coords)*)
  554 +
  555 +let make_trees corpus =
  556 + Xlist.rev_map corpus (fun (conll_sentence, text, orig, tokens) ->
  557 + (* try *)
  558 + let a = match conll_sentence.sentence with
  559 + DepSentence[a] -> a
  560 + | _ -> failwith "list_dependencies" in
  561 + let tree = make_tree tokens a in
  562 + let tree = process_coordination tree in
  563 +(* let tree = shift_case tree in
  564 + let tree = shift_nummod tree in
  565 + let tree = shift_mark tree in
  566 + let tree = shift_cop tree in
  567 + let tree = shift_aux_pass tree in*)
  568 + (* print_endline conll_sentence.id;
  569 + print_endline text;
  570 + print_endline (string_of_tree "" tree); *)
  571 + conll_sentence.id,text,tree,tokens
  572 + (*with e -> (print_endline (Printexc.to_string e);
  573 + print_endline conll_sentence.id;
  574 + print_endline text;
  575 + (* print_endline (string_of_tree "" tree); *)
  576 + ())*))
  577 +
  578 +let rec flatten_coordination is_coord ulabel usem = function
  579 + Dep d ->
  580 + if ulabel = "" then [is_coord,Dep d] else [is_coord,Dep{d with label=ulabel;sem=usem}]
  581 + | Coordination(label,sem,sons,coords) ->
  582 + if ulabel = "" then List.flatten (Xlist.rev_map sons (flatten_coordination true label sem))
  583 + else List.flatten (Xlist.rev_map sons (flatten_coordination true ulabel usem))
  584 + | _ -> failwith "flatten_coordination"
  585 +
  586 +let string_of_dependency2 is_coord (lemma1,cat1,interp1) label sem (lemma2,cat2,interp2) =
  587 + (if is_coord then "COORD " else "") ^
  588 + lemma1 ^ ":" ^ cat1 ^ ":" ^ ENIAMtokens.string_of_interps interp1 ^
  589 + " -> " ^ label ^ (if sem = "" then "" else "["^sem^"]") ^ " -> "
  590 + (*^ lemma2 ^ ":"*) ^ cat2 ^ ":" ^ ENIAMtokens.string_of_interps interp2
  591 +
  592 +type sel = Any | Value of string list | Agr of string
  593 +type coord = Coord | Gen
  594 +type pattern =
  595 + PatternNode of sel * sel * sel list (* (sel * pattern) list *)
  596 + | PatternPhrase of sel * sel list
  597 + | PatternEdge of pattern * sel * pattern
  598 +
  599 +let phrase_names = StringSet.of_list ["np";"adjp";"ip";"infp";"pp";"comprep";"sent";"cp";"conjp"]
  600 +
  601 +let raw_patterns = File.load_lines "data/patterns.tab"
  602 +let raw_pair_patterns = File.load_lines "data/pair_patterns.tab"
  603 +
  604 +let is_phrase = function
  605 + Value[a] :: _ -> StringSet.mem phrase_names a
  606 + | _ -> false
  607 +
  608 +let parse_pattern2 s a =
  609 + let l = Xlist.map (Xstring.split ":" a) (function
  610 + "_" -> Any
  611 + | "$l" -> Agr "l"
  612 + | "$n" -> Agr "n"
  613 + | "$c" -> Agr "c"
  614 + | "$g" -> Agr "g"
  615 + | "$p" -> Agr "p"
  616 + | "." -> Value ["."]
  617 + | t -> Value(Xstring.split "\\." t)) in
  618 + if l = [] then failwith ("parse_pattern2: " ^ s) else
  619 + if is_phrase l then
  620 + match l with
  621 + phrase :: interp -> PatternPhrase(phrase,interp)
  622 + | _ -> failwith ("parse_pattern2: " ^ s)
  623 + else
  624 + match l with
  625 + lemma :: cat :: interp -> PatternNode(lemma,cat,interp)
  626 + | _ -> failwith ("parse_pattern2: " ^ s)
  627 +
  628 +let parse_phrase s =
  629 + match parse_pattern2 s s with
  630 + PatternPhrase(Value [phrase],interp) -> phrase,interp
  631 + | _ -> failwith "parse_phrase"
  632 +
  633 +let parse_pattern s =
  634 + if s = "" then [] else
  635 + if String.get s 0 = '#' then [] else
  636 + match Xstring.split " " s with
  637 + [a;"->";"_";"->";b] -> [Gen,s,parse_pattern2 s a,Any,parse_pattern2 s b]
  638 + | [a;"->";label;"->";b] -> [Gen,s,parse_pattern2 s a,Value [label],parse_pattern2 s b]
  639 + | [a;"->";label;"->";"[";b1;"->";label_b;"->";b2;"]"] -> [Gen,s,parse_pattern2 s a,Value [label],PatternEdge(parse_pattern2 s b1,Value [label_b],parse_pattern2 s b2)]
  640 + | [a;"->";label;"->";"[";b;"->";label_b1;"->";b1;"|";label_b2;"->";b2;"]"] ->
  641 + [Gen,s,parse_pattern2 s a,Value [label],PatternEdge(PatternEdge(parse_pattern2 s b,Value [label_b1],parse_pattern2 s b1),Value [label_b2],parse_pattern2 s b2)]
  642 + | ["[";a1;"->";label_a;"->";a2;"]";"->";label;"->";b] -> [Gen,s,PatternEdge(parse_pattern2 s a1,Value [label_a],parse_pattern2 s a2),Value [label],parse_pattern2 s b]
  643 + | ["COORD";a;"->";"_";"->";b] -> [Coord,s,parse_pattern2 s a,Any,parse_pattern2 s b]
  644 + | ["COORD";a;"->";label;"->";b] -> [Coord,s,parse_pattern2 s a,Value [label],parse_pattern2 s b]
  645 + | ["COORD";a;"->";label;"->";"[";b1;"->";label_b;"->";b2;"]"] -> [Coord,s,parse_pattern2 s a,Value [label],PatternEdge(parse_pattern2 s b1,Value [label_b],parse_pattern2 s b2)]
  646 + | _ -> failwith ("parse_pattern: " ^ s)
  647 +
  648 +let parse_pair_pattern s =
  649 + if s = "" then [] else
  650 + if String.get s 0 = '#' then [] else
  651 + match Xstring.split "\t" s with
  652 + [phrase;pat] ->
  653 + let phrase,interp = parse_phrase phrase in
  654 + (match parse_pattern pat with
  655 + [coord,s,p1,plabel,p2] -> [(phrase,interp),coord,s,p1,plabel,p2]
  656 + | _ -> failwith ("parse_pair_pattern 1: " ^ s))
  657 + | _ -> failwith ("parse_pair_pattern 2: " ^ s)
  658 +
  659 +let patterns = List.flatten (Xlist.rev_map raw_patterns parse_pattern)
  660 +let pair_patterns = List.flatten (Xlist.rev_map raw_pair_patterns parse_pair_pattern)
  661 +
  662 +let match_string map s = function
  663 + Any -> (*print_endline ("match_string: Any " ^ s);*) map
  664 + | Value l ->
  665 + let b = Xlist.fold l false (fun b t -> s = t || b) in
  666 + (*print_endline ("match_string: " ^ t ^ " " ^ s);*)
  667 + if b then map else raise Not_found
  668 + | Agr n ->
  669 + if StringMap.mem map n then
  670 + if StringMap.find map n = s then map else raise Not_found
  671 + else StringMap.add map n s
  672 +
  673 +let rec match_interp_rec2 map = function
  674 + [s],pat -> match_string map s pat
  675 + | ["congr";"rec"],pat -> map
  676 + | _,pat -> failwith "match_interp_rec2"
  677 +
  678 +let rec match_interp_rec map = function
  679 + s :: l,ps :: pl ->
  680 + let map = match_interp_rec2 map (s,ps) in
  681 + match_interp_rec map (l,pl)
  682 + | _,[] -> map
  683 + | _ -> failwith "match_interp_rec"
  684 +
  685 +let match_interp map interp pinterp =
  686 + match interp with
  687 + [interp] -> match_interp_rec map (interp,pinterp)
  688 + | _ -> failwith "match_interp"
  689 +
  690 +let rec match_pattern_rec map = function
  691 + phrase,Dep({sons=[]} as d),PatternNode(plemma,pcat,pinterp) ->
  692 + (* print_endline ("match_pattern_rec 1: \n" ^ string_of_tree "" (Dep d)); *)
  693 + let map = match_string map d.lemma plemma in
  694 + let map = match_string map d.cat pcat in
  695 + let map = match_interp map d.interp pinterp in
  696 + map
  697 + | (phrase,interp),Dep d,PatternPhrase(pphrase,pinterp) ->
  698 + (* print_endline ("match_pattern_rec 1: \n" ^ string_of_tree "" (Dep d)); *)
  699 + let map = match_string map phrase pphrase in
  700 + let map = match_interp map interp pinterp in
  701 + map
  702 + | phrase,Dep({sons=[Dep d1;Dep d2]} as d),PatternEdge(PatternEdge(p,plabel1,p1),plabel2,p2) ->
  703 + (* print_endline ("match_pattern_rec 2: \n" ^ string_of_tree "" (Dep d1)); *)
  704 + let map = match_pattern_rec map (("",[]),Dep {d with sons=[]},p) in
  705 + (try
  706 + let map = match_string map d1.label plabel1 in
  707 + let map = match_pattern_rec map (("",[]),Dep d1,p1) in
  708 + let map = match_string map d2.label plabel2 in
  709 + let map = match_pattern_rec map (("",[]),Dep d2,p2) in
  710 + map
  711 + with Not_found -> (
  712 + let map = match_string map d1.label plabel2 in
  713 + let map = match_pattern_rec map (("",[]),Dep d1,p2) in
  714 + let map = match_string map d2.label plabel1 in
  715 + let map = match_pattern_rec map (("",[]),Dep d2,p1) in
  716 + map))
  717 + | phrase,Dep({sons=[Dep d2]} as d1),PatternEdge(p1,plabel,p2) ->
  718 + (* print_endline ("match_pattern_rec 2: \n" ^ string_of_tree "" (Dep d1)); *)
  719 + let map = match_pattern_rec map (("",[]),Dep {d1 with sons=[]},p1) in
  720 + let map = match_string map d2.label plabel in
  721 + let map = match_pattern_rec map (("",[]),Dep d2,p2) in
  722 + map
  723 + | _ -> raise Not_found
  724 +
  725 +
  726 +let rec match_pattern is_coord (phrase1,d1) (phrase2,d2) = function
  727 + (coord,s,p1,plabel,p2) :: l ->
  728 + (* print_endline s; *)
  729 + if is_coord || d2.is_shared || coord = Gen then
  730 + try
  731 + let map = StringMap.empty in
  732 + let map = match_pattern_rec map (phrase1,Dep d1,p1) in
  733 + let map = match_string map d2.label plabel in
  734 + let _ = match_pattern_rec map (phrase2,Dep d2,p2) in
  735 + s
  736 + with Not_found -> match_pattern is_coord (phrase1,d1) (phrase2,d2) l
  737 + else match_pattern is_coord (phrase1,d1) (phrase2,d2) l
  738 + | [] -> raise Not_found
  739 +
  740 +let match_phrase_interp s map pinterp =
  741 + let interp = Xlist.rev_map pinterp (function
  742 + Value [v] -> [v]
  743 + | Agr v -> (try [StringMap.find map v] with Not_found -> failwith ("match_phrase_interp: " ^ s))
  744 + | _ -> failwith ("match_phrase_interp: " ^ s)) in
  745 + [List.rev interp]
  746 +
  747 +let rec match_pair_pattern is_coord (phrase1,d1) (phrase2,d2) = function
  748 + ((pphrase,pinterp),coord,s,p1,plabel,p2) :: l ->
  749 + (* print_endline s; *)
  750 + if is_coord || d2.is_shared || coord = Gen then
  751 + try
  752 + let map = StringMap.empty in
  753 + let map = match_pattern_rec map (phrase1,Dep d1,p1) in
  754 + let map = match_string map d2.label plabel in
  755 + let map = match_pattern_rec map (phrase2,Dep d2,p2) in
  756 + pphrase, match_phrase_interp s map pinterp, s
  757 + with Not_found -> match_pair_pattern is_coord (phrase1,d1) (phrase2,d2) l
  758 + else match_pair_pattern is_coord (phrase1,d1) (phrase2,d2) l
  759 + | [] -> raise Not_found
  760 +
  761 +let rec fold_tree tree s f =
  762 + match tree with
  763 + Dep d -> Xlist.fold d.sons (f s (Dep d)) (fun s t -> fold_tree t s f)
  764 + | Coordination(label,sem,sons,coords) as t -> Xlist.fold sons (f s t) (fun s t -> fold_tree t s f)
  765 + | _ -> failwith "fold_tree"
  766 +
  767 +(*let list_dependencies_tree corpus =
  768 + Xlist.fold corpus StringQMap.empty (fun qmap (sentence_id, text, tree, tokens) ->
  769 + fold_tree tree qmap (fun qmap -> function
  770 + Dep d ->
  771 + Xlist.fold (List.flatten (Xlist.rev_map d.sons (flatten_coordination false "" ""))) qmap (fun qmap -> function
  772 + is_coord,Dep d2 ->
  773 + (try
  774 + let s = match_pattern is_coord (Dep d) (Dep d2) patterns in
  775 + StringQMap.add qmap ("PATTERN " ^ s)
  776 + with Not_found -> StringQMap.add qmap (string_of_dependency2 is_coord (d.lemma,d.cat,d.interp) d2.label d2.sem (d2.lemma,d2.cat,d2.interp)))
  777 + | _ -> failwith "list_dependencies_tree")
  778 + | Coordination(label,sem,sons,coords) -> StringQMap.add qmap "Coordination"
  779 + | _ -> failwith "list_dependencies_tree"))
  780 +
  781 +let list_dependencies_tree2 corpus =
  782 + Xlist.fold corpus StringMap.empty (fun map (sentence_id, text, tree, tokens) ->
  783 + fold_tree tree map (fun map -> function
  784 + Dep d ->
  785 + Xlist.fold (List.flatten (Xlist.rev_map d.sons (flatten_coordination false "" ""))) map (fun map -> function
  786 + is_coord,Dep d2 ->
  787 + (try
  788 + let _ = match_pattern is_coord (Dep d) (Dep d2) patterns in
  789 + map
  790 + with Not_found -> StringMap.add_inc map (string_of_dependency2 is_coord (d.lemma,d.cat,d.interp) d2.label d2.sem (d2.lemma,d2.cat,d2.interp)) [text] (fun l -> text :: l))
  791 + | _ -> failwith "list_dependencies_tree2")
  792 + | Coordination(label,sem,sons,coords) -> StringMap.add_inc map "Coordination" [text] (fun l -> text :: l)
  793 + | _ -> failwith "list_dependencies_tree2"))*)
  794 +
  795 +let rec parse_pair_patterns = function
  796 + Cluster(phrase,d,l),[] -> false,Cluster(phrase,d,l)
  797 + | Cluster(phrase,d,l), Cluster(phrase2,d2,[]) :: sons ->
  798 + (try
  799 + let pphrase,pinterp,_ = match_pair_pattern false (phrase,d) (phrase2,d2) pair_patterns in
  800 + true,Cluster((pphrase,pinterp),{d with sons=Dep d2 :: d.sons},sons @ l)
  801 + with Not_found -> parse_pair_patterns (Cluster(phrase,d,Cluster(phrase2,d2,[]) :: l), sons))
  802 + | Cluster(phrase,d,l), t :: sons -> parse_pair_patterns (Cluster(phrase,d,t :: l), sons)
  803 + | _ -> failwith "parse_pair_patterns"
  804 +
  805 +let rec check_cc = function
  806 + [] -> true
  807 + | Cluster(_,{lemma=",";cat="interp";label="punct"},[]) :: l -> check_cc l
  808 + (* | Dep{lemma="-";cat="interp";label="punct"} :: l -> check_cc l *)
  809 + | Cluster(_,{lemma="i";cat="conj";label="cc"},[]) :: l -> check_cc l
  810 + | Cluster(_,{lemma="a";cat="conj";label="cc"},[]) :: l -> check_cc l
  811 + | Cluster(_,{lemma="zarówno";cat="conj";label="cc:preconj"},[]) :: l -> check_cc l
  812 + | Cluster(_,{lemma="jak";cat="conj";label="cc";sons=[Dep{lemma="i";cat="conj"}]},_) :: l -> check_cc l
  813 + | _ -> false
  814 +
  815 +let parse_coordination = function
  816 + Coordination(label,sem,[
  817 + Cluster(_,{cat="adja";sons=[]},[]);
  818 + Cluster(phrase,({cat=adj;sons=[]} as d),[Cluster(_,{lemma="-";cat="interp";label="punct"},[])])],[]) ->
  819 + Cluster(phrase,{d with label=label;sem=sem},[])
  820 + | Coordination(label,sem,sons,[]) ->
  821 + let b = Xlist.fold sons true (fun b -> function
  822 + Cluster(_,_,sons) -> check_cc sons && b
  823 + (* | PairDep(d,_) -> check_cc d.sons && b *)
  824 + | _ -> failwith "parse_coordination 2") in
  825 + if b then
  826 + match List.hd sons with
  827 + Cluster(phrase,d,_) -> Cluster(phrase,{d with is_shared=true},[])
  828 + (* | PairDep(d,d2) -> PairDep({d with is_shared=true; sons=[]},d2) *)
  829 + | _ -> failwith "parse_coordination 3"
  830 + else Coordination(label,sem,sons,[])
  831 + | _ -> failwith "parse_coordination"
  832 +
  833 +let make_phrase = function
  834 + "subst" -> "np"
  835 + | "depr" -> "np"
  836 + | "ppron12" -> "np"
  837 + | "ppron3" -> "np"
  838 + | "ger" -> "np"
  839 + | "adj" -> "adjp"
  840 + | "pact" -> "adjp"
  841 + | "ppas" -> "adjp"
  842 + | "fin" -> "ip"
  843 + | "bedzie" -> "ip"
  844 + | "praet" -> "ip"
  845 + | "winien" -> "ip"
  846 + | "impt" -> "ip"
  847 + | "imps" -> "ip"
  848 + | "pred" -> "ip"
  849 + (* | "siebie" -> "np" *)
  850 + (* | "symbol" -> "noun"
  851 + | "unk" -> "noun"
  852 + | "xxx" -> "noun"
  853 + | "adjc" -> "adj"
  854 + | "adjp" -> "adj"
  855 + | "adja" -> "adj"
  856 + | "ordnum" -> "ordnum" *)
  857 + | "inf" -> "infp"
  858 + (* | "pcon" -> "verb"
  859 + | "pant" -> "verb"
  860 + | "pacta" -> "verb" *)
  861 + | "conj" -> "conjp"
  862 + (* | "fixed" -> "fixed"
  863 + | "num" -> "num"*)
  864 + | _ -> ""
  865 +
  866 +let rec parse_tree = function
  867 + Dep d ->
  868 + (* Printf.printf "parse_tree 1: |sons|=%d\n" (Xlist.size d.sons); *)
  869 + let sons = Xlist.rev_map d.sons parse_tree in
  870 + (* Printf.printf "parse_tree 2: %s |sons|=%d\n" d.lemma (Xlist.size sons); *)
  871 + let phrase = make_phrase d.cat, d.interp in
  872 + let sons = Xlist.fold sons [] (fun sons -> function
  873 + Cluster(phrase2,d2,[]) as t ->
  874 + (try
  875 + (* print_endline "parse_tree 2a"; *)
  876 + let _ = match_pattern false (phrase,{d with sons=[]}) (phrase2,d2) patterns in
  877 + (* print_endline "parse_tree 2b"; *)
  878 + sons
  879 + with Not_found -> t :: sons)
  880 + | t -> t :: sons) in
  881 + let b,t = parse_pair_patterns (Cluster(phrase,{d with sons=[]},[]),sons) in
  882 + if b then parse_tree t else t
  883 + | Coordination(label,sem,sons,coords) ->
  884 + parse_coordination (Coordination(label,sem,List.rev (Xlist.rev_map sons parse_tree),coords))
  885 + | Cluster(phrase,d,sons) ->
  886 + let sons = Xlist.fold sons [] (fun sons -> function
  887 + Cluster(phrase2,d2,[]) ->
  888 + (try
  889 + let _ = match_pattern false (phrase,d) (phrase2,d2) patterns in
  890 + sons
  891 + with Not_found -> Cluster(phrase2,d2,[]) :: sons)
  892 + | t -> t :: sons) in
  893 + let b,t = parse_pair_patterns (Cluster(phrase,d,[]),sons) in
  894 + if b then parse_tree t else t
  895 + (* | _ -> failwith "parse_tree" *)
  896 +
  897 +let is_parsed = function
  898 + Cluster(_,{lemma="<conll_root>";sons=[]},[]) -> true
  899 + | _ -> false
  900 +
  901 +let excluded = StringSet.of_list (File.load_lines "data/excluded.tab")
  902 +
  903 +let rec split_tree forest = function
  904 + Coordination(label,sem,sons,coords) ->
  905 + Xlist.fold sons forest split_tree
  906 + | Cluster(phrase,d,[]) -> forest
  907 + | Cluster(phrase,d,sons) ->
  908 + let b = Xlist.fold sons true (fun b -> function
  909 + Cluster(_,_,[]) -> b
  910 + | _ -> false) in
  911 + if b then Cluster(phrase,d,sons) :: forest else
  912 + Xlist.fold sons forest split_tree
  913 + | _ -> failwith "split_tree"
  914 +
  915 +(* let rec rules_of_tree2 = function
  916 + Dep({sons=[]} as d) ->
  917 + d.lemma ^ ":" ^ d.cat ^ ":" ^ ENIAMtokens.string_of_interps d.interp
  918 + | Dep({sons=[Dep d2]} as d) ->
  919 + "[ " ^ d.lemma ^ ":" ^ d.cat ^ ":" ^ ENIAMtokens.string_of_interps d.interp ^
  920 + " -> " ^ d2.label ^ " -> " ^ rules_of_tree2 (Dep d2) ^ " ]"
  921 + | Dep({sons=[Dep d2;Dep d3]} as d) ->
  922 + "[ " ^ d.lemma ^ ":" ^ d.cat ^ ":" ^ ENIAMtokens.string_of_interps d.interp ^
  923 + " -> " ^ d2.label ^ " -> " ^ rules_of_tree2 (Dep d2) ^ " | " ^ d3.label ^ " -> " ^ rules_of_tree2 (Dep d3) ^ " ]"
  924 + | _ -> failwith "rules_of_tree2" *)
  925 +
  926 +(* let rec rules_of_tree2 = function
  927 + Dep({sons=[]} as d) ->
  928 + "_:" ^ d.cat ^ ":" ^ ENIAMtokens.string_of_interps d.interp
  929 + | Dep({sons=[Dep d2]} as d) ->
  930 + "[ _:" ^ d.cat ^ ":" ^ ENIAMtokens.string_of_interps d.interp ^
  931 + " -> " ^ d2.label ^ " -> " ^ rules_of_tree2 (Dep d2) ^ " ]"
  932 + | Dep({sons=[Dep d2;Dep d3]} as d) ->
  933 + "[ _:" ^ d.cat ^ ":" ^ ENIAMtokens.string_of_interps d.interp ^
  934 + " -> " ^ d2.label ^ " -> " ^ rules_of_tree2 (Dep d2) ^ " | " ^ d3.label ^ " -> " ^ rules_of_tree2 (Dep d3) ^ " ]"
  935 + | _ -> failwith "rules_of_tree2" *)
  936 +
  937 +let rec rules_of_tree2 = function
  938 + Dep({sons=[]} as d) ->
  939 + "_:" ^ d.cat (*^ ":" ^ ENIAMtokens.string_of_interps d.interp*)
  940 + | Dep({sons=[Dep d2]} as d) ->
  941 + "[ _:" ^ d.cat ^ (*":" ^ ENIAMtokens.string_of_interps d.interp ^*)
  942 + " -> " ^ d2.label ^ " -> " ^ rules_of_tree2 (Dep d2) ^ " ]"
  943 + | Dep({sons=[Dep d2;Dep d3]} as d) ->
  944 + "[ _:" ^ d.cat ^ (*":" ^ ENIAMtokens.string_of_interps d.interp ^*)
  945 + " -> " ^ d2.label ^ " -> " ^ rules_of_tree2 (Dep d2) ^ " | " ^ d3.label ^ " -> " ^ rules_of_tree2 (Dep d3) ^ " ]"
  946 + | _ -> failwith "rules_of_tree2"
  947 +
  948 +
  949 +let rules_of_tree rules = function
  950 + Cluster(_,d,sons) ->
  951 + Xlist.fold sons rules (fun rules -> function
  952 + Cluster(_,d2,[]) -> (rules_of_tree2 (Dep d) ^ " -> " ^ d2.label ^ " -> " ^ rules_of_tree2 (Dep d2)) :: rules
  953 + | _ -> failwith "rules_of_tree")
  954 + | _ -> failwith "rules_of_tree"
  955 +
  956 +
  957 +let parse corpus =
  958 + Xlist.iter corpus (fun (sentence_id, text, tree, tokens) ->
  959 + if StringSet.mem excluded sentence_id then () else
  960 + (try
  961 + let tree = parse_tree tree in
  962 + if is_parsed tree then () (*print_endline ("PARSED: " ^ sentence_id)*) else (
  963 + print_endline sentence_id;
  964 + print_endline text;
  965 + print_endline (string_of_tree "" tree);
  966 + let forest = split_tree [] tree in
  967 + Xlist.iter forest (fun tree ->
  968 + (* print_endline ("\n" ^ string_of_tree "" tree); *)
  969 + let rules = rules_of_tree [] tree in
  970 + Xlist.iter rules print_endline))
  971 + with e ->
  972 + print_endline sentence_id;
  973 + print_endline text;
  974 + print_endline (string_of_tree "" tree);
  975 + print_endline (Printexc.to_string e)))
  976 +
  977 +let extract_rules corpus =
  978 + Xlist.fold corpus StringQMap.empty (fun qmap (sentence_id, text, tree, tokens) ->
  979 + if StringSet.mem excluded sentence_id then qmap else
  980 + (try
  981 + let tree = parse_tree tree in
  982 + if is_parsed tree then StringQMap.add qmap "PARSED" else (
  983 + let forest = split_tree [] tree in
  984 + Xlist.fold forest qmap (fun qmap tree ->
  985 + let rules = rules_of_tree [] tree in
  986 + Xlist.fold rules qmap StringQMap.add))
  987 + with e -> StringQMap.add qmap (Printexc.to_string e)))
... ...
corpora/data/OrdNumber.tab 0 → 100644
  1 +ostatni
  2 +przedostatni
  3 +pierwszy
  4 +drugi
  5 +trzeci
  6 +czwarty
  7 +piąty
  8 +szósty
  9 +siódmy
  10 +ósmy
  11 +dziewiąty
  12 +dziesiąty
  13 +jedenasty
  14 +dwunasty
  15 +trzynasty
  16 +czternasty
  17 +piętnasty
  18 +szesnasty
  19 +siedemnasty
  20 +osiemnasty
  21 +dziewiętnasty
  22 +dwudziesty
  23 +trzydziesty
  24 +czterdziesty
  25 +pięćdziesiąty
  26 +sześćdziesiąty
  27 +siedemdziesiąty
  28 +osiemdziesiąty
  29 +dziewięćdziesiąty
  30 +setny
  31 +dwusetny
  32 +trzysetny
  33 +
... ...
corpora/data/OrdNumberCompound.tab 0 → 100644
  1 +dwudziesty
  2 +trzydziesty
  3 +czterdziesty
  4 +pięćdziesiąty
  5 +sześćdziesiąty
  6 +siedemdziesiąty
  7 +osiemdziesiąty
  8 +dziewięćdziesiąty
  9 +
... ...
corpora/data/OrdNumberUnit.tab 0 → 100644
  1 +pierwszy
  2 +drugi
  3 +trzeci
  4 +czwarty
  5 +piąty
  6 +szósty
  7 +siódmy
  8 +ósmy
  9 +dziewiąty
  10 +
... ...
corpora/data/both-correct.tab 0 → 100644
  1 +U . S . U.S.
  2 +... . . .
  3 +.. . .
  4 +(...) ( . . . )
  5 +?! ? !
  6 +!!! ! ! !
  7 +??? ? ? ?
  8 +.... . . . .
  9 +?... ? . . .
  10 +,, , ,
  11 +?? ? ?
  12 +S . A S.A
  13 +...? . . . ?
  14 +..... . . . . .
  15 +!!!! ! ! ! !
  16 +!! ! !
  17 +!... ! . . .
  18 +[...] [ . . . ]
  19 +’’ . ’ ’ .
  20 +[..] [ . . ]
  21 +....... . . . . . . .
  22 +…? … ?
  23 +(*) ( * )
  24 +***** * * * * *
  25 +[+] [ + ]
  26 +[-] [ - ]
  27 +[?] [ ? ]
  28 +1975-1998 1975 - 1998
  29 +’’ ’ ’
  30 +P . S P.S
  31 +2-3 2 - 3
  32 +17.00 17 . 00
  33 +...... . . . . . .
  34 +16.00 16 . 00
  35 +!? ! ?
  36 +2:0 2 : 0
  37 +22.00 22 . 00
  38 +1:0 1 : 0
  39 +........ . . . . . . . .
  40 +???? ? ? ? ?
  41 +30-40 30 - 40
  42 +3-4 3 - 4
  43 +20.00 20 . 00
  44 +1:1 1 : 1
  45 +14.00 14 . 00
  46 +13.00 13 . 00
  47 +12.00 12 . 00
  48 +1-3 1 - 3
  49 +>> > >
  50 +7-9 7 - 9
  51 +3-5 3 - 5
  52 +22.30 22 . 30
  53 +18.30 18 . 30
  54 +18.00 18 . 00
  55 +16.30 16 . 30
  56 +13.30 13 . 30
  57 +11.00 11 . 00
  58 +10.30 10 . 30
  59 +(?) ( ? )
  60 +8.00 8 . 00
  61 +6:4 6 : 4
  62 +4-5 4 - 5
  63 +3:0 3 : 0
  64 +20-30 20 - 30
  65 +2-4 2 - 4
  66 +19.30 19 . 30
  67 +14.30 14 . 30
  68 +**** * * * *
  69 +*** * * *
  70 +(-) ( - )
  71 +(!) ( ! )
  72 +’80 ’ 80
  73 +k . p . c k.p.c
  74 +(++) ( + + )
  75 +(--) ( -- )
  76 +(..) ( . . )
  77 +(?!) ( ? ! )
  78 +(…) ( … )
  79 +-10 - 10
  80 +-124 - 124
  81 +-17 - 17
  82 +-2007 - 2007
  83 +-22 - 22
  84 +-23 - 23
  85 +-367 - 367
  86 +-40 - 40
  87 +-5 - 5
  88 +/.../ / . . . /
  89 +0,05-1,0 0,05 - 1,0
  90 +0,05-1,5 0,05 - 1,5
  91 +0,5–1,0 0,5 – 1,0
  92 +0,5—1 0,5 — 1
  93 +0,9-2,75 0,9 - 2,75
  94 +0-46 855 -45 -26 ) . 0-46 855-45-26 ) .
  95 +0-5 0 - 5
  96 +00:39 00 : 39
  97 +01:26 01 : 26
  98 +01:40 01 : 40
  99 +03.03. 03 . 03 .
  100 +04:02 04 : 02
  101 +05:10 05 : 10
  102 +06:15 06 : 15
  103 +06:37 06 : 37
  104 +07:43 07 : 43
  105 +07:54 07 : 54
  106 +09:56 09 : 56
  107 +0:00 0 : 00
  108 +0:30 0 : 30
  109 +0:35 0 : 35
  110 +0:5 0 : 5
  111 +1,24 1 , 24
  112 +1,5-2 1,5 - 2
  113 +1,50-2 1 , 50 - 2
  114 +1,6-1,8 1,6 - 1,8
  115 +1-11 1 - 11
  116 +1-17 1 - 17
  117 +1-23 1 - 23
  118 +1-8 1 - 8
  119 +1.01.1993 1 . 01 . 1993
  120 +1.04. 1 . 04 .
  121 +1.04.286 1 . 04 . 286
  122 +1.12 1 . 12
  123 +1.3 1 . 3
  124 +10-11 10 - 11
  125 +10-13 10 - 13
  126 +10-14 10 - 14
  127 +10-15 10 - 15
  128 +10-20 10 - 20
  129 +10.10.1995 10 . 10 . 1995
  130 +10.20 10 . 20
  131 +10 - proc 10-proc
  132 +100 - % 100-%
  133 +100-110 100 - 110
  134 +1000-1500 1000 - 1500
  135 +10–15 10 – 15
  136 +10—12 10 — 12
  137 +10—15 10 — 15
  138 +11-12 11 - 12
  139 +11.10 11 . 10
  140 +11.X.2001 11 . X . 2001
  141 +1150-1250 1150 - 1250
  142 +11:39 11 : 39
  143 +12,8-12,9 12,8 - 12,9
  144 +12-16 12 - 16
  145 +12-17 12 - 17
  146 +12-24 12 - 24
  147 +12.07.1982 12 . 07 . 1982
  148 +12.09. 12 . 09 .
  149 +12.20 12 . 20
  150 +12.25 12 . 25
  151 +12.28 12 . 28
  152 +12.5 . 12 . 5 .
  153 +12.50 12 . 50
  154 +120-140 120 - 140
  155 +1200-1259 1200 - 1259
  156 +120–140 120 – 140
  157 +120–410 120 – 410
  158 +1230-1246 1230 - 1246
  159 +1248–60 1248 – 60
  160 +1253-1810 1253 - 1810
  161 +1256-1258 1256 - 1258
  162 +1288–90 1288 – 90
  163 +12:15 12 : 15
  164 +12:29 12 : 29
  165 +12:52 12 : 52
  166 +13-17 13 - 17
  167 +13-23 13- 23
  168 +13-35 13 - 35
  169 +13.01.1228 13 . 01 . 1228
  170 +13.15 13 . 15
  171 +13.28 13 . 28
  172 +13.40 13 . 40
  173 +1333-70 1333 - 70
  174 +1340-1405 1340 - 1405
  175 +1394-1831 1394 - 1831
  176 +13:02 13 : 02
  177 +13:06 13 : 06
  178 +13:39 13 : 39
  179 +13–14 13 – 14
  180 +14,36 14 , 36
  181 +14-11 14 - 11
  182 +14-18 14 - 18
  183 +14.00-16.00 14 . 00 - 16 . 00
  184 +14.01.1928 14 . 01 . 1928
  185 +14.07.1995 14 . 07 . 1995
  186 +14.12 14 . 12
  187 +14.38 14 . 38
  188 +14.50 14 . 50
  189 +14.X.1954 14 . X . 1954
  190 +1400-1600 1400 - 1600
  191 +142-131 142 - 131
  192 +1466–1772 1466 – 1772
  193 +1486-93 1486 - 93
  194 +14:11 14 : 11
  195 +14:54 14 : 54
  196 +15,22 15 , 22
  197 +15-10 15 - 10
  198 +15-17 15 - 17
  199 +15-20 15 - 20
  200 +15-25 15 - 25
  201 +15.15 15 . 15
  202 +15.40 15 . 40
  203 +15.46 15 . 46
  204 +15.50 15 . 50
  205 +150-180 150 - 180
  206 +150-300 150- 300
  207 +150–180 150 – 180
  208 +1511-21 1511 - 21
  209 +1558-62 1558 - 62
  210 +1566-82 1566 - 82
  211 +1582-1727 1582 - 1727
  212 +1585-1590 1585 - 1590
  213 +15:10 15 : 10
  214 +15:18 15 : 18
  215 +15:48 15 : 48
  216 +15:53 15 : 53
  217 +15:55 15 : 55
  218 +15—20 15 — 20
  219 +16.15 16 . 15
  220 +1600–03 1600 – 03
  221 +161 c 161c
  222 +1615-17 1615 - 17
  223 +1618-22 1618 - 22
  224 +1643-1670 1643 - 1670
  225 +1687–1716 1687 – 1716
  226 +1691-1692 1691 - 1692
  227 +1699-1766 1699 - 1766
  228 +16:18 16 : 18
  229 +16:24 16 : 24
  230 +16:30 16 : 30
  231 +16–23 16 – 23
  232 +17.03 17 . 03
  233 +17.05 17 . 05
  234 +17.11 17 . 11
  235 +17.15 17 . 15
  236 +17.34 17 . 34
  237 +17.40 17 . 40
  238 +17.X.64 17 . X . 64
  239 +1709-1713 1709 - 1713
  240 +1727–28 1727 – 28
  241 +1770-84 1770 - 84
  242 +1777–86 1777 – 86
  243 +1797-1805 1797 - 1805
  244 +1797-1863 1797 - 1863
  245 +1799–1873 1799 – 1873
  246 +17:25 17 : 25
  247 +17:26 17 : 26
  248 +17:35 17 : 35
  249 +17:40 17 : 40
  250 +18-20 18 - 20
  251 +18.09. 18 . 09 .
  252 +18.15 18 . 15
  253 +18.50 18 . 50
  254 +1809-1814 1809 - 1814
  255 +1809–1881 1809 – 1881
  256 +1812–23 1812 – 23
  257 +1821-1914 1821 - 1914
  258 +1822-24 1822 - 24
  259 +1824-1891 1824 - 1891
  260 +1829–1907 1829 – 1907
  261 +1830–36 1830 – 36
  262 +1831-1846 1831 - 1846
  263 +1831-1880 1831 - 1880
  264 +1839–41 1839 – 41
  265 +1848-49 1848 - 49
  266 +1848–1931 1848 – 1931
  267 +1850-1920 1850 - 1920
  268 +1855-1940 1855 - 1940
  269 +1857–60 1857 – 60
  270 +1858–61 1858 – 61
  271 +1859–1913 1859 – 1913
  272 +1859–1922 1859 – 1922
  273 +1862-1951 1862 - 1951
  274 +1865-1940 1865 - 1940
  275 +1870-1914 1870 - 1914
  276 +1871-1893 1871 - 1893
  277 +1874-1959 1874 - 1959
  278 +1875-76 1875 - 76
  279 +1875–1975 1875 – 1975
  280 +1876-1917 1876 - 1917
  281 +1878-1949 1878 - 1949
  282 +1879-1885 1879 - 1885
  283 +1879–98 1879 – 98
  284 +1883-1913 1883 - 1913
  285 +1886-87 1886 - 87
  286 +1893-1971 1893 - 1971
  287 +1894-1972 1894 - 1972
  288 +1894-97 1894 - 97
  289 +1895-1968 1895 - 1968
  290 +1896-1901 1896 - 1901
  291 +1896-1952 1896 - 1952
  292 +1896-1962 1896 - 1962
  293 +1897-1936 1897 - 1936
  294 +1898-1901 1898 - 1901
  295 +18:22 18 : 22
  296 +18:24 18 : 24
  297 +18:30 18 : 30
  298 +18:48 18 : 48
  299 +18–20 18 – 20
  300 +18–24 18 – 24
  301 +18–28 18 – 28
  302 +19,3 19 , 3
  303 +19-2 19 - 2
  304 +19-20 19 - 20
  305 +19-21 19 - 21
  306 +19-7 19 - 7
  307 +19.09 19 . 09
  308 +19.12.1956 19 . 12 . 1956
  309 +19.2.—20.3. 19 . 2 . — 20 . 3 .
  310 +19.20 19 . 20
  311 +19.25 19 . 25
  312 +19.27 19 . 27
  313 +1900-2000 1900 - 2000
  314 +1901-49 1901 - 49
  315 +1905-1928 1905 - 1928
  316 +1905-40 1905 - 40
  317 +1906-07 1906 - 07
  318 +1906-1907 1906 - 1907
  319 +1906-1979 1906 - 1979
  320 +1906-1980 1906 - 1980
  321 +1906–59 1906 – 59
  322 +1907-1983 1907 - 1983
  323 +1907–09 1907 – 09
  324 +1909-1987 1909 - 1987
  325 +1910-1963 1910 - 1963
  326 +1912-13 1912 - 13
  327 +1913-1983 1913 - 1983
  328 +1913-21 1913 - 21
  329 +1914-1945 1914 - 1945
  330 +1915–30 1915 – 30
  331 +1917-1919 1917 - 1919
  332 +1918-1919 1918 - 1919
  333 +1918-1922 1918 - 1922
  334 +1918-1932 1918 - 1932
  335 +1918-9 1918 - 9
  336 +1919-1932 1919 - 1932
  337 +1920-1932 1920 - 1932
  338 +1922-36 1922 - 36
  339 +1928-1929 1928 - 1929
  340 +1929-1933 1929 - 1933
  341 +1929-1939 1929 - 1939
  342 +1931-39 1931 - 39
  343 +1934-1939 1934 - 1939
  344 +1937-39 1937 - 39
  345 +1938-1957 1938 - 1957
  346 +1938-1978 1938 - 1978
  347 +1939-1945 1939 - 1945
  348 +1941–44 1941 – 44
  349 +1942-1944 1942 - 1944
  350 +1944-48 1944 - 48
  351 +1944-89 1944 - 89
  352 +1945-1965 1945 - 1965
  353 +1947-48 1947 - 48
  354 +1947-49 1947 - 49
  355 +1948-49 1948 - 49
  356 +1948–49 1948 – 49
  357 +1950-52 1950 - 52
  358 +1951-77 1951 - 77
  359 +1952-1954 1952 - 1954
  360 +1952-57 1952 - 57
  361 +1955-1991 1955 - 1991
  362 +1960-1970 1960 - 1970
  363 +1960-1993 1960 - 1993
  364 +1962-1980 1962 - 1980
  365 +1962-66 1962 - 66
  366 +1963-1967 1963 - 1967
  367 +1966—1969 1966 — 1969
  368 +1968-1980 1968 - 1980
  369 +1969-71 1969 - 71
  370 +1970-1972 1970 - 1972
  371 +1970–1973 1970 – 1973
  372 +1970–1975 1970 – 1975
  373 +1971-1974 1971 - 1974
  374 +1971-1980 1971 - 1980
  375 +1971–79 1971 – 79
  376 +1973-75 1973 - 75
  377 +1975:132 1975 : 132
  378 +1975:133 1975 : 133
  379 +1978-2001 1978 - 2001
  380 +1979-81 1979 - 81
  381 +1979-83 1979 - 83
  382 +1980-1981 1980 - 1981
  383 +1980-1982 1980 - 1982
  384 +1980-1989 1980 - 1989
  385 +1981-1984 1981 - 1984
  386 +1981-1990 1981 - 1990
  387 +1982-1983 1982 - 1983
  388 +1982-1984 1982 - 1984
  389 +1982–91 1982 – 91
  390 +1982–92 1982 – 92
  391 +1983-1987 1983 - 1987
  392 +1983-87 1983 - 87
  393 +1984-1985 1984 - 1985
  394 +1984-1991 1984 - 1991
  395 +1987–95 1987 – 95
  396 +1989-2009 1989 - 2009
  397 +1989-93 1989 - 93
  398 +1990-1993 1990 - 1993
  399 +1990-1994 1990 - 1994
  400 +1990-93 1990 - 93
  401 +1991-1993 1991 - 1993
  402 +1993-2008 1993 - 2008
  403 +1994-1995 1994 - 1995
  404 +1994-2000 1994 - 2000
  405 +1994-95 1994 - 95
  406 +1995-1996 1995 - 1996
  407 +1997-1998 1997 - 1998
  408 +1997-2001 1997 - 2001
  409 +1997–2001 1997 – 2001
  410 +1999-2000 1999 - 2000
  411 +1999-2001 1999 - 2001
  412 +1:00 1 : 00
  413 +1:10 1 : 10
  414 +1:100 000 . 1 : 100 000 .
  415 +1:2 1 : 2
  416 +1:25 1 : 25
  417 +1:30000 1 : 30000
  418 +1:4 1 : 4
  419 +1:50 1 : 50
  420 +1–3 1 – 3
  421 +1–8 1 – 8
  422 +2-11 2 - 11
  423 +2-7 2 - 7
  424 +2.00 2 . 00
  425 +2.2 2 . 2
  426 +2.30 2 . 30
  427 +20,12 20 , 12
  428 +20-26 20 - 26
  429 +20.10.2005 20 . 10 . 2005
  430 +20.30 20 . 30
  431 +200-300 200 - 300
  432 +2000-2006 2000 - 2006
  433 +20000-30000 20000 - 30000
  434 +2001-2002 2001 - 2002
  435 +2001-2006 2001 - 2006
  436 +2003-2006 2003 - 2006
  437 +2004–2006 2004 – 2006
  438 +2008-2018 2008 - 2018
  439 +2025-30 2025 - 30
  440 +20:13 20 : 13
  441 +20:25 20 : 25
  442 +20:47 20 : 47
  443 +20—30 20 — 30
  444 +21-23 21 - 23
  445 +21.1.—18.2. 21 . 1 . — 18 . 2 .
  446 +21.12.1796 21 . 12 . 1796
  447 +211-215 211 - 215
  448 +21:11 21 : 11
  449 +21:12 21 : 12
  450 +21:21 21 : 21
  451 +21:25 21 : 25
  452 +21:30 21 : 30
  453 +21:32 21 : 32
  454 +21:34 21 : 34
  455 +22.02 22 . 02
  456 +22.05.1689 22 . 05 . 1689
  457 +22.05.1859 22 . 05 . 1859
  458 +22.05.2002 22 . 05 . 2002
  459 +22.5.—21.6. 22 . 5 . — 21 . 6 .
  460 +22.6.—22.7. 22 . 6 . — 22 . 7 .
  461 +22:07 22 : 07
  462 +22:12 22 : 12
  463 +22:33 22 : 33
  464 +22:51 22 : 51
  465 +23.00 23 . 00
  466 +23.11.—21.12. 23 . 11 . — 21 . 12 .
  467 +230–280 230 – 280
  468 +231-198 231 - 198
  469 +23:00 23 : 00
  470 +24-26 24 - 26
  471 +24-48 24 - 48
  472 +24.00 24 . 00
  473 +24.12.1926 24 . 12 . 1926
  474 +24.II.1993 24 . II . 1993
  475 +24.VII.1997 24 . VII . 1997
  476 +240-249 240 - 249
  477 +242206083.2 242206083 . 2
  478 +25-30 25 - 30
  479 +25:19 25 : 19
  480 +25:9 25 : 9
  481 +25–27 25 – 27
  482 +25–30 25 – 30
  483 +26.06. 26 . 06 .
  484 +27,46 27 , 46
  485 +27.01.99 27 . 01 . 99
  486 +27.06. 27 . 06 .
  487 +27.06.1997 27 . 06 . 1997
  488 +270-300 270 - 300
  489 +286–298 286 – 298
  490 +28:13 28 : 13
  491 +29.06.1929 29 . 06 . 1929
  492 +2:1 2 : 1
  493 +2:13 2 : 13
  494 +2:2 2 : 2
  495 +2:26 2 : 26
  496 +2:27 2 : 27
  497 +2:3 2 : 3
  498 +2:35 2 : 35
  499 +2–3 2 – 3
  500 +2—3 2 — 3
  501 +2—4 2 — 4
  502 +3-10 3 - 10
  503 +3-16 3 - 16
  504 +3-2 3 - 2
  505 +3-7 3 - 7
  506 +3-8 3 - 8
  507 +3.00 3 . 00
  508 +3.15 3 . 15
  509 +3.29 , 46 3.29,46
  510 +3.30 , 47 3.30,47
  511 +3.32 , 91 3.32,91
  512 +3.47 3 . 47
  513 +30 - proc 30-proc
  514 +30-31 30 - 31
  515 +30-35 30 - 35
  516 +30.03.1993 30 . 03 . 1993
  517 +300-350 300 - 350
  518 +300-400 300 - 400
  519 +31.03.1146 31 . 03 . 1146
  520 +31.10.97 31 . 10 . 97
  521 +32-40 32 - 40
  522 +32-76 32 - 76
  523 +333-4 333 - 4
  524 +34-64 34 - 64
  525 +34:19 34 : 19
  526 +35-38 35 - 38
  527 +3:11 3 : 11
  528 +3:25 3 : 25
  529 +3:6 3 : 6
  530 +3–4 3 – 4
  531 +4-25 4 - 25
  532 +4-7 4 - 7
  533 +4.00 4 . 00
  534 +4.07.1610 4 . 07 . 1610
  535 +4.07.1890 4 . 07 . 1890
  536 +4.10 4 . 10
  537 +4.12 4 . 12
  538 +4.30 4 . 30
  539 +4.9 4 . 9
  540 +428-348 428 - 348
  541 +43-69 43 - 69
  542 +4:2 4 : 2
  543 +4:3 4 : 3
  544 +4–5 4 – 5
  545 +4–6 4 – 6
  546 +5 % 5%
  547 +5,16 5 , 16
  548 +5-2 5 - 2
  549 +5.13 5 . 13
  550 +5.16.26 5 . 16 . 26
  551 +5.21 5 . 21
  552 +5.23 5 . 23
  553 +5.30 5 . 30
  554 +5.40 5 . 40
  555 +5.55 5 . 55
  556 +50-100 50 - 100
  557 +50-56 50 - 56
  558 +50-69 50 - 69
  559 +53-58 53 - 58
  560 +536–552 536 – 552
  561 +54-56 54 - 56
  562 +5:0 5 : 0
  563 +5:2 5 : 2
  564 +5–10 5 – 10
  565 +5–8 5 – 8
  566 +5—15 5 — 15
  567 +570918348.10 570918348 . 10
  568 +57:56 57 : 56
  569 +6.02 6 . 02
  570 +6.15 6 . 15
  571 +6.38 6 . 38
  572 +60-70 60 - 70
  573 +63:63 63 : 63
  574 +68-75 68 - 75
  575 +6:0 6 : 0
  576 +6–8 6 – 8
  577 +7,15 7 , 15
  578 +7-2 7 - 2
  579 +7-3 7 - 3
  580 +7-5 7 - 5
  581 +7-7 7 - 7
  582 +7.07.2005 7 . 07 . 2005
  583 +7.20 7 . 20
  584 +7.56 7 . 56
  585 +7.8.9 7 . 8 . 9
  586 +74:71 74 : 71
  587 +75-139 75 - 139
  588 +7:0 7 : 0
  589 +8-12 8 - 12
  590 +8-19 8 - 19
  591 +8-4 8 - 4
  592 +8.02 8 . 02
  593 +8.28 8 . 28
  594 +8.45 8 . 45
  595 +80-100 80 - 100
  596 +80-90 80 - 90
  597 +800-1000 800 - 1000
  598 +80000-130000 80000 - 130000
  599 +85:0 85 : 0
  600 +87:62 87 : 62
  601 +89-109 89 - 109
  602 +8:25 8 : 25
  603 +9-11 9 - 11
  604 +9-12 9 - 12
  605 +9-14 9 - 14
  606 +9.15 9 . 15
  607 +9.40 9 . 40
  608 +9.45 9 . 45
  609 +900-1900 900 - 1900
  610 +93-97 93 - 97
  611 +95-98 95 - 98
  612 +960-1127 960 - 1127
  613 +99,5-99,7 99,5 - 99,7
  614 +9:4 9 : 4
  615 +9–15 9 – 15
  616 +0:0 0 : 0
  617 +0:1 0 : 1
  618 +0:3 0 : 3
  619 +1-10 1 - 10
  620 +1-2 1 - 2
  621 +1-5 1 - 5
  622 +10-12 10 - 12
  623 +10-17 10 - 17
  624 +11.30 11 . 30
  625 +12.30 12 . 30
  626 +14-16 14 - 16
  627 +14-17 14 - 17
  628 +14.20 14 . 20
  629 +15.00 15 . 00
  630 +15.30 15 . 30
  631 +16-17 16 - 17
  632 +16.20 16 . 20
  633 +16:21 16 : 21
  634 +17-20 17 - 20
  635 +17.30 17 . 30
  636 +17.50 17 . 50
  637 +18 . 18.
  638 +18-19 18 - 19
  639 +1849–1910 1849 – 1910
  640 +19.00 19 . 00
  641 +1939-1941 1939 - 1941
  642 +1971—1975 1971 — 1975
  643 +1989-1991 1989 - 1991
  644 +1992-96 1992 - 96
  645 +1997-1999 1997 - 1999
  646 +1:5 1 : 5
  647 +2,5-3 2,5 - 3
  648 +2007-2013 2007 - 2013
  649 +21.00 21 . 00
  650 +21.30 21 . 30
  651 +25:21 25 : 21
  652 +26-28 26 - 28
  653 +3.30 3 . 30
  654 +3:3 3 : 3
  655 +4.2.1 4 . 2 . 1
  656 +4:1 4 : 1
  657 +5-10 5 - 10
  658 +5-6 5 - 6
  659 +5-7 5 - 7
  660 +5-8 5 - 8
  661 +5.00 5 . 00
  662 +6-7 6 - 7
  663 +6-8 6 - 8
  664 +6.30 6 . 30
  665 +7 - proc 7-proc
  666 +7-13 7 - 13
  667 +7-14 7 - 14
  668 +7.00 7 . 00
  669 +8-10 8 - 10
  670 +8.15 8 . 15
  671 +8.30 8 . 30
  672 +8:2 8 : 2
  673 +9.30 9 . 30
  674 +0:2 0 : 2
  675 +10.00 10 . 00
  676 +2-5 2 - 5
  677 +3:1 3 : 1
  678 +40-50 40 - 50
  679 +4:0 4 : 0
  680 +6.00 6 . 00
  681 +7:6 7 : 6
  682 +8-9 8 - 9
  683 +8:15 8 : 15
  684 +9.00 9 . 00
  685 +?!!! ? ! ! !
  686 +?!... ? ! . . .
  687 +północno-wschodniej północno - wschodniej
  688 +warmińsko-mazurskie warmińsko - mazurskie
  689 +kujawsko-pomorskim kujawsko - pomorskim
  690 +północno-wschodnim północno - wschodnim
  691 +warmińsko-mazurskiego warmińsko - mazurskiego
  692 +warmińsko-mazurskim warmińsko - mazurskim
  693 +północno-wschodni północno - wschodni
  694 +północno-wschodniego północno - wschodniego
  695 +północno-wschodnią północno - wschodnią
  696 +kujawsko-pomorskie kujawsko - pomorskie
  697 +kujawsko-pomorskiego kujawsko - pomorskiego
  698 +kulturalno-oświatowe kulturalno - oświatowe
  699 +austro-węgierskiej austro - węgierskiej
  700 +austro-węgierską austro - węgierską
  701 +XVI-XVII XVI - XVII
  702 +XVII-XIX XVII - XIX
  703 +XV–XVI XV – XVI
  704 +XIII-XV XIII - XV
  705 +XIV-XVIII XIV - XVIII
  706 +XIX–XX XIX – XX
  707 +XI–XV XI – XV
  708 +XV-XVI XV - XVI
  709 +VI–VIII VI – VIII
  710 +SGGW-AR SGGW - AR
  711 +IV-V IV - V
  712 +III–V III – V
  713 +I-III I - III
  714 +I-IV I - IV
  715 +I-VIII I - VIII
  716 +(+) ( + )
  717 +40000-50000 40000 - 50000
  718 +45,72 45 , 72
  719 +45-46 45 - 46
  720 +49-70 49 - 70
  721 +70-75 70 - 75
  722 +71:61 71 : 61
  723 +9.00-16.00 9 . 00 - 16 . 00
  724 +Bielska-Białej Bielska - Białej
  725 +Bielsko-Biała Bielsko - Biała
  726 +Bim-bam Bim - bam
  727 +1.05. 1 . 05 .
  728 +1.05 1 . 05
  729 +10.10. 10 . 10 .
  730 +10.12. 10 . 12 .
  731 +11.12. 11 . 12 .
  732 +14.05. 14 . 05 .
  733 +21.06. 21 . 06 .
  734 +29.09. 29 . 09 .
  735 +9.02. 9 . 02 .
  736 +Tse-tungiem Tse - tungiem
  737 +kulturalno-oświatowym kulturalno - oświatowym
  738 +n - ru n-ru
  739 +pif-paf pif - paf
  740 +serbo - chorwackimi serbo-chorwackimi
  741 +gadu-gadu gadu - gadu
  742 +p . s p.s
  743 +w . c w.c
  744 +z - ca z-ca
  745 +e.cz e . cz
  746 +25-30 25 - 30
  747 +<< < <
  748 +?!? ? ! ?
  749 +?… ? …
  750 +MORTKOWICZ - OLCZAKOWA MORTKOWICZ-OLCZAKOWA
  751 +Międzychodzko - Sierakowskiego Międzychodzko-Sierakowskiego
  752 +Strona | Format Strona|Format
  753 +Wołk - Karczewska Wołk-Karczewska
  754 +** * *
  755 +0-0 0 - 0
  756 +1-0 1 - 0
  757 +1-1 1 - 1
  758 +165-168 165 - 168
  759 +1727-31 1727 - 31
  760 +17:00 17 : 00
  761 +18-1 18 - 1
  762 +2.14 2 . 14
  763 +20.15 20 . 15
  764 +22.12. - 20.01 . 22 . 12 . - 20 . 01 .
  765 +23-1 23 - 1
  766 +25-2 25 - 2
  767 +65-8 65 - 8
  768 +7.30 - 15.30 . 7 . 30 - 15 . 30 .
  769 +AWS - UW AWS-UW
  770 +Dz . U Dz.U
  771 +R . P R.P
  772 +S . A . S.A.
  773 +S . C S.C
  774 +S . T S.T
  775 +16.01. 16 . 01 .
  776 +niby - romantyczna niby-romantyczna
  777 +beta - laktamaz beta-laktamaz
  778 +zrobiłe ( a ) m zrobiłe(a)m
  779 +zawiodłe ( a ) m zawiodłe(a)m
  780 +z - dy z-dy
  781 +s . c s.c
  782 +c . k c.k
  783 +c . o c.o
  784 +k . w k.w
  785 +10-tys. 10-tys .
  786 +400-tys. 400-tys .
  787 +P W . PW .
  788 +jakże śmy jak że śmy
  789 +`99 ` 99
  790 +b . r b.r
  791 +talk-show talk - show
  792 +tingel-tanglu tingel - tanglu
  793 +środkowo - środkowo-
  794 +e . w e.w
  795 +e ( a ) e(a)
  796 +zobaczył ( am zobaczył(a m
  797 +P. P .
  798 +R . P . R.P.
  799 +Ś. Ś .
  800 +pomaga my pomaga my
  801 +SS ' manie SS'manie
  802 +P. O. N . R . P . [ IM | I M ] . MJR . H . D . H P.O.N.R.P.IM.MJR.H.D.H
  803 +1-13 1 - 13
  804 +1.300 1 . 300
  805 +11—19 11 — 19
  806 +16–18 16 – 18
  807 +17–21 17 – 21
  808 +18-35 18 - 35
  809 +1960-62 1960 - 62
  810 +1973-1975 1973 - 1975
  811 +2.3 2 . 3
  812 +2000–2006 2000 – 2006
  813 +2000–2007 2000 – 2007
  814 +2006-2007 2006 - 2007
  815 +2007-2010 2007 - 2010
  816 +20–23 20 – 23
  817 +226–236 226 – 236
  818 +270-300 mln 270 - 300 mln
  819 +30—45 30 — 45
  820 +4,5 4 , 5
  821 +406.100 406 . 100
  822 +5,5 5 , 5
  823 +56–59 56 – 59
  824 +61–63 61 – 63
  825 +6–11 6 – 11
  826 +one ż oneż
  827 +Bielska - Białej Bielska-Białej
  828 +B 12 B12
  829 +μ g μg
  830 +pięć - sześć pięć-sześć
  831 +krio - elektronową krio-elektronową
  832 +benzo [ a ] pirenu benzo[a]pirenu
  833 +Winfryd - Bonifacy Winfryd-Bonifacy
  834 +Staręga - Piasek Staręga-Piasek
  835 +N - telopeptyd N-telopeptyd
  836 +Mettler - Toledo Mettler-Toledo
  837 +McMillan - Scott McMillan-Scott
  838 +L - askorbinowego L-askorbinowego
  839 +Kędzierzynie - Koźlu Kędzierzynie-Koźlu
  840 +Jean - Yves Jean-Yves
  841 +Gołota - Lewis Gołota-Lewis
  842 +Bielsku - Białej Bielsku-Białej
  843 +8 - hydroksychinoliny 8-hydroksychinoliny
  844 +- y -y
  845 +' ' ''
  846 +-- - -
  847 +: ) :)
  848 +: ) ) ) ) ) ) ) ) ) :)))))))))
  849 +: - ) :-)
  850 +’ ’ ’’
  851 +' ' ''
  852 +2 - propanu 2-propanu
  853 +3 - merkaptopropanol -1,2 - diol 3-merkaptopropanol-1,2-diol
  854 +niby - pies niby-pies
  855 +15 - 15-
  856 +15-16-latków 15 - 16-latków
  857 +srakie - takie srakie-takie
... ...
corpora/data/brev.tab 0 → 100644
  1 +b r. br .
  2 +n p. np .
  3 +m. in. m.in .
  4 +t zw. tzw .
  5 +p n. pn .
  6 +d s. ds .
  7 +t zn. tzn .
  8 +i t p. itp .
  9 +i t d. itd .
  10 +N p. Np .
  11 +r. r .
  12 +ub. ub .
  13 +ub. r. ub.r .
  14 +o. o. o.o .
  15 +m. in. m.in.
  16 +p t. pt .
  17 +n. p. m. n.p.m .
  18 +m. m .
  19 +in. in .
  20 +p. n. e. p.n.e .
  21 +n t. nt .
  22 +b m. bm .
  23 +p w. pw .
  24 +ub. r. ub . r .
  25 +ub. r. ub.r.
  26 +m kw. mkw .
  27 +n. e. n . e .
  28 +n. n .
  29 +b p. bp .
  30 +ś p. śp .
  31 +T zw. Tzw .
  32 +m. st. m . st .
  33 +w w. ww .
  34 +w. w. w.w .
  35 +d/ s d/s
  36 +M. in. M.in .
  37 +p. p .
  38 +p. o. p.o .
  39 +p. t. p.t .
  40 +c d. cd .
  41 +ś. p. ś.p .
  42 +m. st. m.st .
  43 +m. in. m . in .
  44 +C D. CD .
  45 +T zn. Tzn .
  46 +t j. tj .
  47 +i t p itp
  48 +i t d itd
  49 +km ² km²
  50 +km 2 km2
  51 +m 3 m3
  52 +m 2 m2
  53 +μ m μm
  54 +p t pt
  55 +b m bm
  56 +m / s m/s
  57 +m ^2 m^2
  58 +n p np
  59 +n. e n.e
  60 +w/ m w/m
  61 +w/ w w/w
  62 +M / s M/s
  63 +N T G NTG
  64 +dm 3 dm3
  65 +m. in m.in
  66 +b r br
  67 +M. in. M.in.
  68 +C d. Cd .
  69 +P W PW
  70 +c cm ccm
  71 +c. d. n c.d.n
  72 +cm 3 . cm 3 .
  73 +i te de itede
  74 +km. 2 km.2
  75 +cm 2 cm2
  76 +j. m. j . m .
  77 +m.in m . in
  78 +m.in . m.in.
... ...
corpora/data/coercions.tab 0 → 100644
  1 +Time State Time,State,stan
  2 +Hour State Arg,Time,w Time,State,stan
  3 +Hour Time Arg,Time,w
  4 +HourNumber State Number,Hour,godzina Arg,Time,w Time,State,stan
  5 +HourNumber Time Number,Hour,godzina Arg,Time,w
  6 +HourNumber Hour Number,Hour,godzina
  7 +Day State Arg,Time,w Time,State,stan
  8 +Day Time Arg,Time,w
  9 +DayNumber State Number,Day,dzień Arg,Time,w Time,State,stan
  10 +DayNumber Time Number,Day,dzień Arg,Time,w
  11 +DayNumber Day Number,Day,dzień
  12 +Month State Arg,Time,w Time,State,stan
  13 +Month Time Arg,Time,w
  14 +Year State Arg,Time,w Time,State,stan
  15 +Year Time Arg,Time,w
  16 +YearNumber State Number,Year,rok Arg,Time,w Time,State,stan
  17 +YearNumber Time Number,Year,rok Arg,Time,w
  18 +YearNumber Year Number,Year,rok
  19 +WeekDay State Arg,Time,w Time,State,stan
  20 +WeekDay Time Arg,Time,w
  21 +Week State Arg,Time,w Time,State,stan
  22 +Week Time Arg,Time,w
  23 +TimeOfDay State Arg,Time,w Time,State,stan
  24 +TimeOfDay Time Arg,Time,w
  25 +TimeOrder State Arg,Time,w Time,State,stan
  26 +TimeOrder Time Arg,Time,w
  27 +TimeApr Time
  28 +TimePoint Time
  29 +Street Location
  30 +Town Location
  31 +miasto Town
  32 +miasto Location
  33 +Quarter Location
  34 +dzielnica Location
  35 +OrganizationType Division
  36 +OrganizationName Division
  37 +Profession Person
  38 +nazwisko Person
  39 +LastName Person
  40 +imię Person
  41 +FirstName Person
  42 +Service Instance
  43 +
... ...
corpora/data/colours.tab 0 → 100644
corpora/data/eniam-correct.tab 0 → 100644
  1 +:)) :) )
  2 +:))) :) ) )
  3 +:-)) :-) )
  4 +;)) ;) )
  5 +AWS - AWS-
  6 +czterdziesto - czterdziesto-
  7 +dwudziesto - dwudziesto-
  8 +długo - długo-
  9 +kilku - kilku-
  10 +osiemnasto - osiemnasto-
  11 +ośmio - ośmio-
  12 +pięcio - pięcio-
  13 +pięcio -- pięcio- -
  14 +piętnasto -- piętnasto- -
  15 +przed - przed-
  16 +pseudo - pseudo-
  17 +˝ 1 ˝1
  18 +˝ albo ˝ . ˝albo˝ .
  19 +˝ lub ˝ ˝lub˝
  20 +’70 ’ 70
  21 +dwu -- dwu- -
  22 +trzy -- trzy- -
  23 +jedno - jedno-
  24 +-89 - 89
  25 +dwu - dwu-
  26 +1-2 -3 1 - 2-3
  27 +-7,16 - 7 , 16
  28 +rowecki@wp.pl rowecki @ wp . pl
  29 +:-) : - )
  30 +- Ekon -Ekon
  31 +- Sadat -Sadat
  32 +- wsch -wsch
  33 +28-29 28 -29
  34 +Praca ˝ Praca˝
  35 +marzycielem - marzycielem-
  36 +:)))))) :) ) ) ) ) )
  37 +EKO-U EKO - U
  38 +używane . ˝ używane˝ .
  39 +kuba.rowecki@wp.pl kuba . rowecki @ wp . pl
  40 +-20 - 20
  41 +
... ...
corpora/data/excluded.tab 0 → 100644
  1 +dev-s82
  2 +
... ...
corpora/data/letni.tab 0 → 100644
  1 +1 - klasowa 1-klasowa
  2 +1 - letni 1-letni
  3 +1 - majowy 1-majowy
  4 +10 - dniowe 10-dniowe
  5 +10 - dniową 10-dniową
  6 +10 - hektarowe 10-hektarowe
  7 +10 - kilometrowa 10-kilometrowa
  8 +10 - krotnie 10-krotnie
  9 +10 - letni 10-letni
  10 +10 - letnich 10-letnich
  11 +10 - letniego 10-letniego
  12 +10 - osobowy 10-osobowy
  13 +10 - procentowy 10-procentowy
  14 +10 - procentowych 10-procentowych
  15 +10 - punktowy 10-punktowy
  16 +100 - krotnie 100-krotnie
  17 +100 - metrowe 100-metrowe
  18 +100 - milionową 100-milionową
  19 +100 - tysięczny 100-tysięczny
  20 +11 - dniowej 11-dniowej
  21 +11 - krotnie 11-krotnie
  22 +11 - letni 11-letni
  23 +11 - letnia 11-letnia
  24 +11 - letniego 11-letniego
  25 +11 - metrową 11-metrową
  26 +11 - minutowy 11-minutowy
  27 +11 - procentowe 11-procentowe
  28 +12 - dniową 12-dniową
  29 +12 - godzinnej 12-godzinnej
  30 +12 - letnia 12-letnia
  31 +12 - metrowej 12-metrowej
  32 +120 - metrowej 120-metrowej
  33 +13 - letni 13-letni
  34 +13 - letnia 13-letnia
  35 +13 - miesięcznego 13-miesięcznego
  36 +13 - miesięczną 13-miesięczną
  37 +14 - letnia 14-letnia
  38 +14 - letnich 14-letnich
  39 +14 - letnią 14-letnią
  40 +15 - dniowy 15-dniowy
  41 +15 - letnią 15-letnią
  42 +15,5 - tysięczna 15,5-tysięczna
  43 +16 - bitowe 16-bitowe
  44 +16 - dniowej 16-dniowej
  45 +16 - letni 16-letni
  46 +16 - procentowe 16-procentowe
  47 +16 - stopniowej 16-stopniowej
  48 +17 - calowe 17-calowe
  49 +17 - letnie 17-letnie
  50 +17 - letnią 17-letnią
  51 +17 - osobową 17-osobową
  52 +18 - drużynowa 18-drużynowa
  53 +18 - godzinne 18-godzinne
  54 +18 - letnia 18-letnia
  55 +18 - letnie 18-letnie
  56 +18 - letniego 18-letniego
  57 +180 - tysięcznego 180-tysięcznego
  58 +19 - calowy 19-calowy
  59 +19 - kilometrowej 19-kilometrowej
  60 +19 - letnich 19-letnich
  61 +19 - letnie 19-letnie
  62 +19 - letniemu 19-letniemu
  63 +190 - letnią 190-letnią
  64 +2 - godzinny 2-godzinny
  65 +2 - letnia 2-letnia
  66 +2 - letnie 2-letnie
  67 +2 - metrowy 2-metrowy
  68 +2 - odcinkowy 2-odcinkowy
  69 +2 - oddziałowa 2-oddziałowa
  70 +2 - osobowa 2-osobowa
  71 +2,5 - kilogramowy 2,5-kilogramowy
  72 +2,5 - letnią 2,5-letnią
  73 +2-3 - stopniowe 2 - 3-stopniowe
  74 +20 - letniej 20-letniej
  75 +20 - letnią 20-letnią
  76 +20 - metrowa 20-metrowa
  77 +20 - osobowa 20-osobowa
  78 +20 - osobowy 20-osobowy
  79 +20 - procentowej 20-procentowej
  80 +20 - stopniowy 20-stopniowy
  81 +21 - letnia 21-letnia
  82 +21 - letniej 21-letniej
  83 +22 - osobowa 22-osobowa
  84 +23 - calowy 23-calowy
  85 +23 - letniego 23-letniego
  86 +24 - bitowy 24-bitowy
  87 +24 - godzinnego 24-godzinnego
  88 +24 - godzinnych 24-godzinnych
  89 +24 - kondygnacjowej 24-kondygnacjowej
  90 +25 - letnia 25-letnia
  91 +25 - letnią 25-letnią
  92 +25 - metrowej 25-metrowej
  93 +25 - procentowy 25-procentowy
  94 +26 - letniemu 26-letniemu
  95 +28 - letniego 28-letniego
  96 +29 - letni 29-letni
  97 +29 - letniego 29-letniego
  98 +3 - dniowe 3-dniowe
  99 +3 - letni 3-letni
  100 +3 - letnia 3-letnia
  101 +3 - letnie 3-letnie
  102 +3 - letnią 3-letnią
  103 +3 - miesięczne 3-miesięczne
  104 +3 - miesięczny 3-miesięczny
  105 +3 - nawowy 3-nawowy
  106 +3 - osobowych 3-osobowych
  107 +3 - procentowe 3-procentowe
  108 +3,5 - kilogramowe 3,5-kilogramowe
  109 +30 - krotnego 30-krotnego
  110 +30 - letnią 30-letnią
  111 +30 - procentowych 30-procentowych
  112 +300 - kilogramowa 300-kilogramowa
  113 +300 - kilogramowego 300-kilogramowego
  114 +300 - tysięcznej 300-tysięcznej
  115 +31 - letni 31-letni
  116 +31 - letnia 31-letnia
  117 +31 - letniego 31-letniego
  118 +32 - letnia 32-letnia
  119 +32 - letnią 32-letnią
  120 +33 - letniemu 33-letniemu
  121 +34 - letnia 34-letnia
  122 +34 - letniego 34-letniego
  123 +35 - letnia 35-letnia
  124 +35 - letnią 35-letnią
  125 +36 - centymetrowa 36-centymetrowa
  126 +36 - letni 36-letni
  127 +36 - letnia 36-letnia
  128 +38 - letniego 38-letniego
  129 +39 - letni 39-letni
  130 +4 - drzwiowe 4-drzwiowe
  131 +4 - kołowy 4-kołowy
  132 +4 - letni 4-letni
  133 +4 - letnie 4-letnie
  134 +4 - letniego 4-letniego
  135 +4 - letniej 4-letniej
  136 +4 - tysięcznego 4-tysięcznego
  137 +4,5 - metrowej 4,5-metrowej
  138 +40 - letnia 40-letnia
  139 +40 - metrowy 40-metrowy
  140 +40 - osobowa 40-osobowa
  141 +42 - letnia 42-letnia
  142 +42 - letnią 42-letnią
  143 +43 - letniego 43-letniego
  144 +44 - letni 44-letni
  145 +44 - letniemu 44-letniemu
  146 +44 - letnią 44-letnią
  147 +45 - procentowego 45-procentowego
  148 +46 - letni 46-letni
  149 +46 - letniego 46-letniego
  150 +46 - osobowa 46-osobowa
  151 +47 - letni 47-letni
  152 +5 - dniowe 5-dniowe
  153 +5 - krotnie 5-krotnie
  154 +5 - letnich 5-letnich
  155 +5 - letniego 5-letniego
  156 +5 - minutowy 5-minutowy
  157 +5 - osobową 5-osobową
  158 +5 - skrzydłowy 5-skrzydłowy
  159 +50 - metrowej 50-metrowej
  160 +50 - metrowy 50-metrowy
  161 +50 - osobowe 50-osobowe
  162 +50 - procentowego 50-procentowego
  163 +50 - tysięczna 50-tysięczna
  164 +500 - kilometrową 500-kilometrową
  165 +500 - złotowy 500-złotowy
  166 +52 - letniego 52-letniego
  167 +53 - letni 53-letni
  168 +53 - letnia 53-letnia
  169 +53 - letniego 53-letniego
  170 +54 - letnia 54-letnia
  171 +55 - letni 55-letni
  172 +56 - letni 56-letni
  173 +56 - letniego 56-letniego
  174 +59 - letni 59-letni
  175 +59 - letnią 59-letnią
  176 +6 - cylindrowy 6-cylindrowy
  177 +6 - godzinne 6-godzinne
  178 +6 - letni 6-letni
  179 +6 - letnią 6-letnią
  180 +6 - osobowej 6-osobowej
  181 +6,5 - godzinnej 6,5-godzinnej
  182 +6,5 - metrowa 6,5-metrowa
  183 +6-9 - letni 6-9-letni
  184 +60 - letnie 60-letnie
  185 +60 - letniego 60-letniego
  186 +60 - minutowej 60-minutowej
  187 +600 - litrowe 600-litrowe
  188 +61 - letnia 61-letnia
  189 +62 - letni 62-letni
  190 +62 - letniego 62-letniego
  191 +63 - letni 63-letni
  192 +63 - letnia 63-letnia
  193 +64 - letni 64-letni
  194 +65 - tysięcznej 65-tysięcznej
  195 +67 - letni 67-letni
  196 +68 - letni 68-letni
  197 +7 - kondygnacyjnego 7-kondygnacyjnego
  198 +7 - krotnie 7-krotnie
  199 +7 - letniego 7-letniego
  200 +7 - letniej 7-letniej
  201 +7 - procentowy 7-procentowy
  202 +70 - letnia 70-letnia
  203 +70 - letniej 70-letniej
  204 +74 - letni 74-letni
  205 +77 - letni 77-letni
  206 +8 - godzinnych 8-godzinnych
  207 +8 - kilometrowego 8-kilometrowego
  208 +8,5 - procentową 8,5-procentową
  209 +80 - kilogramowego 80-kilogramowego
  210 +80 - letnia 80-letnia
  211 +80 - tonowe 80-tonowe
  212 +81 - letnia 81-letnia
  213 +82 - letni 82-letni
  214 +83 - letnia 83-letnia
  215 +84 - letniej 84-letniej
  216 +9 - hektarowy 9-hektarowy
  217 +9 - letnia 9-letnia
  218 +9 - letniego 9-letniego
  219 +9 - letniej 9-letniej
  220 +9 - letniemu 9-letniemu
  221 +9 - miesięczna 9-miesięczna
  222 +9 - miesięczny 9-miesięczny
  223 +90 - letniej 90-letniej
  224 +900 - osobowej 900-osobowej
  225 +12 - letni 12-letni
  226 +14 - letni 14-letni
  227 +14 - letniego 14-letniego
  228 +15 - letniego 15-letniego
  229 +16 - letnia 16-letnia
  230 +16 - letniej 16-letniej
  231 +17 - letnia 17-letnia
  232 +18 - letni 18-letni
  233 +19 - letni 19-letni
  234 +20 - letni 20-letni
  235 +20 - letnia 20-letnia
  236 +22 - letniego 22-letniego
  237 +22 - letniej 22-letniej
  238 +24 - letni 24-letni
  239 +26 - letniego 26-letniego
  240 +28 - letni 28-letni
  241 +28 - letnią 28-letnią
  242 +3 - letniego 3-letniego
  243 +3 - letniej 3-letniej
  244 +30 - letnia 30-letnia
  245 +35 - letni 35-letni
  246 +36 - letniej 36-letniej
  247 +37 - letni 37-letni
  248 +37 - letnia 37-letnia
  249 +4 - krotnie 4-krotnie
  250 +40 - letni 40-letni
  251 +40 - letniej 40-letniej
  252 +43 - letni 43-letni
  253 +43 - letnia 43-letnia
  254 +45 - osobowa 45-osobowa
  255 +49 - letniego 49-letniego
  256 +5 - letniej 5-letniej
  257 +50 - letniego 50-letniego
  258 +52 - letni 52-letni
  259 +60 - letni 60-letni
  260 +65 - letniego 65-letniego
  261 +80 - letni 80-letni
  262 +87 - letni 87-letni
  263 +15 - letnia 15-letnia
  264 +17 - letniego 17-letniego
  265 +19 - letniego 19-letniego
  266 +25 - letniego 25-letniego
  267 +30 - letni 30-letni
  268 +34 - letni 34-letni
  269 +36 - letniego 36-letniego
  270 +38 - letni 38-letni
  271 +5 - letnia 5-letnia
  272 +54 - letni 54-letni
  273 +76 - letni 76-letni
  274 +15 - letni 15-letni
  275 +17 - letni 17-letni
  276 +19 - letnia 19-letnia
  277 +22 - letnia 22-letnia
  278 +26 - letni 26-letni
  279 +30 - letniego 30-letniego
  280 +48 - letni 48-letni
  281 +10 - minutowych 10-minutowych
  282 +21 - letni 21-letni
  283 +22 - letni 22-letni
  284 +23 - letni 23-letni
  285 +32 - letni 32-letni
  286 +25 - letni 25-letni
  287 +27 - letni 27-letni
  288 +33 - letni 33-letni
  289 +5 - minutowych 5-minutowych
  290 +XIX - wiecznego XIX-wiecznego
  291 +XIX - wieczny XIX-wieczny
  292 +XVII - wiecznych XVII-wiecznych
  293 +7 - procentowym 7-procentowym
  294 +XVI - wieczne XVI-wieczne
  295 +XVI - wieczny XVI-wieczny
  296 +XVII - wieczna XVII-wieczna
  297 +XVII - wieczny XVII-wieczny
  298 +XVIII - wieczna XVIII-wieczna
  299 +XVIII - wieczny XVIII-wieczny
  300 +XIII - wiecznym XIII-wiecznym
  301 +XIII - wiecznych XIII-wiecznych
  302 +XIV - wiecznego XIV-wiecznego
  303 +XIX - wiecznych XIX-wiecznych
  304 +XV - wieczny XV-wieczny
  305 +IV - ligowej IV-ligowej
  306 +III - ligowym III-ligowym
  307 +III - ligową III-ligową
  308 +II - ligowego II-ligowego
  309 +II - ligowemu II-ligowemu
  310 +I - ligowej I-ligowej
  311 +60 - stopniowym 60-stopniowym
  312 +435 - litrowym 435-litrowym
  313 +41 - letni 41-letni
  314 +40 - godzinnym 40-godzinnym
  315 +40 - tysięcznym 40-tysięcznym
  316 +3,5 - tonowym 3,5-tonowym
  317 +24 - karatowym 24-karatowym
  318 +24 - osobowym 24-osobowym
  319 +22 - procentowym 22-procentowym
  320 +20 - tomowym 20-tomowym
  321 +20 - złotowym 20-złotowym
  322 +2 - godzinnym 2-godzinnym
  323 +17 - letnim 17-letnim
  324 +16 - letnim 16-letnim
  325 +15 - letnim 15-letnim
  326 +150 - tysięcznym 150-tysięcznym
  327 +150 - procentowego 150-procentowego
  328 +100 - tysięcznym 100-tysięcznym
  329 +10 - letnim 10-letnim
  330 +72 - letniego 72-letniego
  331 +11 - letnim 11-letnim
  332 +12 - letnim 12-letnim
  333 +8 - osobowych 8-osobowych
  334 +800 - gramowy 800-gramowy
  335 +100 - ml 100-ml
  336 +12 - cyfrowy 12-cyfrowy
  337 +185 - osobową 185-osobową
  338 +27 - Ietni 27-Ietni
  339 +4.4 - litrowy 4.4-litrowy
  340 +44 - krotne 44-krotne
  341 +6 - minutowego 6-minutowego
... ...
corpora/data/lexicon.dic 0 → 100644
  1 +@PHRASE_NAMES
  2 + infp np prepnp adjp ip cp ncp prepncp advp padvp colonp mp intp conj-np
  3 + adja prepadjp compar measure num aglt aux-fut
  4 + aux-past aux-imp qub interj sinterj hyphen
  5 + rparen rparen2 rquot rquot2 rquot3 inclusion
  6 + day-interval day-lex day-month-interval date-interval
  7 + month-lex month-interval year-interval roman roman-interval
  8 + hour-minute-interval hour-interval obj-id match-result
  9 + url email day-month day year date hour hour-minute lex
  10 + się nie roku to by s <root> <conll_root> or or2 <colon> <speaker> <speaker-end> <squery> <sentence> <paragraph>
  11 + <subst> <depr> <ppron12> <ppron3> <siebie> <prep> <num> <numcomp> <intnum>
  12 + <realnum> <intnum-interval> <realnum-interval> <symbol> <ordnum>
  13 + <date> <date-interval> <hour-minute> <hour> <hour-minute-interval>
  14 + <hour-interval> <year> <year-interval> <day> <day-interval> <day-month>
  15 + <day-month-interval> <month-interval> <roman> <roman-interval> <roman-ordnum>
  16 + <match-result> <url> <email> <phone-number> <postal-code> <obj-id> <list-item> <fixed> <adj> <apron> <adjc> <adjp> <adja>
  17 + <adv> <ger> <pact> <ppas> <fin> <bedzie> <praet> <winien> <impt>
  18 + <imps> <pred> <aglt> <inf> <pcon> <pant> <qub> <comp> <compar> <conj> <interj>
  19 + <sinterj> <burk> <interp> <part> <unk> <building-number> jak czy za do od o w na z u dla przeciwko location time link miesiąc pod niż w_celu
  20 + title title-end token inclusion inclusion-end comparp jako quot-end
  21 + Time GenericDescription
  22 + Location Street StreetName Town TownName
  23 + Payment Person Profession ProfessionParam
  24 + Division OrganizationName OrganizationType OrganizationTypeParam
  25 + Service ServiceParam SericeEffect
  26 + Instance Issue Quarter Price Name Confirmation Email Telephone PostalCode
  27 + HouseNumber Geolocus Measure Rating OpAdNum Y Animal State Interrogative
  28 + Action Attitude PriceDescription RateDescription ServiceParamDescription
  29 + null Apoz PHas CORE Has Attr Compar PApoz Merge Count Thme Manr Lemma Arg Time
  30 + sem nosem
  31 +
  32 +@WEIGHTS
  33 +symbol_weight=1
  34 +measure_weight=1
  35 +
  36 +@LEXICON
  37 +
  38 +pos=adj,cat=HourNumber:
  39 + QUANT[person=ter]
  40 + np*number*case*gender*person*coerced*role*node{schema}{local-schema};
  41 +
  42 +pos=adj,cat=DayNumber:
  43 + QUANT[person=ter]
  44 + np*number*case*gender*person*coerced*role*node{schema}{local-schema};
  45 +
  46 +
  47 +lemma=</sentence>,pos=interp,node=relations,phrase=s: BRACKET
  48 + QUANT[role=0]
  49 + s*role*node
  50 + \?(ip*T*T*T*T*null*sit+cp*int*T*T*null*sit+cp*sub*T*T*null*sit+mp*T*null*sit+intp*T*null*sit+interj*T*null*sit+sinterj*T*null*sit+np*T*nom*T*ter*Location*null*sit);
  51 +lemma=</sentence>,pos=interp,node=relations,phrase=s: BRACKET
  52 + QUANT[role=0]
  53 + s*role*node
  54 + \?(mp*State*null*sit+intp*State*null*sit+np*T*T*T*ter*State*null*sit+xp*State*null*sit+interj*T*null*sit+sinterj*T*null*sit);
  55 +
  56 +
... ...
corpora/data/mwe.tab 0 → 100644
  1 +Y Y subst:sg:_:_
  2 +nad ranem nad ranem adv
  3 +jak daleko jak daleko adv
  4 +o ile o_ile comp
  5 +w celu w celu prep:gen
  6 +wraz z wraz z prep:inst
  7 +w górę w_górę qub
  8 +dzień dobry dzień dobry interj
  9 +Dzień Dobry dzień dobry interj
  10 +do widzenia do widzenia interj
  11 +do zobaczenia do zobaczenia interj
  12 +do następnego razu do następnego razu interj
  13 +luz luz interj
  14 +yes yes interj
  15 +yhym yhym interj
  16 +yeap yeap interj
  17 +aj aj interj
  18 +niech będzie niech będzie interj
  19 +wszystko jasne wszystko jasne interj
  20 +to ważne to ważne interj
  21 +trochę słabo trochę słabo interj
  22 +to znaczy to znaczy interj
  23 +w porządku w porządku interj
  24 +na przykład na przykład interj
  25 +
... ...
corpora/data/mwe2.tab 0 → 100644
corpora/data/ne.tab 0 → 100644
corpora/data/nkjp-correct.tab 0 → 100644
  1 +( C ) Maćka (C)Maćka
  2 +-21 - latków - 21-latków
  3 +. pl .pl
  4 +... ewski ...ewski
  5 +0-20 -21 -22 0-20-21-22
  6 +0-46 855 -45 -26 ) . 0-46 855-45-26 ) .
  7 +0-800 -20 -150 0-800-20-150
  8 +0603 / 166-367 0603/166-367
  9 +1 ) 1)
  10 +1 , 115.000 1,115.000
  11 +1 a . 1a.
  12 +1 d 1d
  13 +1 f 1f
  14 +1004 - A 1004-A
  15 +109 P 4 109P4
  16 +11 b 11b
  17 +11 c 11c
  18 +12 - b 12-b
  19 +12 E 12E
  20 +12 a 12a
  21 +12 b 12b
  22 +13 . 13.
  23 +13 b 13b
  24 +14 a 14a
  25 +146 A 146A
  26 +146 B 146B
  27 +147 h 147h
  28 +147 j 147j
  29 +158 - Pound 158-Pound
  30 +16 b 16b
  31 +18 a 18a
  32 +18 b 18b
  33 +18 c 18c
  34 +1912-199 ? 1912 - 199?
  35 +1:100 000 . 1 : 100 000 .
  36 +2 + 2+
  37 +2 - 2-
  38 +2 - 000 - 001 2-000-001
  39 +2 A 4 2A4
  40 +2 b 2b
  41 +2 c 2c
  42 +2 d 2d
  43 +2 e 2e
  44 +2 p 2p
  45 +24 A 24A
  46 +24 d 24d
  47 +248 Z 248Z
  48 +25 a 25a
  49 +25 h 25h
  50 +251 a 251a
  51 +3 . c 3.c
  52 +3 b 3b
  53 +3 d 3d
  54 +3 mamy 3mamy
  55 +35 ´ 35´
  56 +4 . a . 4.a.
  57 +4 . b 4.b
  58 +4 . c . 4.c.
  59 +4 media 4media
  60 +40 - 045 40-045
  61 +40 - krotność 40-krotność
  62 +40 b 40b
  63 +41 ai 41a i
  64 +418-26 -88 418-26-88
  65 +44 m -7 44m-7
  66 +455 - A 455-A
  67 +467-89 -45 467-89-45
  68 +492 A 492A
  69 +492 B 492B
  70 +5 a 5a
  71 +5 d 5d
  72 +5 x 5x
  73 +50 - krotność 50-krotność
  74 +50 - metre 50-metre
  75 +50 b 50b
  76 +510 256 732 . 510 256 732 .
  77 +515 - A 515-A
  78 +52 a 52a
  79 +520-59 -50 520-59-50
  80 +56 e 56e
  81 +56 zf 56zf
  82 +58 / fin / 50 58/fin/50
  83 +6 . c 6.c
  84 +6 . d . 6.d.
  85 +6 ABX 2 6ABX2
  86 +6 LX 2 6LX2
  87 +60.22.11 - 00.00 60.22.11-00.00
  88 +635-25 - 00 635-25-00
  89 +642-65 -85 642-65-85
  90 +650-91 -58 650-91-58
  91 +654-66 -91 654-66-91
  92 +6667 / 88 / IV 6667/88/IV
  93 +7 . a . 7.a.
  94 +7 . b 7.b
  95 +7 . d 7.d
  96 +7 . f 7.f
  97 +70 . 70.
  98 +71 a 71a
  99 +71 c 71c
  100 +73-65 -32 73-65-32
  101 +735 ie 735ie
  102 +8 ) ) ) ) 8))))
  103 +8 ) ) ) ) ) ) ) ) ) 8)))))))))
  104 +8 - ) 8-)
  105 +8 X 8X
  106 +8 a 8a
  107 +8.1 . 8.1.
  108 +80 ' 80'
  109 +81 A 81A
  110 +825-44 - 02 825-44-02
  111 +825-53 -94 825-53-94
  112 +9 . b 9.b
  113 +9 b 9b
  114 +9 c 9c
  115 +9 g 9g
  116 +9 x 9x
  117 +90 a 90a
  118 +96 ' 96'
  119 +: - D :-D
  120 +: D :D
  121 +: O :O
  122 +: O ( :O(
  123 +: P ) ) ) ) ) ) ) ) :P))))))))
  124 +:P ) ) ) ) ) ) ) ) :P))))))))
  125 +:P lany : Plany
  126 +: o ) ) :o))
  127 +: o ) ) ) ) :o))))
  128 +:))) ) ) ) ) :)))))))
  129 +:))) ) ) ) ) ) ) :)))))))))
  130 +:))) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) :)))))))))))))))))))
  131 +; P ) ) ) ;P)))
  132 +;P ) ) ) ;P)))
  133 +; d ;d
  134 +; ddd ;ddd
  135 +; p ;p
  136 +A -1 A-1
  137 +A . A.
  138 +A . C A.C
  139 +A . D A.D
  140 +A . D . D . A . M A.D.D.A.M
  141 +A . K . M A.K.M
  142 +A 3 A3
  143 +A 320 A320
  144 +AC + 79 AC+79
  145 +ADP - RYBOLIZACJA ADP-RYBOLIZACJA
  146 +AG -23 AG-23
  147 +AGENTA 17 AGENTA17
  148 +AN -124 AN-124
  149 +AWS -2002 AWS-2002
  150 +Abu - Tor Abu-Tor
  151 +Al - Mansur Al-Mansur
  152 +Al - Mashru Al-Mashru
  153 +Al - Qaidy Al-Qaidy
  154 +Ali - ili Ali-ili
  155 +Aujourd ' hui Aujourd'hui
  156 +Aulnay - Bois Aulnay-Bois
  157 +Avallac ' h Avallac'h
  158 +B ) B)
  159 +B + R B+R
  160 +B - 06250 B-06250
  161 +B -1 B B-1B
  162 +B -14 B-14
  163 +B -17 B-17
  164 +B -2 B-2
  165 +B -24 B-24
  166 +B -29 B-29
  167 +B -52 B-52
  168 +B . B.
  169 +B 10 M B10M
  170 +B 3 B3
  171 +Bailly - Salins Bailly-Salins
  172 +Barr ` es Barr`es
  173 +Bianco - neri Bianco-neri
  174 +Brockley - Lewisham Brockley-Lewisham
  175 +C ' est C'est
  176 +C - c C-c
  177 +C . C.
  178 +C 18 H 28 O 2 C18H28O2
  179 +C 18 H 30 O 2 C18H30O2
  180 +C 18 H 34 O 2 C18H34O2
  181 +C 2 H 5 OH C2H5OH
  182 +C 70 C70
  183 +CO 2 CO2
  184 +Canal + Canal+
  185 +Canale + Canale+
  186 +Cassino - Orbis Cassino-Orbis
  187 +Clarion - Clipperton Clarion-Clipperton
  188 +Coca - Cola Coca-Cola
  189 +Coca - Coli Coca-Coli
  190 +Cosmo - Worlds Cosmo-Worlds
  191 +Czechowic - Dziedzic Czechowic-Dziedzic
  192 +Czou - kou - tien Czou-kou-tien
  193 +D ' Arc D'Arc
  194 +D ' Viosion D'Viosion
  195 +D - dur D-dur
  196 +D . D.
  197 +D : D:
  198 +DDR 400 DDR400
  199 +DVD - XV 10 DVD-XV10
  200 +DW 592 DW592
  201 +DW 594 DW594
  202 +Daszewo -1 Daszewo-1
  203 +Do - centa Do-centa
  204 +DŁUGOPOLE - ZDRÓJ DŁUGOPOLE-ZDRÓJ
  205 +E 100 E100
  206 +E 18 E18
  207 +E 2 E2
  208 +E 261 E261
  209 +E 3 E3
  210 +E : E:
  211 +EP -3 EP-3
  212 +Eishockey - Liga Eishockey-Liga
  213 +En - laia En-laia
  214 +Era - Art Era-Art
  215 +Express - AM 22 Express-AM22
  216 +F - dur F-dur
  217 +F - ka F-ka
  218 +F -1 F-1
  219 +F -18 F-18
  220 +F -8 F-8
  221 +F 3 F3
  222 +F 5 F5
  223 +F 7 F7
  224 +Ferte - sous - Jouarre Ferte-sous-Jouarre
  225 +Fool - X Fool-X
  226 +Frieda - K Frieda-K
  227 +Fu - Czu Fu-Czu
  228 +G -14 G-14
  229 +G -7 G-7
  230 +G -8 G-8
  231 +G 11 G11
  232 +G 12 G12
  233 +Gekij _ o Gekij_o
  234 +Gheorghiu - Dej Gheorghiu-Dej
  235 +Győr - Moson - Sopron Győr-Moson-Sopron
  236 +H - K H-K
  237 +H 5 N 1 H5N1
  238 +HIV - dodatnich HIV-dodatnich
  239 +HSW - Trading HSW-Trading
  240 +Ha - Nocri Ha-Nocri
  241 +Halla - li Halla-li
  242 +Har - Homa Har-Homa
  243 +Head - Up Head-Up
  244 +Hewlett - Packard Hewlett-Packard
  245 +Home - Fleet Home-Fleet
  246 +Humięcino - Klary Humięcino-Klary
  247 +I ' ll I'll
  248 +I ' ve I've
  249 +I - szy I-szy
  250 +I -22 I-22
  251 +I . I.
  252 +II . 16 II.16
  253 +ITU - T ITU-T
  254 +IV . 5 IV.5
  255 +Idol 92 Idol92
  256 +Invest - Consult Invest-Consult
  257 +Invest - Euro Invest-Euro
  258 +Iran - Sarbas Iran-Sarbas
  259 +Iwano - Frankowsku Iwano-Frankowsku
  260 +J @ zz J@zz
  261 +JAS - MOS JAS-MOS
  262 +Ja ' raia Ja'raia
  263 +Ja 96 Ja96
  264 +Jak - mu - tam Jak-mu-tam
  265 +Jangi - Julu Jangi-Julu
  266 +Jar -32 Jar-32
  267 +Jean - Romainem Jean-Romainem
  268 +Jeana - Luca Jeana-Luca
  269 +Jeana - Paula Jeana-Paula
  270 +Jie - jie Jie-jie
  271 +K - International K-International
  272 +K -2 K-2
  273 +K 30 K30
  274 +K 6 K6
  275 +K 8 K8
  276 +KC -135 KC-135
  277 +KJ 2 KJ2
  278 +KPN - OP KPN-OP
  279 +KRL - D KRL-D
  280 +KT 400 KT400
  281 +Karl - Heinz Karl-Heinz
  282 +Kogel - mogel Kogel-mogel
  283 +Koni - Art Koni-Art
  284 +Konstancina - Jeziorny Konstancina-Jeziorny
  285 +Konstancinie - Jeziornie Konstancinie-Jeziornie
  286 +Kołomyi - Czerniowcach Kołomyi-Czerniowcach
  287 +Ku - ku - ku - ku Ku-ku-ku-ku
  288 +Kórniku - Bninie Kórniku-Bninie
  289 +L ' Authentique L'Authentique
  290 +L ' Oreal L'Oreal
  291 +L ' odeur L'odeur
  292 +L -4 L-4
  293 +L 4 L4
  294 +L 7 L7
  295 +LI _ I _ I LI_I_I
  296 +M - A M-A
  297 +M -2 M-2
  298 +M -28 M-28
  299 +M -80 M-80
  300 +M . A . S . H M.A.S.H
  301 +M . P M.P
  302 +M 12 M12
  303 +M 14 M14
  304 +M 28 M28
  305 +M 5 M5
  306 +M 6 M6
  307 +M 7 M7
  308 +MATIKI ' S MATIKI'S
  309 +MD -80 MD-80
  310 +MI -17 MI-17
  311 +MI -2 MI-2
  312 +MI 5 MI5
  313 +MNP -20 M MNP-20M
  314 +MP 5 KA 4 MP5KA4
  315 +MPEG -1 MPEG-1
  316 +MPEG -4 MPEG-4
  317 +Magdasf 1 Magdasf1
  318 +Mawsu ' at Mawsu'at
  319 +Mc - danie Mc-danie
  320 +Medix - Force Medix-Force
  321 +Mi -8 Mi-8
  322 +Montesquiou - Fezensac Montesquiou-Fezensac
  323 +Mołczat ' Mołczat'
  324 +N . T N.T
  325 +N 2 N2
  326 +N 33 N33
  327 +NSZ - NOW NSZ-NOW
  328 +NSZ - ZJ NSZ-ZJ
  329 +Na + Na+
  330 +Nag – czu Nag–czu
  331 +Niwka - Modrzejów Niwka-Modrzejów
  332 +Non - Stop Non-Stop
  333 +O - Polskich O-Polskich
  334 +O _ o O_o
  335 +OPZZ / p / 630 / 2000 OPZZ/p/630/2000
  336 +Olszewo - Reszki Olszewo-Reszki
  337 +On - the - Loose On-the-Loose
  338 +Orzeł - Łysiak Orzeł-Łysiak
  339 +Ostrowiec - Osada Ostrowiec-Osada
  340 +Ouest - France Ouest-France
  341 +P ! nk P!nk
  342 +P - ski P-ski
  343 +P -64 P-64
  344 +P . E . N . P.E.N.
  345 +P. O. N . R . P . IM . MJR . H . D . H P.O.N.R.P.IM.MJR.H.D.H
  346 +P 22 P22
  347 +PIT -37 PIT-37
  348 +PN -55 PN-55
  349 +PN -92 / T -20091 PN-92/T-20091
  350 +PR 3 PR3
  351 +PRZERZECZYN - ZDRÓJ PRZERZECZYN-ZDRÓJ
  352 +PZL -104 PZL-104
  353 +Pen - Press Pen-Press
  354 +PiS - uarami PiS-uarami
  355 +Pick - up Pick-up
  356 +Pif-paf Pif - paf
  357 +Pol - Mot Pol-Mot
  358 +Pol - Orsa Pol-Orsa
  359 +Polanicy - Zdroju Polanicy-Zdroju
  360 +Poor - Know Poor-Know
  361 +Port - Bau Port-Bau
  362 +Prefabet - Lisów Prefabet-Lisów
  363 +Przycisk 2 _ 1 Przycisk2_1
  364 +Prêt - à - Porter Prêt-à-Porter
  365 +Q 1 Q1
  366 +R - XXI R-XXI
  367 +R . E . M . R.E.M.
  368 +R 1 R1
  369 +R 2 CH 2 R2CH2
  370 +R 2 D 2 R2D2
  371 +R 3 CH R3CH
  372 +R ’ N ’ B R’N’B
  373 +RCH 3 RCH3
  374 +RS 2 RS2
  375 +RS 4 RS4
  376 +RTL 7 RTL7
  377 +Raciborzu - Markowicach Raciborzu-Markowicach
  378 +Randez - Vous Randez-Vous
  379 +Rock ’ N ’ Roll Rock’N’Roll
  380 +Rojek – Decor Rojek–Decor
  381 +S - Rejestrów S-Rejestrów
  382 +S - Video S-Video
  383 +S - ka S-ka
  384 +S -400 S-400
  385 +S . T . W . S.T.W.
  386 +SAAB -9000 SAAB- 9000
  387 +SETI @ Home SETI@Home
  388 +SLD - PSL SLD-PSL
  389 +SP 5 SP5
  390 +SQ 25 SQ25
  391 +SS - Standartenführera SS-Standartenführera
  392 +SS - Standartenführerem SS-Standartenführerem
  393 +Sado - maso Sado-maso
  394 +Saint - Germain Saint-Germain
  395 +Saint - Leu Saint-Leu
  396 +Saint - Leu - la - Forêt Saint-Leu-la-Forêt
  397 +Saint - Loup Saint-Loup
  398 +Saint - Sulpice Saint-Sulpice
  399 +Sainte - Jeanne - de - Chantal Sainte-Jeanne-de-Chantal
  400 +Saltykov - Shchedrin Saltykov-Shchedrin
  401 +Sankt - Leningrad Sankt-Leningrad
  402 +Sat - Kom Sat-Kom
  403 +Sep - Sin Sep-Sin
  404 +Shi - king Shi-king
  405 +Ship - Service Ship-Service
  406 +Shu - king Shu-king
  407 +Siczek - Zalewska Siczek-Zalewska
  408 +SnCl 2 SnCl2
  409 +Su -22 Su-22
  410 +Super - Nova Super-Nova
  411 +Szin - Bet Szin-Bet
  412 +T - Shirt T-Shirt
  413 +T . Love T.Love
  414 +T 5 T5
  415 +TV 4 TV4
  416 +TVP 1 TVP1
  417 +Tele - Energo Tele-Energo
  418 +Top -10 Top-10
  419 +Trata - ta - ta Trata-ta-ta
  420 +UOP - ki UOP-ki
  421 +UST -110 UST-110
  422 +UTT - SUB UTT-SUB
  423 +Uecker - Randow Uecker-Randow
  424 +V 1 V1
  425 +V 2 V2
  426 +VC -25 A VC-25A
  427 +VDC -300 VDC-300
  428 +Valg - Podhala Valg-Podhala
  429 +Valg - Podhale Valg-Podhale
  430 +Vigée - Lebrun Vigée-Lebrun
  431 +W - Ł - W W-Ł-W
  432 +Writers ' Writers'
  433 +Wu - El Wu-El
  434 +X - lecia X-lecia
  435 +X - tiny X-tiny
  436 +X -100 X-100
  437 +XF 11 XF11
  438 +XII . 1 XII.1
  439 +Y - usiu Y-usiu
  440 +Zosia 12 Zosia12
  441 +AGM -86 AGM-86
  442 +BGM -109 BGM-109
  443 +CM 6000 CM6000
  444 +Każe - duba Każe-duba
  445 +Lem _ ura Lem_ura
  446 +Okocimiem -2 Okocimiem-2
  447 +Rucianem - Nidzie Rucianem-Nidzie
  448 +SDM - M 61 SDM-M61
  449 +SKM - ek SKM-ek
  450 +fiksum - dyrdum fiksum-dyrdum
  451 +al - Azhar al-Azhar
  452 +al - Jihad al-Jihad
  453 +al - Mihraniego al-Mihraniego
  454 +al - Qaida al-Qaida
  455 +al - Qaidy al-Qaidy
  456 +al - Quaidy al-Quaidy
  457 +al - Sadra al-Sadra
  458 +al - Zawahiriego al-Zawahiriego
  459 +all ' amatricane all'amatricane
  460 +anty ( radio ) aktywność anty(radio)aktywność
  461 +aren ' t aren't
  462 +can ' t can't
  463 +coctail - barze coctail-barze
  464 +color - power color-power
  465 +e " S " mańskim e"S"mańskim
  466 +e " S " manami e"S"manami
  467 +e - poczta e-poczta
  468 +el - Bahari el-Bahari
  469 +fast - foodu fast-foodu
  470 +fast - foodów fast-foodów
  471 +fin - de - siecle fin-de-siecle
  472 +fin - de-siecle'u fin-de-siecle'u
  473 +h 2 - b 8 h2-b8
  474 +handy - size handy-size
  475 +happy - endem happy-endem
  476 +http://www.georgehart.com/pavilion.html, http://www.georgehart.com/pavilion.html ,
  477 +http://www.li.net/~george/pavilion.html, http://www.li.net/~george/pavilion.html ,
  478 +http://wyborcza.pl/1,75478,7072995,Na_ZUS_nie_plac... http://wyborcza.pl/1,75478,7072995,Na_ZUS_nie_plac . . .
  479 +hydro - powietrznego hydro-powietrznego
  480 +i - necie i-necie
  481 +isn ' t isn't
  482 +iugatio - capitatio iugatio-capitatio
  483 +jumbo - jeta jumbo-jeta
  484 +językowo - stylistycznego językowo-stylistycznego
  485 +kan ' iak kan'iak
  486 +koagulazo - dodatnich koagulazo-dodatnich
  487 +korned - bify korned-bify
  488 +latina - pl latina-pl
  489 +lkj 908 ż 7654 fds lkj908ż7654fds
  490 +maine - coony maine-coony
  491 +multi - master multi-master
  492 +n ' est n'est
  493 +nasza - klasa.pl nasza-klasa.pl
  494 +news : 9 ntb 5 p $ jst $ 1 @ h 1 . uw.edu.pl news:9ntb5p$jst$1@h1.uw.edu.pl
  495 +news : pl . news . czytniki news:pl.news.czytniki
  496 +news : pl . sci . psychologia news:pl.sci.psychologia
  497 +nocz ' nocz'
  498 +non - stop non-stop
  499 +o - da - da - da o-da-da-da
  500 +pl . hum . teatr pl.hum.teatr
  501 +pl . sci . filozofia pl.sci.filozofia
  502 +pl . soc . cośtam pl.soc.cośtam
  503 +pl . soc . religia pl.soc.religia
  504 +political - corect political-corect
  505 +polsko - polsko-
  506 +pop - rocku pop-rocku
  507 +punk - rocka punk-rocka
  508 +quasi - jednorodnych quasi-jednorodnych
  509 +quasi - mocarstwowej quasi-mocarstwowej
  510 +ry ( d ) zykować ry(d)zykować
  511 +s ... s...
  512 +sado - maso sado-maso
  513 +samuraj - ko samuraj-ko
  514 +science - fiction science-fiction
  515 +second - handach second-handach
  516 +seks - maniaka seks-maniaka
  517 +serdako - suknię serdako-suknię
  518 +t - shirtach t-shirtach
  519 +tic - taki tic-taki
  520 +www.gadki.lublin.pl/east / z / index . html www.gadki.lublin.pl/east/z/index.html
  521 +zwisający - mi zwisający-mi
  522 +Łódzko - Dymaczewskie Łódzko-Dymaczewskie
  523 +Świeradowa - Zdroju Świeradowa-Zdroju
  524 +1 b 1b
  525 +1 n 1n
  526 +11 a 11a
  527 +125 p 125p
  528 +2 n 2n
  529 +2000 + 2000+
  530 +50 . 50.
  531 +75 d 75d
  532 +94 a 94a
  533 +95 ' 95'
  534 +: O ) :O)
  535 +: P :P
  536 +; P ;P
  537 +A / H 1 N 1 A/H1N1
  538 +AN / SPY -1 AN/SPY-1
  539 +Ab - Rama Ab-Rama
  540 +Al - Dżazira Al-Dżazira
  541 +B - B B-B
  542 +Binowo - Park Binowo-Park
  543 +C + + C++
  544 +C - dur C-dur
  545 +C 1 C1
  546 +C 60 C60
  547 +Cobellex - Pol Cobellex-Pol
  548 +F 2 F2
  549 +F 4 F4
  550 +F 8 F8
  551 +G 3 G3
  552 +Kujawsko - Pomorskie Kujawsko-Pomorskie
  553 +L 3 L3
  554 +Lépanges - sur - Vologne Lépanges-sur-Vologne
  555 +M - skiego M-skiego
  556 +M 15 M15
  557 +MP 5 MP5
  558 +Mi -2 Mi-2
  559 +MiG -21 MiG-21
  560 +NATURA - TUR NATURA-TUR
  561 +Nord - Pas-de-Calais Nord-Pas-de-Calais
  562 +S - check S-check
  563 +S 3 S3
  564 +T -72 T-72
  565 +U 2 U2
  566 +WIG 20 WIG20
  567 +Y 2 K Y2K
  568 +Z / SI 444 SKAK 33883 AQASA Z/SI444SKAK33883AQASA
  569 +chłopka - roztropka chłopka-roztropka
  570 +news : pl . rec . kuchnia news:pl.rec.kuchnia
  571 +3 c 3c
  572 +4 a 4a
  573 +CD - R CD-R
  574 +F 1 F1
  575 +K -202 K-202
  576 +Okocimia -2 Okocimia-2
  577 +e " S " manów e"S"manów
  578 +126 p 126p
  579 +3 D 3D
  580 +3 a 3a
  581 +A 2 A2
  582 +Ab - Ram Ab-Ram
  583 +MP 3 MP3
  584 +2 a 2a
  585 +A 4 A4
  586 +Ś - W Ś-W
  587 +F -16 F-16
  588 +1 a 1a
  589 +mp 3 mp3
  590 +XM 1 XM1
  591 +( : (:
  592 +60546 . fizjks@iftia.univ.gda.pl 60546.fizjks@iftia.univ.gda.pl
  593 +9 b 16 v 3 $ pmh $ 1 @ news.onet.pl 9b16v3$pmh$1@news.onet.pl
  594 +;))) ) ) ) ) ) ) ) ) ) ) ) ) ;)))))))))))))))
  595 +bart @ 1 under.com bart@1under.com
  596 +bburski @ friko 6 . onet.pl bburski@friko6.onet.pl
  597 +goury @ o 2 . pl goury@o2.pl
  598 +izaa 83 @ interia.pl izaa83@interia.pl
  599 +latina - pl - subskrypcja@hydepark.pl latina-pl-subskrypcja@hydepark.pl
  600 +latina - pl@hydepark.pl latina-pl@hydepark.pl
  601 +lebron 26 @ vp.pl lebron26@vp.pl
  602 +marcoos _ 87 @ wp.pl marcoos_87@wp.pl
  603 +ona _ _ _ @ op.pl ona___@op.pl
  604 +pl . soc . [ cośtam | cośta m ] pl.soc.cośtam
  605 +rad _ rez@poczta.onet.pl rad_rez@poczta.onet.pl
  606 +slawek . sa@gazeta.pl slawek.sa@gazeta.pl
  607 +sonka _ 86 @ wp.pl sonka_86@wp.pl
  608 +synapse @ friko 2 . onet.pl synapse@friko2.onet.pl
  609 +wer 23451 erdfghjklłlpoiujzhzbdfghuz 786543 ihgfcym wer23451erdfghjklłlpoiujzhzbdfghuz786543ihgfcym
  610 +BZT 5 BZT5
  611 +- 0.8 -0.8
  612 +- kami -kami
  613 +- ski -ski
  614 +-- ) --)
  615 +-26 - 26
  616 +-29 - 29
  617 +-651 - 651
  618 +-9 - 9
  619 +1-2 -3 1-2-3
  620 +16-6 16 - 6
  621 +23-2 23 - 2
  622 +28-1 28 - 1
  623 +3 x 16 3x16
  624 +421-30 421 - 30
  625 +429-56 429 - 56
  626 +: o ) :o)
  627 +B P BP
  628 +C D CD
  629 +O . K O.K
  630 +SG 2 SG2
  631 +SG 3 SG3
  632 +Znaszli Znasz li
  633 +` cause `cause
  634 +Iże eś I że eś
  635 +Kurważe ż Kurwa że ż
  636 +Tyże ś Ty że ś
  637 +- 0.3 -0.3
  638 +2 - 000-001 2-000-001
  639 +B M - ki BM-ki
  640 +P W 5 PW5
  641 +atom 2004 meister@gmail.com atom2004meister@gmail.com
  642 +temperatury-i temperatury - i
  643 +włast ' włast'
  644 +x ) x)
  645 +y @ z y@z
  646 +zy ( g ) zakiem zy(g)zakiem
  647 +—16 — 16
  648 +e - e-
  649 +IV - ligowcy IV-ligowcy
  650 +byśta by śta
  651 +koszernieV koszernie V
  652 +AN ] SPY -1 AN]SPY-1
  653 +PM 63 PM63
  654 +1 B 1B
  655 +1 C 210 1C210
  656 +CYP 1 A 2 CYP1A2
  657 +CYP 2 C 19 CYP2C19
  658 +CYP 2 C 8 CYP2C8
  659 +CYP 2 C 9 CYP2C9
  660 +CYP 3 A 4 CYP3A4
  661 +D 4 D4
  662 +D 5 D5
  663 +D 7 D7
  664 +FMR 1 FMR1
  665 +Eija - Riitta Eija-Riitta
  666 +FRiB - EXu FRiB-EXu
  667 +Fuego - Falmana Fuego-Falmana
  668 +G - Star G-Star
  669 +G 20 G20
  670 +G 8 G8
  671 +H 5 H5
  672 +HA - Il HA-Il
  673 +III -1 III-1
  674 +III -2 III-2
  675 +JD 1 JD1
  676 +N 3 N3
  677 +R 2 R2
  678 +Remo - Bud Remo-Bud
  679 +SIRT 1 SIRT1
  680 +UGTA 1 UGTA1
  681 +UGTA 3 UGTA3
  682 +V 12 V12
  683 +Win 32 Win32
  684 +ex 68151010 ex68151010
  685 +ex 68159990 ex68159990
  686 +ex 69021000 ex69021000
  687 +ex 69039020 ex69039020
  688 +T -38 T-38
  689 +SRT 1720 SRT1720
  690 +SFMI - Chronopost SFMI-Chronopost
  691 +PPE - DE PPE-DE
  692 +żezostanie że zostanie
  693 +półgodziny pół godziny
  694 +musiałabybyć musiała by być
  695 +ilepodziwu ile podziwu
  696 +i 2010 i2010
  697 +dorana do rana
  698 +Spodchmurykapelusza Spod chmury kapelusza
  699 +RYBOŁÓWSTW 0 RYBOŁÓWSTW0
  700 +JEDNEG 0 JEDNEG0
  701 +Ideal - ist Ideal-ist
  702 +H - L H-L
  703 +George , a George,a
  704 +Beth - ann Beth-ann
  705 +Al - Kaidy Al-Kaidy
  706 +Żołądzie m Żołądziem
  707 +przyszłe m przyszł em
  708 +opuszki em opuszkiem
  709 +gruchnie m gruchniem
  710 +Remie ń Remień
  711 +Red em Redem
  712 +Mik em Mikem
  713 +Komorze m Komorzem
  714 +Ki - jung Ki-jung
  715 +Kai m Kaim
  716 +Jiang - qing Jiang-qing
  717 +Grabo ś Graboś
  718 +Dingi em Dingiem
  719 +Den by Denby
  720 +Ciasto ń Ciastoń
  721 +Chore ń Choreń
  722 +C -12 C-12
  723 +C -11 C-11
  724 +e - sporcie e-sporcie
  725 +non - profit non-profit
  726 +metylu -1 - ol metylu-1-ol
  727 +eks - szef eks-szef
  728 +e - wydania e-wydania
  729 +blue - box blue-box
  730 +bio - obrazowania bio-obrazowania
  731 +art . - rocka art.-rocka
  732 +Yabu - san Yabu-san
  733 +TEN - T TEN-T
  734 +alergogennymskładnikiem alergogennym składnikiem
  735 +PNCRM 17 PNCRM17
  736 +Nanim Na nim
  737 +6 - ciu 6-ciu
  738 +walijskie m walijskiem
  739 +będzie m będziem
  740 +2 n -1 2n-1
  741 +4 x 4 4x4
  742 +F M FM
  743 +Gu ` erinem Gu`erinem
  744 +LN M LNM
  745 +PL M PLM
  746 +UM Ś UMŚ
  747 +Mile m Milem
  748 +Zachodniopomorskie m Zachodniopomorskiem
  749 +Zombie m Zombiem
  750 +f m fm
  751 +hm m hmm
  752 +in ż inż
  753 +n m nm
  754 +pp m ppm
  755 +–3 – 3
  756 +- wymiarowej -wymiarowej
  757 +B r. Br .
  758 +Mowiłem Mowił em
  759 +Poszłem Poszł em
  760 +doszłem doszł em
  761 +wytuszowałam wytuszowała m
  762 +wzielibyście wzieli by ście
... ...
corpora/data/ontology.dic 0 → 100644
  1 +Time Has Day
  2 +#dnia 23
  3 +Time Has Month
  4 +Time Has Year
  5 +Time Has WeekDay
  6 +Time Has Hour
  7 +Time Has TimeOfDay
  8 +
  9 +Hour Prtc Instance
  10 +Hour Attr GenericDescription
  11 +
  12 +#miasto Warszawa
  13 +Location Has Town
  14 +Location Has Quarter
  15 +Location Has Street
  16 +Location Has HouseNumber
  17 +
  18 +Location Prtc Service
  19 +Location Attr GenericDescription
  20 +Location Attr Attitude
  21 +
  22 +Division Has OrganizationType
  23 +Division Has OrganizationName
  24 +Division Has Location
  25 +Division Has Rating
  26 +
  27 +Person Attr GenericDescription
  28 +Person Attr PersonDescription
  29 +Person Has Profession
  30 +#zawód hydraulik
  31 +Person Has FirstName
  32 +#imię Jan
  33 +Person Has LastName
  34 +#nazwisko Kowalski
  35 +Profession Has ProfessionParam
  36 +ProfessionParam Attr ProfessionParamDescription
  37 +
  38 +Name Prtc Person
  39 +
  40 +Service Doer Person
  41 +Service Doer Division
  42 +Service Param ServiceParam
  43 +Service Param Animal
  44 +Service Goal Service
  45 +Service Has Time
  46 +Service Has Location
  47 +Service SideAction Service
  48 +Service Attr ServiceDescription
  49 +Service Has Confirmation
  50 +Service Has Price
  51 +
  52 +ServiceParam Prtc Service
  53 +ServiceParam Attr ServiceParamDescription
  54 +ServiceParam Attr GenericDescription
  55 +ServiceParam Prtc Instance
  56 +
  57 +Animal Attr GenericDescription
  58 +
  59 +OrganizationType Attr OrganizationTypeDescription
  60 +OrganizationType Param OrganizationTypeParam
  61 +
  62 +Instance Has Service
  63 +Instance Doer Person
  64 +Instance Doer Division
  65 +Instance Has Time
  66 +Instance Has Location
  67 +Instance Has Price
  68 +Instance Client Name
  69 +Instance Has Confirmation
  70 +Instance Status InstanceStatus
  71 +Instance Attr GenericDescription
  72 +
  73 +#FIXME: czy nie lepiej byłoby: Price Measure Amount
  74 +Price Count Number
  75 +Price Attr RateDescription
  76 +
  77 +Rating Prtc Person
  78 +#FIXME: w poniższym trzeba zmienić relację lub typ
  79 +Rating Measure Measure
  80 +Rating Attr GenericDescription
  81 +
  82 +Confirmation Thme Instance
  83 +Confirmation Attr ConfirmationDescription
  84 +
  85 +Reminder Thme Instance
  86 +
  87 +Action Agnt Person
  88 +Action Doer Person
  89 +Action Client Person
  90 +Action Client Animal
  91 +Action Thme Instance
  92 +Action Thme Service
  93 +Action Thme Confirmation
  94 +Action Doer Division
  95 +Action Has Time
  96 +Action Has Location
  97 +Action Thme Location
  98 +Action Thme Time
  99 +Action Thme ServiceParam
  100 +Action Thme Division
  101 +Action Thme Name
  102 +Action Thme Rating
  103 +Action Thme Email
  104 +Action Thme Reminder
  105 +Action Goal Instance
  106 +
  107 + Service
  108 +
  109 +Attitude Thme Situation
  110 +#FIXME: poniższe wymaga przemyślenia użycia kontekstów
  111 +Attitude Thme Action
  112 +Attitude Thme Instance
  113 +Attitude Thme Confirmation
  114 +Attitude Thme Action
  115 +Attitude Agnt Person
  116 +Attitude Agnt2 Person
  117 +Attitude Manr GenericDescription
  118 +
  119 +MentalState Expr Person
  120 +MentalState Thme Time
  121 +MentalState Thme Instance
  122 +MentalState Thme Rating
  123 +MentalState Thme ServiceParam
  124 +
  125 +State Agnt Person
  126 +State Thme Instance
  127 +State Attr GenericDescription
  128 +State Thme ServiceParam
  129 +State Thme Division
  130 +
  131 +Situation Next Situation
... ...
corpora/data/pair_patterns.tab 0 → 100644
  1 +np:$n:$c:$g _:subst:$n:$c:$g -> nummod -> _:num:$n:$c:$g:congr
  2 +np:$n:$c:$g _:subst:$n:gen:$g -> nummod -> _:num:$n:$c:$g:rec
  3 +adjp:$n:$c:$g _:adj:$n:$c:$g -> nummod -> _:num:$n:$c:$g:congr
  4 +adjp:$n:$c:$g _:adj:$n:gen:$g -> nummod -> _:num:$n:$c:$g:rec
  5 +np:$n:$c:$g _:brev -> nummod -> _:num:$n:$c:$g
  6 +
  7 +pp _:subst.ger.num.ppron12.ppron3.adj:_:$c:_ -> case -> _:prep:$c
  8 +pp _:siebie:$c -> case -> _:prep:$c
  9 +pp _:brev -> case -> _:prep
  10 +pp na:prep -> fixed -> _:adv
  11 +pp _:adv -> case -> na:prep
  12 +comprep:gen _:prep:$c -> fixed -> _:subst:_:$c:_
  13 +pp _:subst.ger.num.ppron12.ppron3.adj:_:$c:_ -> case -> comprep:$c
  14 +
  15 +ip _:subst:_:nom:_ -> cop -> to:pred
  16 +ip _:adj:$n:nom:_ -> cop -> być:fin.bedzie:$n:$p
  17 +ip _:adj:$n:nom:$g -> cop -> być:praet:$n:$g
  18 +infp _:adj:_:nom:_ -> cop -> być:inf
  19 +ip _:subst:$n:inst:_ -> cop -> być:fin.praet.bedzie:$n:_
  20 +infp _:subst:$n:inst:_ -> cop -> być:inf
  21 +ip _:subst:$n:nom:_ -> cop -> [ _:pred: -> aux -> _:fin:$n:ter ]
  22 +ip _:subst:$n:nom:$g -> cop -> [ _:pred: -> aux -> _:praet:$n:$g ]
  23 +
  24 +ip _:pred -> aux -> _:fin.praet.bedzie
  25 +ip _:inf -> aux -> _:bedzie
  26 +ip _:praet:$n:_ -> aux -> _:bedzie:$n:_
  27 +ip _:ppas:$n:nom:_ -> aux:pass -> _:fin.bedzie:$n:_
  28 +ip _:ppas:$n:nom:$g -> aux:pass -> _:praet:$n:$g
  29 +ip _:ppas:$n:nom:$g -> aux -> _:praet:$n:$g
  30 +infp _:ppas:_:nom:_ -> aux:pass -> _:inf
  31 +
  32 +cp _:praet.winien.fin.inf.bedzie.imps.pred -> mark -> _:comp
  33 +cp [ _:_ -> cop -> _:praet.fin ] -> mark -> _:comp:
  34 +cp [ _:_ -> aux:pass -> _:praet.fin.bedzie ] -> mark -> _:comp:
  35 +#jako.jak.niż:prep:nom -> rev_mark -> _:subst.adj:_:nom:_
  36 +sent _:subst.adj:_:nom:_ -> mark -> _:prep:nom
  37 +sent _:adv:pos -> mark -> _:comp:
  38 +
  39 +conjp jak:conj -> fixed -> i:conj
  40 +comprep:$c wraz:adv -> fixed -> z:prep:$c
  41 +
  42 +#sent _:fin.praet.winien.imps -> punct -> .:interp
  43 +#sent _:fin.praet.winien.imps -> punct -> ?:interp
  44 +#sent [ _:_ -> cop -> _:fin.praet.winien ] -> punct -> .:interp
  45 +#sent [ _:_ -> cop -> _:fin.praet.winien ] -> punct -> ?:interp
  46 +
... ...
corpora/data/patterns.tab 0 → 100644
  1 +np:$n:$c:$g -> amod -> adjp:$n:$c:$g
  2 +COORD np:_:$c:_ -> amod -> adjp:_:$c:_
  3 +_:subst.ger.adj:$n:$c:$g -> det -> _:adj:$n:$c:$g
  4 +_:siebie:$c -> amod -> _:adjp:_:$c:_
  5 +_:brev -> amod -> _:adjp:_:_:_
  6 +który:adj:$n:$c:$g -> amod -> adjp:$n:$c:$g
  7 +
  8 +_:subst.adj:$n:$c:$g -> nummod -> jeden:adj:$n:$c:$g
  9 +
  10 +_:subst:$n:$c:$g -> flat -> _:adj:$n:$c:$g
  11 +_:subst.adj:_:_:_ -> flat -> _:subst:_:gen:_
  12 +_:brev -> flat -> _:subst.adj.brev
  13 +_:subst:$n:$c:$g -> flat -> _:subst:$n:$c:$g
  14 +_:subst:_:_:_ -> flat -> _:brev
  15 +
  16 +_:subst.ger:_:_:_ -> nmod -> pp
  17 +_:subst:_:_:_ -> nmod -> np:_:gen:_
  18 +_:subst:_:_:_ -> nmod:arg -> np:_:gen:_
  19 +_:subst:$n:$c:$g -> nmod -> _:adj:$n:$c:$g
  20 +_:subst:_:_:_ -> nmod -> _:dig
  21 +_:subst:_:_:_ -> nmod -> _:subst.num:_:nom:_
  22 +_:subst:_:_:_ -> nmod -> [ _:subst.brev -> nummod -> _:num:_:nom:_ ]
  23 +_:brev -> nmod -> _:dig
  24 +_:brev -> nmod -> _:brev
  25 +_:brev -> nmod -> _:subst:_:gen:_
  26 +_:brev -> nmod:arg -> _:brev
  27 +_:brev -> nmod:arg -> _:subst:_:gen:_
  28 +_:subst:_:_:_ -> nmod -> _:brev
  29 +_:subst:_:_:_ -> nmod:title -> _:subst:_:nom:_
  30 +
  31 +_:subst:_:$c:_ -> appos -> _:subst:_:$c:_
  32 +_:brev -> appos -> _:subst
  33 +_:subst -> appos -> _:brev
  34 +_:subst:_:_:_ -> appos -> _:subst:_:nom:_
  35 +
  36 +_:ppas -> obl:agent -> [ _:subst -> case -> przez:prep:acc ]
  37 +
  38 +
  39 +_:fin.praet.pred -> discourse:comment -> _:qub
  40 +
  41 +_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie -> cc -> conjp
  42 +
  43 +
  44 +
  45 +#jako.niż:conj -> rev_nummod -> _:subst:_:acc:_
  46 +
  47 +_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie.ppas -> advmod -> _:qub
  48 +_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie.ppas -> advmod -> _:adv
  49 +_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie.ppas -> advmod -> [ na:prep -> fixed -> _:adv ]
  50 +_:adv.adj.subst.prep.comp.num.qub -> advmod -> _:qub
  51 +_:adv.adj -> advmod -> _:adv
  52 +
  53 +_:fin:$n:ter -> nsubj -> _:subst.ger.adj:$n:nom:_
  54 +COORD _:fin:_:ter -> nsubj -> _:subst.ger.adj:_:nom:_
  55 +_:fin:$n:ter -> nsubj -> [ _:subst.brev -> nummod -> _:num:$n:nom:_ ]
  56 +COORD _:fin:_:ter -> nsubj -> [ _:subst.brev -> nummod -> _:num:_:nom:_ ]
  57 +_:fin:$n:$p -> nsubj -> _:ppron12.ppron3:$n:nom:_:$p
  58 +_:praet.winien:$n:$g -> nsubj -> _:subst.ger.num.adj.ppron12.ppron3:$n:nom:$g
  59 +_:praet.winien:$n:$g -> nsubj -> [ _:subst.brev -> nummod -> _:num:$n:nom:$g ]
  60 +[ _:_ -> cop -> _:praet.winien:$n:$g ] -> nsubj -> _:subst.ger.num.adj.ppron12.ppron3:$n:nom:$g
  61 +[ _:adj:$n:nom:$g -> cop -> _:fin:$n:ter ] -> nsubj -> _:subst.ger.num.adj.ppron3:$n:nom:$g
  62 +_:fin:sg:ter -> nsubj -> [ _:subst.brev -> nummod -> _:num:_:nom:_:rec ]
  63 +_:praet.winien:sg:n -> nsubj -> [ _:subst.brev -> nummod -> _:num:_:nom:_:rec ]
  64 +_:fin.praet.winien:_:_ -> nsubj -> _:brev
  65 +[ _:ppas:$n:nom:$g -> aux:pass -> _:praet:$n:$g ] -> nsubj:pass -> np:$n:nom:$g
  66 +[ _:ppas:$n:nom:$g -> aux:pass -> _:fin:$n:_ ] -> nsubj:pass -> np:$n:nom:$g
  67 +[ _:ppas:$n:nom:$g -> aux:pass -> _:praet:$n:$g ] -> nsubj:pass -> adjp:$n:nom:$g
  68 +[ _:ppas:$n:nom:$g -> aux:pass -> _:fin:$n:_ ] -> nsubj:pass -> adjp:$n:nom:$g
  69 +
  70 +_:fin.praet.winien.inf.pcon.pact.pant.impt.imps.ger -> obj -> np:_:gen.acc:_
  71 +_:fin.praet.winien.inf.pcon.pact.pant.impt.imps.ger -> obj -> adjp:_:gen.acc:_
  72 +_:fin.praet.inf.pcon.pact.pant.impt.imps.ger.ppas -> ccomp:obj -> _:comp
  73 +_:fin.praet.inf.pcon.pact.impt.imps.pred.ger.ppas -> iobj -> np:_:nom.gen.dat.acc.inst:_
  74 +_:fin.praet.inf.pcon.pact.impt.imps.pred.ger.ppas -> iobj -> adjp:_:nom.gen.dat.acc.inst:_
  75 +_:fin.praet.inf.pcon.pact.impt.imps.pred.ger.ppas -> iobj -> [ _:subst.brev -> nummod -> _:num:_:gen.acc:_ ]
  76 +_:fin.praet.inf.pcon.pact.impt.imps.pred.ger.ppas -> iobj -> _:siebie:gen.dat.acc.inst
  77 +
  78 +_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie.ger.ppas.adj -> obl -> pp
  79 +_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie.ger.ppas.subst -> obl:arg -> pp
  80 +_:fin.praet.inf.pcon.pact.impt.imps.pred.ger.ppas -> obl -> np:_:inst:_
  81 +daleko:adv:com -> obl -> _:subst:_:nom:_
  82 +niedaleko:adv:pos -> obl:arg -> np:_:gen:_
  83 +
  84 +_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie -> xcomp -> infp
  85 +trudno.dobrze.zbyt.łatwo.tyle:adv -> xcomp -> infp
  86 +to:subst:sg:_:n -> xcomp -> aby.żeby.by:comp
  87 +_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie -> xcomp -> aby.żeby.by.że:comp
  88 +
  89 +_:fin.praet.inf.pcon.pact.pant.impt.imps.pred.ger.ppas -> ccomp -> _:comp
  90 +_:adv.adj.subst -> ccomp -> _:comp
  91 +_:fin.praet.inf.impt.pred -> ccomp -> ip
  92 +
  93 +_:fin.praet.winien.inf.pred -> advcl -> _:pcon.pant
  94 +_:fin.praet.winien.inf.impt.pred.ppas.bedzie -> advcl -> _:comp
  95 +_:fin -> advcl -> _:fin
  96 +_:praet -> advcl -> _:praet
  97 +_:adv.adj.subst -> advcl -> _:comp
  98 +
  99 +_:praet.winien:$n:_ -> compound:aglt -> być:aglt:$n:_
  100 +by:qub -> compound:aglt -> być:aglt
  101 +by.gdyby.jakby:comp -> compound:aglt -> być:aglt
  102 +_:praet.winien:_:_ -> compound:cnd -> by:qub
  103 +_:fin.praet.winien.inf.pcon.pact.ger.pant.impt.imps.pred -> expl:impers -> się:qub
  104 +_:fin.praet.winien.inf.pcon.pact.ger.pant.impt.imps.pred -> expl:impers -> siebie:siebie:dat
  105 +
  106 +#<conll_root>:interp -> root -> sent
  107 +<conll_root>:interp -> root -> ip
  108 +<conll_root>:interp -> root -> cp
  109 +#<conll_root>:interp -> root -> [ _:_ -> cop -> _:praet.winien:$n:$g | punct -> _:interp ]
  110 +<conll_root>:interp -> root -> np:_:nom:_
  111 +<conll_root>:interp -> root -> _:brev
  112 +#<conll_root>:interp -> root -> _:prep:_
  113 +<conll_root>:interp -> root -> trudno:adv:pos
  114 +<conll_root>:interp -> root -> dobrze:adv:com
  115 +<conll_root>:interp -> root -> tyle:adv:pos
  116 +<conll_root>:interp -> root -> tak:adv:pos
  117 +<conll_root>:interp -> root -> dobrze:adv:pos
  118 +<conll_root>:interp -> root -> oto:adv:pos
  119 +<conll_root>:interp -> root -> łatwo:adv:com
  120 +<conll_root>:interp -> root -> wtedy:adv:pos
  121 +<conll_root>:interp -> root -> trudno:adv:com
  122 +<conll_root>:interp -> root -> przykro:adv:pos
  123 +<conll_root>:interp -> root -> podobnie:adv:pos
  124 +<conll_root>:interp -> root -> dlatego:adv:pos
  125 +<conll_root>:interp -> root -> zwłaszcza:qub
  126 +<conll_root>:interp -> root -> notyfikować:ppas:_:nom:_
  127 +<conll_root>:interp -> root -> wykluczyć:ppas:_:nom:_
  128 +<conll_root>:interp -> root -> zmieniać:pact:_:nom:_
  129 +<conll_root>:interp -> root -> uwzględniać:pcon:imperf
  130 +<conll_root>:interp -> root -> mieć:pcon:imperf
  131 +<conll_root>:interp -> root -> pragnąć:pcon:imperf
  132 +<conll_root>:interp -> root -> mówić:pcon:imperf
  133 +<conll_root>:interp -> root -> mieć:pcon:imperf
  134 +
  135 +#_:adj.pcon.fin.comp.impl.ppas -> punct -> ,:interp
  136 +_:_ -> punct -> _:interp
  137 +
  138 +#_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie.ger.ppas.adj -> obl -> [ który:adj -> case -> _:prep ]
  139 +#_:fin.praet.winien.inf.pcon.pact.impt.imps.pred.bedzie.ger.ppas.subst -> obl:arg -> [ który:adj -> case -> _:prep ]
  140 +
  141 +#tu by trzeba dodać analizę "który"
  142 +_:subst -> acl:relcl -> ip
  143 +
  144 +[ _:adj -> cop -> _:bedzie ] -> cc -> _:conj
  145 +[ _:bedzie -> mark -> _:comp ] -> nsubj -> _:subst
  146 +cp -> ccomp -> cp
  147 +[ _:inf -> aux -> _:bedzie ] -> nsubj -> _:adj
  148 +cp -> ccomp -> cp
  149 +pp -> cop -> _:fin
  150 +pp -> cop -> _:praet
  151 +[ _:subst -> cop -> _:praet ] -> cc -> _:conj
  152 +_:adj -> advcl -> cp
  153 +_:adj -> obl -> _:subst
  154 +_:adv -> aux -> _:praet
  155 +_:adv -> obl -> [ _:subst -> nummod -> _:num ]
  156 +_:fin -> cc:preconj -> _:conj
  157 +_:fin -> ccomp -> [ _:adj -> mark -> _:comp | cop -> _:fin ]
  158 +_:fin -> xcomp -> [ _:inf -> mark -> _:comp ]
  159 +_:impt -> mark -> _:comp
  160 +_:inf -> discourse:comment -> [ _:prep -> fixed -> _:subst ]
  161 +_:inf -> obl -> [ _:subst -> mark -> _:prep ]
  162 +_:ppas -> obl:agent -> [ _:siebie -> case -> _:prep ]
  163 +_:ppron12 -> appos -> _:subst
  164 +_:praet -> obl:comp -> cp
  165 +_:subst -> compound:aglt -> _:aglt
  166 +_:subst -> det -> _:adj
  167 +_:subst -> discourse:comment -> _:subst
  168 +cp -> advcl -> [ _:inf -> mark -> _:comp ]
  169 +pp -> nmod:arg -> _:adj
  170 +pp -> nsubj -> _:subst
  171 +#[ _:subst -> cop -> [ _:pred -> aux -> _:praet ] ] -> mark -> _:comp
  172 +_:adv -> xcomp -> [ _:inf -> mark -> _:comp ]
  173 +_:conj -> nmod -> _:subst
  174 +_:dig -> case -> _:prep
  175 +_:ger -> list -> _:dig
  176 +_:ger -> nmod -> _:subst
  177 +_:pant -> obl -> pp
  178 +_:pant -> obl:arg -> pp
  179 +_:ppas -> aux -> _:inf
  180 +_:praet -> csubj -> cp
  181 +_:subst -> advcl -> cp
  182 +#[ _:adj -> mark -> _:comp | cop -> _:fin ] -> obl:arg -> pp
  183 +[ _:ppas -> aux -> _:praet ] -> nsubj:pass -> _:adj
  184 +cp -> advcl -> cp
  185 +pp -> fixed -> _:subst
  186 +pp -> nmod -> _:adj
  187 +_:adj -> discourse:comment -> _:adv
  188 +_:adj -> iobj -> _:subst
  189 +_:adj -> obl:comp -> [ _:adj -> mark -> _:prep ]
  190 +_:adv -> mark -> _:comp
  191 +_:fin -> acl:relcl -> _:fin
  192 +_:fin -> discourse:comment -> _:subst
  193 +_:fin -> iobj -> _:num
  194 +_:interp -> case -> _:prep
  195 +_:ppas -> cop -> _:fin
  196 +_:ppron12 -> mark -> _:prep
  197 +_:praet -> discourse:intj -> _:interj
  198 +_:praet -> mark -> _:conj
  199 +_:praet -> parataxis -> _:adv
  200 +_:subst -> appos -> _:adj
  201 +_:subst -> discourse:comment -> _:adv
  202 +_:subst -> mark -> _:prep
  203 +_:adj -> case -> _:prep
  204 +_:fin -> advcl -> [ _:ppas -> mark -> _:comp | aux:pass -> _:fin ]
  205 +_:fin -> obl:comp -> [ _:subst -> mark -> _:prep ]
  206 +_:ger -> mark -> _:prep
  207 +_:inf -> advmod -> [ _:prep -> fixed -> _:subst ]
  208 +_:inf -> mark -> _:conj
  209 +_:ppas -> cop -> _:praet
  210 +_:ppas -> nsubj -> _:subst
  211 +_:ppas -> obl:agent -> [ _:ppron3 -> case -> _:prep ]
  212 +_:ppron3 -> nummod -> _:num
  213 +_:praet -> discourse:comment -> _:subst
  214 +_:subst -> advcl -> cp
  215 +_:subst -> cop -> [ _:pred -> aux -> _:bedzie ]
  216 +_:subst -> nsubj -> _:ppron3
  217 +[ _:adj -> cop -> _:bedzie ] -> mark -> _:comp
  218 +[ _:adj -> cop -> _:praet ] -> compound:cnd -> _:qub
  219 +pp -> amod -> _:adj
  220 +#[ _:subst -> cop -> [ _:pred -> aux -> _:fin ] ] -> cc -> _:conj
  221 +_:adj -> discourse:comment -> _:qub
  222 +_:adv -> discourse:comment -> _:qub
  223 +_:adv -> obl -> _:subst
  224 +_:burk -> case -> _:prep
  225 +_:conj -> nsubj -> _:subst
  226 +_:interp -> cc -> _:conj
  227 +_:interp -> fixed -> _:num
  228 +_:interp -> root -> pp
  229 +_:interp -> root -> _:subst
  230 +_:num -> fixed -> _:num
  231 +_:num -> nmod -> [ _:ppron3 -> case -> _:prep ]
  232 +_:pact -> case -> _:prep
  233 +_:pcon -> ccomp -> cp
  234 +_:ppas -> case -> _:prep
  235 +_:ppas -> obl:agent -> [ _:ppron12 -> case -> _:prep ]
  236 +_:praet -> advmod -> _:adj
  237 +_:prep -> discourse:comment -> _:adv
  238 +_:subst -> nmod -> [ _:subst -> mark -> _:prep ]
  239 +[ _:inf -> mark -> _:comp ] -> nsubj -> _:subst
  240 +[ _:subst -> cop -> _:pred ] -> obl -> pp
  241 +_:adj -> nmod -> _:subst
  242 +_:fin -> csubj -> cp
  243 +_:inf -> advcl -> cp
  244 +_:inf -> ccomp -> cp
  245 +_:ppas -> mark -> _:comp
  246 +_:prep -> obl -> [ _:subst -> nummod -> _:num ]
  247 +_:subst -> obl -> pp
  248 +_:subst -> xcomp -> _:inf
  249 +[ _:adj -> cop -> _:praet ] -> compound:aglt -> _:aglt
  250 +[ _:ppas -> aux:pass -> _:praet ] -> cc -> _:conj
  251 +pp -> cc:preconj -> _:conj
  252 +pp -> flat -> _:adj
  253 +pp -> flat -> _:subst
  254 +_:adv -> ccomp -> _:fin
  255 +_:brev -> nmod -> pp
  256 +_:fin -> mark -> _:conj
  257 +_:praet -> mark -> _:adv
  258 +_:qub -> cc -> _:conj
  259 +_:subst -> discourse:intj -> _:interj
  260 +_:subst -> nmod:arg -> _:brev
  261 +_:adj -> obl:arg -> [ _:ger -> case -> _:prep ]
  262 +_:adv -> fixed -> _:comp
  263 +_:adv -> obl:arg -> _:subst
  264 +_:brev -> case -> [ _:prep -> fixed -> _:subst ]
  265 +_:fin -> ccomp:obj -> cp
  266 +_:fin -> discourse:comment -> _:brev
  267 +_:fin -> obl -> [ _:subst -> nummod -> _:num ]
  268 +_:ger -> nmod:arg -> _:subst
  269 +_:inf -> nsubj -> _:subst
  270 +_:interp -> nmod:arg -> _:subst
  271 +_:interp -> nsubj -> _:subst
  272 +_:subst -> acl:relcl -> _:subst
  273 +_:subst -> flat -> _:adja
  274 +_:subst -> xcomp -> [ _:inf -> mark -> _:comp ]
  275 +[ _:pred -> mark -> _:comp ] -> aux -> _:praet
  276 +_:fin -> advmod -> _:adj
  277 +_:fin -> obl -> [ _:subst -> mark -> _:prep ]
  278 +_:subst -> nsubj -> _:adj
  279 +[ _:adj -> cop -> _:praet ] -> cc -> _:conj
  280 +[ _:ger -> case -> _:prep ] -> obl:agent -> pp
  281 +[ _:ppas -> aux:pass -> _:fin ] -> cc -> _:conj
  282 +[ _:subst -> cop -> _:fin ] -> cc -> _:conj
  283 +[ _:subst -> cop -> _:fin ] -> obl -> pp
  284 +_:adj -> acl:relcl -> _:fin
  285 +_:adv -> obl:arg -> pp
  286 +_:fin -> nsubj -> _:adj
  287 +_:inf -> ccomp:obj -> cp
  288 +_:inf -> fixed -> _:qub
  289 +_:praet -> advcl -> cp
  290 +_:prep -> case -> [ _:prep -> fixed -> _:subst ]
  291 +[ _:adj -> case -> _:prep ] -> fixed -> _:dig
  292 +[ _:subst -> cop -> _:fin ] -> nsubj -> _:ger
  293 +[ _:subst -> cop -> _:praet ] -> obl -> pp
  294 +_:conj -> advmod -> _:qub
  295 +_:fin -> advmod:arg -> _:adv
  296 +_:fin -> list -> _:dig
  297 +_:impt -> advcl -> cp
  298 +_:subst -> fixed -> _:adj
  299 +[ _:subst -> cop -> _:praet ] -> advmod -> _:adv
  300 +_:fin -> discourse:intj -> _:interj
  301 +_:fin -> obj -> _:subst
  302 +_:ger -> advmod -> _:qub
  303 +_:subst -> flat -> _:dig
  304 +cp -> advcl -> cp
  305 +cp -> nsubj -> _:subst
  306 +_:adj -> acl:relcl -> _:praet
  307 +_:inf -> advcl -> [ _:inf -> mark -> _:comp ]
  308 +_:inf -> ccomp -> cp
  309 +_:praet -> ccomp:obj -> cp
  310 +_:praet -> parataxis -> _:impt
  311 +_:praet -> parataxis -> _:subst
  312 +_:prep -> obl -> pp
  313 +_:subst -> case -> _:prep
  314 +cp -> nsubj -> _:subst
  315 +_:ppas -> aux -> _:fin
  316 +pp -> xcomp -> [ _:inf -> mark -> _:comp ]
  317 +_:adj -> amod -> _:adj
  318 +_:adj -> cop -> _:inf
  319 +_:num -> cc -> _:conj
  320 +_:num -> obl -> _:brev
  321 +_:praet -> obl:comp -> [ _:subst -> mark -> _:prep ]
  322 +_:prep -> flat -> _:subst
  323 +_:subst -> nmod:arg -> _:subst
  324 +[ _:adj -> cop -> _:fin ] -> obl:arg -> pp
  325 +_:adv -> mark -> _:conj
  326 +_:fin -> mark -> _:adv
  327 +_:interp -> fixed -> _:dig
  328 +_:subst -> parataxis -> _:fin
  329 +#[ _:subst -> cop -> [ _:pred -> aux -> _:fin ] ] -> mark -> _:comp
  330 +[ _:subst -> nummod -> _:num ] -> cc -> _:conj
  331 +_:adj -> cop -> _:pred
  332 +_:adv -> ccomp -> cp
  333 +_:adv -> mark -> _:adv
  334 +_:prep -> discourse:comment -> _:brev
  335 +_:adv -> case -> _:prep
  336 +_:ppas -> nsubj:pass -> _:subst
  337 +_:subst -> discourse:comment -> pp
  338 +pp -> advmod -> _:adv
  339 +[ _:subst -> mark -> _:prep ] -> cc -> _:conj
  340 +_:adj -> fixed -> _:dig
  341 +[ _:ger -> case -> _:prep ] -> cc -> _:conj
  342 +[ _:subst -> cop -> _:bedzie ] -> nsubj -> _:subst
  343 +_:prep -> fixed -> _:adj
  344 +_:subst -> cop -> _:inf
  345 +_:subst -> list -> _:dig
  346 +_:praet -> discourse:comment -> pp
  347 +_:praet -> vocative -> _:subst
  348 +_:subst -> ccomp -> cp
  349 +_:subst -> nmod:arg -> _:adj
  350 +[ _:inf -> mark -> _:comp ] -> aux -> _:bedzie
  351 +pp -> nmod -> _:subst
  352 +_:adv -> acl:relcl -> _:praet
  353 +_:praet -> obl -> [ _:subst -> nummod -> _:num ]
  354 +pp -> ccomp -> cp
  355 +_:adj -> mark -> _:conj
  356 +_:adv -> acl:relcl -> _:fin
  357 +_:fin -> advmod -> [ _:prep -> fixed -> _:subst ]
  358 +_:qub -> fixed -> _:adv
  359 +[ _:ppas -> aux:pass -> _:bedzie ] -> nsubj:pass -> _:subst
  360 +[ _:subst -> cop -> _:pred ] -> cc -> _:conj
  361 +_:bedzie -> nsubj -> _:subst
  362 +_:fin -> discourse:comment -> pp
  363 +_:fin -> ccomp:obj -> cp
  364 +_:ppron3 -> advmod -> _:qub
  365 +_:prep -> cc -> _:conj
  366 +pp -> cc -> _:conj
  367 +#[ _:subst -> mark -> _:comp | cop -> _:fin ] -> nsubj -> _:subst
  368 +_:fin -> compound:imp -> _:qub
  369 +_:brev -> amod -> _:adj
  370 +_:ger -> obl:agent -> pp
  371 +_:num -> nmod -> pp
  372 +_:praet -> obl -> [ _:subst -> mark -> _:prep ]
  373 +_:subst -> fixed -> _:subst
  374 +[ _:ger -> case -> _:prep ] -> nsubj -> _:subst
  375 +_:adv -> fixed -> _:qub
  376 +_:praet -> advmod -> [ _:prep -> fixed -> _:subst ]
  377 +_:praet -> discourse:comment -> [ _:prep -> fixed -> _:subst ]
  378 +_:subst -> cop -> _:pact
  379 +_:ppron12 -> advmod -> _:qub
  380 +_:prep -> discourse:comment -> [ _:prep -> fixed -> _:subst ]
  381 +cp -> advcl -> cp
  382 +_:adv -> ccomp -> cp
  383 +_:adv -> obl:comp -> [ _:subst -> mark -> _:prep ]
  384 +pp -> mark -> _:conj
  385 +_:interp -> fixed -> _:subst
  386 +pp -> ccomp -> cp
  387 +pp -> obl -> pp
  388 +[ _:subst -> cop -> _:pred ] -> mark -> _:comp
  389 +_:adj -> nmod:arg -> _:subst
  390 +pp -> mark -> _:comp
  391 +_:fin -> obl -> _:subst
  392 +_:adv -> obl -> pp
  393 +[ _:adj -> cop -> _:bedzie ] -> nsubj -> _:subst
  394 +_:fin -> advcl -> cp
  395 +_:fin -> discourse:comment -> _:adv
  396 +_:interp -> nummod -> _:num
  397 +_:subst -> discourse:comment -> [ _:prep -> fixed -> _:subst ]
  398 +_:subst -> nmod -> _:adj
  399 +[ _:inf -> aux -> _:bedzie ] -> nsubj -> _:subst
  400 +[ _:brev -> case -> _:prep ] -> amod -> _:adj
  401 +_:ppas -> cc -> _:conj
  402 +_:subst -> cc:preconj -> _:conj
  403 +[ _:brev -> case -> _:prep ] -> nummod -> _:num
  404 +_:praet -> advcl -> [ _:inf -> mark -> _:comp ]
  405 +_:subst -> ccomp -> cp
  406 +_:subst -> nmod -> _:subst
  407 +_:num -> fixed -> _:subst
  408 +[ _:adj -> cop -> _:fin ] -> cc -> _:conj
  409 +_:adjp -> case -> _:prep
  410 +_:adv -> fixed -> _:adv
  411 +_:impt -> vocative -> _:subst
  412 +_:conj -> fixed -> _:qub
  413 +_:qub -> fixed -> _:qub
  414 +pp -> nummod -> _:num
  415 +_:burk -> flat -> _:subst
  416 +cp -> aux -> _:bedzie
  417 +_:subst -> flat -> _:subst
  418 +_:subst -> amod -> _:adj
  419 +pp -> amod -> _:adja
  420 +_:fin -> advcl -> [ _:inf -> mark -> _:comp ]
  421 +_:fin -> discourse:comment -> [ _:prep -> fixed -> _:subst ]
  422 +_:praet -> parataxis -> _:praet
  423 +_:subst -> discourse:comment -> _:brev
  424 +_:adj -> cop -> _:praet
  425 +_:praet -> ccomp -> cp
  426 +_:adj -> mark -> _:comp
  427 +_:praet -> ccomp -> cp
  428 +_:subst -> cop -> _:praet
  429 +_:num -> advmod -> _:adv
  430 +pp -> mark -> _:adv
  431 +_:adj -> obl:comp -> [ _:subst -> mark -> _:prep ]
  432 +_:praet -> ccomp:obj -> cp
  433 +_:ger -> cc -> _:conj
  434 +_:praet -> discourse:comment -> _:adv
  435 +_:praet -> obl -> _:subst
  436 +_:adj -> cop -> _:fin
  437 +_:fin -> nsubj -> _:subst
  438 +_:fin -> parataxis -> _:praet
  439 +_:adv -> cc -> _:conj
  440 +_:subst -> nummod -> _:num
  441 +_:fin -> vocative -> _:subst
  442 +_:fin -> parataxis -> _:fin
  443 +_:interp -> root -> _:inf
  444 +_:subst -> obl -> pp
  445 +_:adj -> obl:arg -> pp
  446 +_:adj -> fixed -> _:adj
  447 +_:conj -> fixed -> _:conj
  448 +_:subst -> cop -> _:fin
  449 +_:prep -> fixed -> _:adjp
  450 +_:prep -> fixed -> _:adv
  451 +_:fin -> ccomp -> cp
  452 +_:subst -> amod -> _:adja
  453 +_:subst -> mark -> _:adv
  454 +_:praet -> nsubj -> _:subst
  455 +_:subst -> nsubj -> _:subst
  456 +[ _:subst -> cop -> _:pred ] -> nsubj -> _:subst
  457 +_:num -> fixed -> _:brev
  458 +_:subst -> mark -> _:comp
  459 +_:subst -> advmod -> _:adv
  460 +_:praet -> advcl -> cp
  461 +_:ger -> nsubj -> _:subst
  462 +_:prep -> advmod -> _:adv
  463 +_:adj -> nsubj -> _:subst
  464 +_:fin -> ccomp -> cp
  465 +_:praet -> parataxis -> _:fin
  466 +_:fin -> advcl -> cp
  467 +[ _:subst -> cop -> _:fin ] -> nsubj -> _:subst
  468 +_:adv -> fixed -> _:prep
  469 +_:adj -> cc -> _:conj
  470 +_:subst -> mark -> _:conj
  471 +[ _:subst -> nummod -> _:num ] -> case -> _:prep
  472 +_:subst -> fixed -> _:prep
  473 +pp -> cc -> _:conj
  474 +pp -> nummod -> _:num
  475 +_:subst -> cc -> _:conj
  476 +
... ...
corpora/data/proper_cats.tab 0 → 100644
  1 +rok Year
  2 +miesiąc Month
  3 +dzień Day
  4 +dzień tygodnia WeekDay
  5 +godzina Hour
  6 +miasto stołeczne, na prawach powiatu Town
  7 +gmina miejska, miasto stołeczne Town
  8 +miasto Town
  9 +wieś Town
  10 +przysiółek Town
  11 +osada leśna Town
  12 +osada Town
  13 +kolonia Town
  14 +dzielnica Quarter
  15 +część miejscowości Quarter
  16 +część miasta Quarter
  17 +aleje Street
  18 +aleja Street
  19 +ulica Street
  20 +most Street
  21 +plac Street
  22 +rondo Street
  23 +wiadukt Street
  24 +szosa Street
  25 +trakt Street
  26 +trasa Street
  27 +building-number HouseNumber
  28 +geograficzna
  29 +obj-id
  30 +TimeOfDay TimeOfDay
  31 +data Day
  32 +nazwisko LastName
  33 +imię FirstName
  34 +
  35 +
  36 +
... ...
corpora/data/sem-lexicon.dic 0 → 100644
  1 +
  2 +np:
  3 + /(1+np*Hour*number*case*gender*person):Apoz
  4 + |(1+np*Hour*sg*nom*f*ter+prepnp*Hour*sem*T*T):PHas
  5 + /(1+np*TimeOfDay*sg*inst*T*ter+np*TimeOfDay*sg*loc*T*ter+prepnp*TimeOfDay*sem*T*T+advp*TimeOfDay*T):PHas
  6 + /(1+np*Day*number*case*gender*person):Apoz
  7 +# |(1+np*Day*number*case*gender*person+adj*number*case*gender):unk
  8 + |(1+np*Day*sg*gen*m3*ter):PHas
  9 + /(1+np*Month*sg*gen*T*T):PHas
  10 + |(1+np*Year*number*case*gender*person):Apoz
  11 + /(1+np*Year*sg*gen*T*T):PHas
  12 + |(1+np*HouseNumber*sg*nom*T*T):PHas
  13 + |(1+np*Street*T*nom*T*ter+prepnp*Street*sem*T*T):PHas
  14 + |(1+np*Quarter*T*nom*T*ter+prepnp*Quarter*sem*T*T):PHas
  15 + /(1+np*Person*number*case*T*person):PApoz
  16 +# |(1+np*FirstName*number*case*gender*person):unk
  17 + \(1+num*number*case*gender*person*congr*nsem+num*number*case*gender*person*rec*nsem):Count \(1+qub):adjunct /(1+inclusion):adjunct
  18 + \(1+measure*unumber*ucase*ugender*uperson):Measure
  19 + /(1+date+day+day-month+date-interval+day-interval+day-month-interval+year+year-interval+hour+hour-minute+hour-interval+hour-minute-interval+roman+roman-interval):Coref
  20 + |(1+year):unk /(1+obj-id):Coref
  21 + |(1+adjp*X*number*case*gender):unk
  22 + \(1+nie+advp*X*T):unk
  23 + /(1+np*Currency*number*gen*gender*person):Has
  24 + /(1+np*Currency*number*case*gender*person):Has
  25 + /(1+np*cat*number*case*gender*person):Apoz
  26 + /(1+np*cat*T*case*T*T):adjunct
  27 + \(1+np*cat*T*case*T*T):adjunct
  28 + \(1+np*Measure*unumber*ucase*ugender*uperson):Measure
  29 + |(1+adjp*TimeDescription*number*case*gender):unk
  30 + |(1+adjp*GenericDescription*number*case*gender):unk
  31 + |(1+advp*GenericDescription*T):unk
  32 + |(1+adjp*ServiceDescription*number*case*gender):unk
  33 + |(1+qub*T):unk
  34 + |(1+qub*OpAdNum):unk;
  35 +
  36 +num: \(1+qub):adjunct /(1+inclusion):adjunct;
  37 +
  38 +measure:
  39 + \(1+num*number*case*gender*person*congr*count+num*number*case*gender*person*rec*count):adjunct \(1+qub):adjunct /(1+inclusion):adjunct;
  40 +
  41 +#prepnp: \(1+advp*T):adjunct /(np*T*case*T*T+day-month+day+year+date+hour+hour-minute):unk \(1+qub):adjunct /(1+inclusion):adjunct;
  42 +#prepadjp: \(1+advp*T):adjunct /(adjp*T*case*T+adjp*sg*dat*m1+adjp*T*postp*T+adjp*sg*nom*f+advp*T):unk \(1+qub):adjunct /(1+inclusion):adjunct;
  43 +#compar: \(1+advp*T):adjunct /(np*T*case*T*T+prepnp*T*T+prepadjp*T*T):unk \(1+qub):adjunct /(1+inclusion):adjunct;
  44 +prepnp:
  45 + /(1+prepnp*coerced*sem*T*T):Merge
  46 + \(1+nie+advp*GenericDescription*T):adjunct
  47 + \(1+advp*Attitude*T):adjunct
  48 + \(1+qub):adjunct /(1+inclusion):adjunct
  49 + /(1+np*cat*T*case*T*T):CORE
  50 + \(1+prepnp*Time*sem*plemma*case):unk
  51 + /(1+advp*cat*T):CORE;
  52 +
  53 +prepadjp: \(1+advp*T*T):adjunct \(1+qub):adjunct /(1+inclusion):adjunct;
  54 +compar:
  55 + /(1+prepnp*coerced*sem*T*T):Merge
  56 + \(1+advp*T*T):adjunct
  57 + \(1+qub):adjunct
  58 + /(1+inclusion):adjunct;
  59 +
  60 +colonp: /(np*cat*T*nom*T*ter+prepnp*cat*sem*T*T+advp*cat*T):CORE;
  61 +
  62 +adjp:
  63 + \(1+qub):adjunct
  64 + /(1+inclusion):adjunct
  65 + \(1+adja):unk
  66 + /(1+adjp*cat*number*case*gender):adjunct
  67 + \(1+adjp*cat*number*case*gender):adjunct
  68 + \(1+jak):unk;
  69 +
  70 +adja: /hyphen:nosem;
  71 +
  72 +advp:
  73 + \(1+qub):adjunct
  74 + /(1+inclusion):adjunct
  75 + \(1+adja):unk
  76 + |(1+np*Hour*sg*nom*f*ter+prepnp*Hour*sem*T*T):unk
  77 + |(1+np*TimeOfDay*sg*inst*T*ter+np*TimeOfDay*sg*loc*T*ter+prepnp*TimeOfDay*sem*T*T+advp*TimeOfDay*T):unk
  78 + /(1+compar*coerced*niż*T):unk
  79 + \(1+nie+advp*X*T):unk
  80 + \(1+prepnp*cat*sem*T*T):adjunct
  81 + /(1+prepnp*cat*sem*T*T):adjunct
  82 + \(1+advp*cat*T):adjunct
  83 + /(1+advp*cat*T):adjunct
  84 + \(1+jak):unk;
  85 +
  86 +#FIXME: check whether this should be 'or' or 'or2'
  87 +ip:
  88 + |(1+adjp*Interrogative*number*nom*gender):unk
  89 + |(1+np*Location*number*nom*gender*person):unk
  90 + /(1+int):unk
  91 + \(1+qub*GenericDescription):adjunct
  92 + \(1+qub*T):adjunct
  93 + /(1+inclusion):adjunct
  94 + \(1+nie):nosem
  95 + |(1+aux-imp):nosem
  96 + |(1+aux-fut*number*gender*person+aux-past*number*gender*person):nosem
  97 + |(1+aglt*number*person):nosem
  98 + |(1+by):nosem
  99 + /(1+or):adjunct
  100 + \(1+ip*cat*number*gender*person):unk;
  101 +
  102 +aux-fut: null;
  103 +aux-past: null;
  104 +aglt: null;
  105 +
  106 +infp:
  107 + /(1+int):unk
  108 + \(1+qub*GenericDescription):adjunct
  109 + \(1+qub*T):adjunct
  110 + /(1+inclusion):adjunct
  111 + \(1+nie):nosem
  112 + /(1+infp*cat*T):adjunct
  113 + \(1+infp*cat*T):adjunct;
  114 +padvp: \(1+qub):adjunct /(1+inclusion):adjunct \(1+nie):nosem;
  115 +
  116 +cp:
  117 + /(1+ip*T*T*T*T+infp*T*T):CORE
  118 + /(1+ip*cat*T*T*T+infp*cat*T):CORE
  119 + /(1+ip*Attitude*T*T*T):CORE
  120 + /(1+ip*State*number*gender*T):CORE
  121 + /(1+ip*Attitude*number*gender*T):CORE
  122 + /(1+np*ncat*number*case*gender*nperson):CORE
  123 + \(1+cp*int*plemma):unk;
  124 +
  125 +ncp:
  126 + \(1+qub):adjunct
  127 + /(1+inclusion):adjunct
  128 + /cp*cat*ctype*plemma:unk;
  129 +
  130 +prepncp:
  131 + /ncp*cat*T*case*T*T*ctype*plemma:CORE;
  132 +
  133 +#lemma=i|lub|czy|bądź,pos=conj:
  134 +# QUANT[number=all_numbers,gender=all_genders,person=all_persons]
  135 +# (ip*number*gender*person/ip*T*T*T)\ip*T*T*T;
  136 +#lemma=,|i|lub|czy|bądź,pos=conj: (advp*mod/prepnp*T*T)\prepnp*T*T;
  137 +#lemma=,|i|lub|czy|bądź,pos=conj: QUANT[mode=0] (advp*mode/advp*mode)\prepnp*T*T;
  138 +#lemma=,|i|lub|czy|bądź,pos=conj: QUANT[mode=0] (advp*mode/prepnp*T*T)\advp*mode;
  139 +#lemma=,|i|lub|czy|bądź,pos=conj: (advp*mod/advp*T)\advp*T; #FIXME: a version that preserves mode would be useful
  140 +#lemma=,|i|lub|czy|bądź,pos=conj:
  141 +# QUANT[plemma=0,case=all_cases]
  142 +# (prepnp*plemma*case/prepnp*plemma*case)\prepnp*plemma*case;
  143 +#lemma=,|i|lub|czy|bądź,pos=conj:
  144 +# QUANT[number=all_numbers,case=all_cases,gender=all_genders,person=all_persons]
  145 +# (np*number*case*gender*person/np*T*case*T*T)\np*T*case*T*T;
  146 +#lemma=,|i|lub|czy|bądź,pos=conj:
  147 +# QUANT[number=all_numbers,case=all_cases,gender=all_genders]
  148 +# (adjp*number*case*gender/adjp*number*case*gender)\adjp*number*case*gender;
  149 +
  150 +jak: null;
  151 +
  152 +lex-się-qub: null;
  153 +nie: null;
  154 +by: null;
  155 +aux-imp: null;
  156 +qub: null;
  157 +interj: null;
  158 +sinterj: null;
  159 +hyphen: null;
  160 +int: null;
  161 +#lemma=„,pos=interp: QUANT[number=0,case=0,gender=0,person=0] (np*number*case*gender*person/rquot)/np*number*case*gender*person; #SetAttr("QUOT",Val "+",Var "x"
  162 +#lemma=«,pos=interp: QUANT[number=0,case=0,gender=0,person=0] (np*number*case*gender*person/rquot2)/np*number*case*gender*person; #SetAttr("QUOT",Val "+",Var "x"
  163 +#lemma=»,pos=interp: QUANT[number=0,case=0,gender=0,person=0] (np*number*case*gender*person/rquot3)/np*number*case*gender*person; #SetAttr("QUOT",Val "+",Var "x"
  164 +rquot: null;
  165 +rquot2: null;
  166 +rquot3: null;
  167 +#lemma=(,pos=interp: (inclusion/rparen)/(np*T*T*T*T+ip*T*T*T+adjp*T*T*T+prepnp*T*T); #SetAttr("INCLUSION",Val "+",
  168 +#lemma=[,pos=interp: (inclusion/rparen2)/(np*T*T*T*T+ip*T*T*T+adjp*T*T*T+prepnp*T*T); #SetAttr("INCLUSION",Val "+",
  169 +rparen: null;
  170 +rparen2: null;
  171 +
  172 +intp:
  173 + \(ip*cat*T*T*T+np*cat*T*nom*T*T+cp*cat*int*T):CORE;
  174 +
  175 +mp:
  176 + /(1+ip*cat*T*T*T+cp*cat*int*T+intp*cat+interj*cat):unk
  177 + /(1+np*cat*T*nom*T*T+intp*cat):unk;
  178 +
  179 +
  180 +s:
  181 + \?(ip*T*T*T*T+cp*T*int*T+cp*T*sub*T+mp*T+intp*T+interj*T+sinterj*T):null
  182 + \?(mp*Time*T*T+intp*Time+np*Time*sg*T*T*ter+prepnp*Time*sem*T*T+advp*Time*T+interj*T+sinterj*T):adjunct
  183 + \(1+prepnp*Time*nosem*na*acc):adjunct
  184 + \?(mp*Telephone*T*T+intp*Telephone+np*Telephone*sg*nom*T*ter+np*Email*sg*nom*T*ter):adjunct
  185 + \?(mp*Price*T*T+intp*Price+np*Price*sg*nom*T*ter+prepnp*Price*sem*T*T+interj*T+sinterj*T):adjunct
  186 + \?(mp*Rating*T*T+intp*Rating+np*Rating*T*nom*T*ter+prepnp*Rating*sem*T*T+interj*T+sinterj*T):adjunct
  187 + \(1+mp*Person*T*T+intp*Person+np*Person*sg*nom*T*ter):adjunct
  188 + \?(mp*Location*T*T+intp*Location+np*Location*T*T*T*ter+prepnp*Location*sem*T*T+advp*Location*T+interj*T+sinterj*T):adjunct
  189 + \(1+mp*Service*T*T+intp*Service+np*Service*T*nom*T*ter):adjunct
  190 + \(1+prepnp*Name*nosem*na*acc):adjunct
  191 + \(1+mp*Animal*T*T+intp*Animal+np*Animal*sg*nom*T*ter+prepnp*Animal*nosem*z*inst):adjunct
  192 + \(1+advp*Confirmation*T):adjunct
  193 + \(1+adjp*RateDescription*T*T*T):adjunct;
  194 +# |?(fragment):null;
  195 +
  196 +<sentence>: /?s:unk;
  197 +<paragraph>: \?<sentence>:unk;
  198 +
  199 +<root>: /(1+<paragraph>):unk /(1+s):unk /(1+<speaker-end>):unk /(1+or):unk /(1+np*T*nom*T*T):unk /(1+ip*T*T*T):unk;
  200 +<merge>: |?(fragment):unk;
  201 +<conll_root>: /(ip*T*T*T+cp*int*T+np*sg*voc*T*T+interj):unk;
  202 +
  203 +pro: null;
  204 +
  205 +or: null;
  206 +<colon>: \<speaker>:unk /(1+<squery>):unk;
  207 +or2: \?(ip*T*T*T*T+cp*int*T+np*sg*voc*T*T+interj):unk;
  208 +<speaker-end>: null
... ...