Commit aadb59c0639f7698b8020ad96ab4ed7d8ae02100

Authored by Wojciech Jaworski
1 parent 54ddc22b

dodanie guessera

guesser/generate.ml 0 → 100644
  1 +open Xstd
  2 +open Printf
  3 +
(* Per-machine locations of the linguistic resource directory ("zasoby"). *)
let nexus_path = "/home/yacheu/Dokumenty/Badania/Jezyk i Umysl/Przetwarzanie Języka Naturalnego/zasoby/"
let toshiba_ub_path = "/home/wjaworski/Dokumenty/zasoby/"
  6 +
(* Return this machine's host name as reported by [uname -n].
   Fix: the original never reaped the helper process, leaking a file
   descriptor and a zombie per call; we now close the channel after
   reading the single output line. *)
let get_host_name () =
  let chan = Unix.open_process_in "uname -n" in
  let name = input_line chan in
  ignore (Unix.close_process_in chan);
  name
  10 +
(* Resource directory for the current machine; fails fast on unknown hosts. *)
let zasoby_path =
  let host = get_host_name () in
  if host = "nexus" then nexus_path
  else if host = "toshiba-UB" then toshiba_ub_path
  (* else if host = "mozart" then "." *)
  else failwith ("unknown host: " ^ host)
  17 +
(* Location and dated snapshots of the Morfeusz/SGJP/Polimorf dictionaries. *)
let morfeusz_path = zasoby_path ^ "morfeusz/"
let sgjp_filename2015 = "sgjp-20151020.tab"
let polimorf_filename2015 = "polimorf-20151020.tab"
let sgjp_filename = "sgjp-20160508.tab"
let polimorf_filename = "polimorf-20160508.tab"
  23 +
  24 +(**********************************************************************************)
  25 +
(* Order-insensitive comparison of string lists: both operands are sorted
   before the structural compare, so lists that are permutations of each
   other are considered equal. *)
module OrderedStringList = struct

 type t = string list

 let compare x y = compare (Xlist.sort x compare) (Xlist.sort y compare)

end
  33 +
(* Same idea one level up: lists of string lists compared up to the order
   of the outer elements (inner lists are compared structurally). *)
module OrderedStringListList = struct

 type t = string list list

 let compare x y = compare (Xlist.sort x compare) (Xlist.sort y compare)

end
  41 +
(* Maps/sets keyed by the order-insensitive list types above. *)
module StringListMap = Xmap.Make(OrderedStringList)
module StringListListMap = Xmap.Make(OrderedStringListList)
module StringListListSet = Xset.Make(OrderedStringListList)
  45 +
(* Trie over tag positions: an internal node [T] branches on the next tag
   alternative list, a leaf [S] holds the set of final tags. *)
type tree = T of tree StringListMap.t | S of StringSet.t
  47 +
(* True iff the first interpretation's head position holds exactly one tag. *)
let single_tags interp =
  match interp with
  | [_] :: _ -> true
  | _ -> false
  51 +
(* Build a [tree] from interpretations (each a list of tag-alternative lists).
   When every interpretation is down to a single head tag, collapse all
   remaining tags into one [S] leaf; otherwise branch on the head tag list
   and recurse on the tails grouped under it. *)
let rec make_tree interp =
 if single_tags interp then S (StringSet.of_list (List.flatten (List.flatten interp))) else
 let map = Xlist.fold interp StringListMap.empty (fun map tags ->
 StringListMap.add_inc map (List.hd tags) [List.tl tags] (fun l -> (List.tl tags) :: l)) in
 T(StringListMap.map map make_tree)
  57 +
(* True when at least one value of [map] is already an [S] leaf, i.e. the
   bottom of the trie has been reached for some entry. *)
let is_s_tree map =
 StringListListMap.fold map false (fun b _ -> function
 S _ -> true
 | T _ -> b)
  62 +
(* Fold [f] over every leaf of a [tree].  [rev] accumulates the path of head
   tags in reverse; [f] receives the path restored to original order together
   with the leaf's tag set. *)
let rec fold_tree_rec rev s f = function
 S set -> f s (List.rev rev) set
 | T map -> StringListMap.fold map s (fun s tag tree ->
 fold_tree_rec (tag :: rev) s f tree)

(* Entry point: fold over [tree] with accumulator [s]. *)
let fold_tree tree s f = fold_tree_rec [] s f tree
  69 +
(* Iteratively merge tag positions from the tail end.  [map] maps the
   already-combined tail tags to the tree of remaining head positions.
   Once every tree is a leaf, emit the fully combined interpretations;
   otherwise fold each leaf's tag set into the tail and rebuild trees for
   the next round. *)
let rec combine_interps_rec map =
 if is_s_tree map then
 StringListListMap.fold map [] (fun interp tail_tags -> function
 S tag -> ((Xlist.sort (StringSet.to_list tag) compare) :: tail_tags) :: interp
 | _ -> failwith "combine_interps_rec")
 else
 let map = StringListListMap.fold map StringListListMap.empty (fun map tail_tags tree ->
 fold_tree tree map (fun map head_tags tag ->
 StringListListMap.add_inc map ((Xlist.sort (StringSet.to_list tag) compare) :: tail_tags) [head_tags] (fun l -> head_tags :: l))) in
 combine_interps_rec (StringListListMap.map map make_tree)
  80 +
(* Combine one category's interpretations into a minimal set of merged
   tag alternatives, seeding the iteration with an empty tail. *)
let combine_interp interp =
 let map = StringListListMap.add StringListListMap.empty [] (make_tree interp) in
 combine_interps_rec map
  84 +
(* POS categories whose interpretations are merged via [combine_interp];
   other categories are merely deduplicated. *)
let combine_pos = StringSet.of_list ["subst"; "depr"; "ppron12"; "ppron3"; "siebie"; "adj"; "num"; "ger"; "praet"; "fin"; "impt"; "imps"; "pcon"; "ppas"; "pact";
 "inf"; "bedzie"; "aglt"; "winien"; "pant"; "prep"]
  87 +
(* Canonical rank of each tag value (number, case, gender, degree), used by
   [tag_compare] to order tags inside a dot-joined alternative. *)
let tag_map = Xlist.fold [
 "sg",1;
 "pl",2;
 "nom",1;
 "gen",2;
 "dat",3;
 "acc",4;
 "inst",5;
 "loc",6;
 "voc",7;
 "m1",1;
 "m2",2;
 "m3",3;
 "n1",4;
 "n2",5;
 "f",6;
 "p1",7;
 "p2",8;
 "p3",9;
 "pos",1;
 "com",2;
 "sup",3
 ] StringMap.empty (fun map (k,v) -> StringMap.add map k v)
  111 +
(* Compare two tag names by their canonical rank in [tag_map].
   Raises [Failure] when either tag is not ranked. *)
let tag_compare x y =
  match StringMap.find tag_map x, StringMap.find tag_map y with
  | rank_x, rank_y -> compare rank_x rank_y
  | exception Not_found -> failwith ("tag_compare: " ^ x ^ " " ^ y)
  115 +
(* Merge a list of textual interpretations ("cat:tag1.tag2:...") into a
   single "|"-separated string with combined tag alternatives.
   Steps: parse and group by category; sanity-check the tag arity per
   category (only subst/3, depr/3, adj/4, adja/0 are accepted here — anything
   else is a hard failure); combine categories listed in [combine_pos],
   deduplicate the rest; serialize with ":" between positions and "." inside
   a position, tags ordered by [tag_compare]. *)
let combine_interps interps =
 let map = Xlist.fold interps StringMap.empty (fun map interp ->
 match Xlist.map (Str.split (Str.regexp ":") interp) (Str.split (Str.regexp "\\.")) with
 [cat] :: tags -> StringMap.add_inc map cat [tags] (fun l -> tags :: l)
 | _ -> failwith "combine_interps") in
 let map = StringMap.mapi map (fun cat interp ->
 Xlist.map interp (fun tags ->
 match cat,Xlist.size tags with
 "subst",3 -> tags
 | "depr",3 -> tags
 | "adj",4 -> tags
 | "adja",0 -> tags
 | _ -> failwith ("combine_interps: " ^ cat))) in
 let l = StringMap.fold map [] (fun l cat interp ->
 let interp = if StringSet.mem combine_pos cat then combine_interp interp else
 StringListListSet.to_list (StringListListSet.of_list interp) in
 (Xlist.map interp (fun tags -> [cat] :: tags)) @ l) in
 String.concat "|" (Xlist.map l (fun tags ->
 String.concat ":" (Xlist.map tags (fun l ->
 String.concat "." (Xlist.sort l tag_compare)))))
  136 +
  137 +(**********************************************************************************)
  138 +
(* Load a tab-separated dictionary file as (orth, lemma, interp) triples;
   columns beyond the third are ignored.  Raises [Failure] on short rows. *)
let load_tab filename =
 File.load_tab filename (function
(* let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in
 List.rev (Xlist.fold l [] (fun l line ->
 if String.length line = 0 then l else
 if String.get line 0 = '#' then l else
 let line = if String.get line (String.length line - 1) = '\r' then
 String.sub line 0 (String.length line - 1) else line in
 match Str.split (Str.regexp "\t") line with*)
 orth :: lemma :: interp :: _ -> orth,lemma,interp
 | line -> failwith ("load_tab: " ^ (String.concat "\t" line)))
  150 +
(* Like [load_tab] but keeps the optional 4th/5th classification columns,
   returning (orth, lemma, interp, cl, cl2) with "" for missing columns.
   Rows with fewer than three or more than five columns fail. *)
let load_tab_full filename =
 File.load_tab filename (function
(* let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in
 List.rev (Xlist.fold l [] (fun l line ->
 if String.length line = 0 then l else
 if String.get line 0 = '#' then l else
 let line = if String.get line (String.length line - 1) = '\r' then
 String.sub line 0 (String.length line - 1) else line in
 match Str.split (Str.regexp "\t") line with*)
 [orth; lemma; interp] -> orth,lemma,interp,"",""
 | [orth; lemma; interp; cl] -> orth,lemma,interp,cl,""
 | [orth; lemma; interp; cl; cl2] -> orth,lemma,interp,cl,cl2
(* | orth :: lemma :: interp :: cl :: cl2 -> (orth,lemma,interp,cl,String.concat ";" cl2) :: l *)
 | line -> failwith ("load_tab_full: " ^ (String.concat "\t" line)))
(* | _ -> failwith ("load_tab_full: " ^ line)) *)
  166 +
(* Load a dictionary file as a sorted, deduplicated list of
   "orth\tlemma\tinterp" strings. *)
let load_dict_as_set filename =
  let entries = load_tab filename in
  let set =
    Xlist.fold entries StringSet.empty (fun set (orth, lemma, interp) ->
      StringSet.add set (String.concat "\t" [orth; lemma; interp]))
  in
  List.sort compare (StringSet.to_list set)
  171 +
(* Five-column variant of [load_dict_as_set]: sorted, deduplicated
   "orth\tlemma\tinterp\tcl\tcl2" strings. *)
let load_dict_as_set_full filename =
  let entries = load_tab_full filename in
  let set =
    Xlist.fold entries StringSet.empty (fun set (orth, lemma, interp, cl, cl2) ->
      StringSet.add set (String.concat "\t" [orth; lemma; interp; cl; cl2]))
  in
  List.sort compare (StringSet.to_list set)
  176 +
(* Merge-walk two sorted string lists, printing "< x" for entries only in
   the first and "> y" for entries only in the second (diff(1) style);
   common entries are skipped. *)
let rec compare_dicts_rec file (la, lb) =
  match la, lb with
  | [], [] -> ()
  | [], y :: ys -> fprintf file "> %s\n" y; compare_dicts_rec file ([], ys)
  | x :: xs, [] -> fprintf file "< %s\n" x; compare_dicts_rec file (xs, [])
  | x :: xs, y :: ys ->
      if x = y then compare_dicts_rec file (xs, ys)
      else if x < y then begin
        fprintf file "< %s\n" x;
        compare_dicts_rec file (xs, y :: ys)
      end else begin
        fprintf file "> %s\n" y;
        compare_dicts_rec file (x :: xs, ys)
      end
  185 +
(* Diff two three-column dictionary files under [path], writing the
   "<"/">" lines to [filename_out]. *)
let compare_dicts path filename1 filename2 filename_out =
  let old_dict = load_dict_as_set (path ^ filename1) in
  let new_dict = load_dict_as_set (path ^ filename2) in
  File.file_out filename_out (fun out ->
    compare_dicts_rec out (old_dict, new_dict))
  191 +
(* Same as [compare_dicts] but keeps the classification columns. *)
let compare_dicts_full path filename1 filename2 filename_out =
  let old_dict = load_dict_as_set_full (path ^ filename1) in
  let new_dict = load_dict_as_set_full (path ^ filename2) in
  File.file_out filename_out (fun out ->
    compare_dicts_rec out (old_dict, new_dict))
  197 +
(* Comparison of dictionary versions (all runs currently disabled). *)
let _ =
(* compare_dicts_full morfeusz_path sgjp_filename2015 sgjp_filename "results/comparition_sgjp_full.out"; *)
(* compare_dicts_full morfeusz_path polimorf_filename2015 polimorf_filename "results/comparition_polimorf_full.out"; *)
(* compare_dicts morfeusz_path sgjp_filename2015 sgjp_filename "results/comparition_sgjp.out"; *)
 ()
  204 +
(* Split a dictionary into noun_/adj_/adv_/verb_/other_ files next to the
   original, keyed on the leading POS category of each interpretation.
   "cond" entries are silently dropped; any unhandled category is printed
   to stdout so it can be triaged.
   NOTE(review): [cat = ""] appears in both the verb and the other branch;
   the verb one wins, which looks like a leftover placeholder — confirm. *)
let split_dict path filename =
 let dict = load_tab (path ^ filename) in
 File.file_out (path ^ "noun_" ^ filename) (fun noun_file ->
 File.file_out (path ^ "adj_" ^ filename) (fun adj_file ->
 File.file_out (path ^ "adv_" ^ filename) (fun adv_file ->
 File.file_out (path ^ "verb_" ^ filename) (fun verb_file ->
 File.file_out (path ^ "other_" ^ filename) (fun other_file ->
 Xlist.iter dict (fun (orth,lemma,interp) ->
 (* leading category decides the bucket *)
 let cat,tags = match Str.split (Str.regexp ":") interp with
 cat :: tags -> cat,tags
 | _ -> failwith ("split_dict: " ^ interp) in
 if cat = "subst" || cat = "depr" then
 fprintf noun_file "%s\t%s\t%s\n" orth lemma interp else
 if cat = "adj" || cat = "adja"|| cat = "adjc"|| cat = "adjp" then
 fprintf adj_file "%s\t%s\t%s\n" orth lemma interp else
 if cat = "adv" then
 fprintf adv_file "%s\t%s\t%s\n" orth lemma interp else
 if cat = "inf" || cat = "praet"|| cat = "fin" || cat = "ppas" || cat = "pact" || cat = "pacta" ||
 cat = "impt" || cat = "imps" || cat = "pcon" || cat = "pant" || cat = "ger" || cat = "" then
 fprintf verb_file "%s\t%s\t%s\n" orth lemma interp else
 if cat = "bedzie" || cat = "pred"|| cat = "prep" || cat = "num" || cat = "aglt" || cat = "winien" ||
 cat = "qub" || cat = "brev" || cat = "comp" || cat = "interj" || cat = "burk" || cat = "conj" || cat = "ppron12" || cat = "ppron3" || cat = "" then
 fprintf other_file "%s\t%s\t%s\n" orth lemma interp else
 if cat = "cond" then () else
 print_endline cat))))))
  230 +
  231 +
(* Dictionary split (currently disabled). *)
let _ =
(* split_dict morfeusz_path sgjp_filename; *)
 ()
  236 +
(* Group (orth, lemma, interp) rows by lemma: lemma -> [(orth, interp); ...]. *)
let map_of_tab data =
  Xlist.fold data StringMap.empty (fun acc (orth, lemma, interp) ->
    let entry = orth, interp in
    StringMap.add_inc acc lemma [entry] (fun entries -> entry :: entries))
  240 +
(* The full 14-slot feminine noun sub-paradigm, used by [extract_ndm] as a
   fallback when no single form covers the whole paradigm. *)
let feminine = StringSet.of_list ["subst:pl:acc:f"; "subst:pl:dat:f"; "subst:pl:gen:f"; "subst:pl:inst:f"; "subst:pl:loc:f";
 "subst:pl:nom:f"; "subst:pl:voc:f"; "subst:sg:acc:f"; "subst:sg:dat:f"; "subst:sg:gen:f";
 "subst:sg:inst:f"; "subst:sg:loc:f"; "subst:sg:nom:f"; "subst:sg:voc:f"]
  244 +
  245 +
(* Separate non-inflecting (ndm) lemmas from inflecting (odm) ones.
   For each lemma: a form that occurs in every interpretation slot of the
   paradigm is a candidate non-inflecting form.  Exactly one candidate ->
   write it to the ndm file with its combined interpretations; no candidate
   -> retry restricted to the [feminine] sub-paradigm; several candidates
   are a hard error.  Everything not covered by the ndm form goes to the
   odm file unchanged. *)
let extract_ndm path filename =
 let dict = load_tab (path ^ filename) in
 let dict = map_of_tab dict in
 File.file_out (path ^ "ndm_" ^ filename) (fun ndm_file ->
 File.file_out (path ^ "odm_" ^ filename) (fun odm_file ->
 StringMap.iter dict (fun lemma l ->
 (* interp -> set of orths realizing it *)
 let map = Xlist.fold l StringMap.empty (fun map (orth,interp) ->
 StringMap.add_inc map interp (StringSet.singleton orth) (fun set -> StringSet.add set orth)) in
 (* orth -> number of interps it appears in *)
 let qmap = StringMap.fold map StringQMap.empty (fun qmap interp orths ->
 StringSet.fold orths qmap StringQMap.add) in
 let n = StringMap.size map in
 (* forms present in every interp slot *)
 let found = StringQMap.fold qmap [] (fun found orth v ->
 if v = n then orth :: found else found) in
 match found with
 [] ->
 (* no global candidate: retry on the feminine sub-paradigm only *)
 let fmap = StringMap.fold map StringMap.empty (fun fmap interp orths ->
 if StringSet.mem feminine interp then StringMap.add fmap interp orths else fmap) in
 let fqmap = StringMap.fold fmap StringQMap.empty (fun fqmap interp orths ->
 StringSet.fold orths fqmap StringQMap.add) in
 let fn = StringMap.size fmap in
 let ffound = StringQMap.fold fqmap [] (fun ffound orth v ->
 if v = fn then orth :: ffound else ffound) in
 (match ffound with
 [] -> Xlist.iter l (fun (orth,interp) -> fprintf odm_file "%s\t%s\t%s\n" orth lemma interp)
 | [orth] ->
 (* single feminine-wide form: ndm for the feminine part, odm for the rest *)
 let interps = combine_interps(*String.concat "|" (List.sort compare*) (StringMap.fold fmap [] (fun l interp _ -> interp :: l)) in
 fprintf ndm_file "%s\t%s\t%s\n" orth lemma interps;
 if StringQMap.size fqmap > 1 then failwith ("extract_ndm ni: " ^ orth);
 let map = StringMap.fold map StringMap.empty (fun map interp orths ->
 if StringSet.mem feminine interp then map else StringMap.add map interp orths) in
 StringMap.iter map (fun interp orths ->
 StringSet.iter orths (fun orth ->
 fprintf odm_file "%s\t%s\t%s\n" orth lemma interp))
 | _ -> failwith ("extract_ndm: " ^ (String.concat " " ffound)))
 | [orth] ->
 (* single paradigm-wide form: ndm entry; residual variant forms go to odm *)
 let interps = combine_interps(*String.concat "|" (List.sort compare*) (StringMap.fold map [] (fun l interp _ -> interp :: l)) in
 fprintf ndm_file "%s\t%s\t%s\n" orth lemma interps;
 if StringQMap.size qmap > 1 then
 StringMap.iter map (fun interp orths ->
 let orths = if StringSet.size orths = 1 then orths else StringSet.remove orths orth in
 StringSet.iter orths (fun orth ->
 fprintf odm_file "%s\t%s\t%s\n" orth lemma interp))
 | _ -> failwith ("extract_ndm: " ^ (String.concat " " found)))))
  289 +
(* Extraction of non-inflecting lemmas (currently disabled). *)
let _ =
(* extract_ndm morfeusz_path ("adj_" ^ sgjp_filename); *)
(* extract_ndm morfeusz_path ("noun_" ^ sgjp_filename); *)
 ()
  295 +
  296 +(**********************************************************************************)
  297 +
(* Lemmas carrying a detachable particle suffix (-kolwiek, -ż, -ś, ...);
   their forms are stripped by [find_kolwiek_suffixes] before stemming. *)
let kolwiek_lemmas = StringSet.of_list [
 (* adj *)
 "czyjkolwiek"; "czyjś"; "czyjże"; "jakiciś"; "jakikolwiek"; "jakisi"; "jakiś"; "jakiści";
 "jakiściś"; "jakiśkolwiek"; "jakiż"; "jakiżkolwiek"; "jakowyś"; "kijże"; "kiż"; "którykolwiek";
 "któryś"; "któryż"; "któryżkolwiek"; "niejakiś"; "takiż"; "takowyż"; "tenże"; "tyliż"; "ówże";
 (* noun *)
 "cokolwiek:s"; "cośkolwiek"; "cóżkolwiek"; "ktokolwiek"; "ktośkolwiek"; "któżkolwiek";
 "cociś"; "cosi"; "cosik"; "cosiś"; "coś:s"; "cościś"; "coże"; "cóż";
 "ktoś:s2"; "któż";
 (* adv *)
 "jakkolwiek"; "jakoś"; "małoż"; "niejakkolwiek"; "niejakoś"; (*"niemalże";*) ]
  309 +
(* Particle suffixes to strip, longest variants first so e.g. "żkolwiek"
   is removed before the bare "ż" could match. *)
let kolwiek_suffixes = [
 "żkolwiek"; "żekolwiek"; "śkolwiek"; "kolwiek"; "ż"; "że"; "ściś"; "ciś"; "ś"; "ści"; "sik"; "si"]
  312 +
(* For lemmas in [kolwiek_lemmas], strip every matching particle suffix from
   each form's orth (suffixes are tried in [kolwiek_suffixes] order and may
   stack); other lemmas pass through untouched. *)
let find_kolwiek_suffixes morfs =
 StringMap.mapi morfs (fun lemma interps ->
 if StringSet.mem kolwiek_lemmas lemma then
 Xlist.map interps (fun (orth,interp) ->
 (Xlist.fold kolwiek_suffixes orth (fun orth kolwiek_suf ->
 if Rules.check_sufix kolwiek_suf orth then
 Rules.cut_sufix kolwiek_suf orth
 else orth)), interp)
 else interps)
  322 +
  323 +
(* Lemmas excluded from automatic rule generation (hand-curated, merged with
   the external exception lists loaded from data/*.tab at the end).
   Empty strings are harmless padding in a set. *)
let exceptional_lemmata = StringSet.of_list ([
 (* error in the dictionary *)
 "ówże";
 (* multiple stems *)
 "twój:a"; "swój"; "mój:a"; "wszystek";
 (* stem alternations *)
 "co:s"; "cociś"; "cokolwiek:s"; "cosi"; "cosik"; "cosiś"; "coś:s"; "cościś"; "cośkolwiek"; "coże"; "cóż"; "cóżkolwiek";
 "kto"; "ktokolwiek"; "ktoś:s2"; "ktośkolwiek"; "któż"; "któżkolwiek"; "nikt"; "nic";
 "Angel"; "Apollo"; "Białystok"; "Bober"; "Dzięgiel"; "Engel"; "Gołąb:s2"; "Gózd"; "Hendel"; "Herschel"; "Jastrząb";
 "Kodrąb:s2"; "Kozioł"; "Krasnystaw"; "Majcher"; "Ob"; "Omulew"; "Orzeł"; "Różanystok"; "Schuster"; "Stępień"; "Słonim";
 "Wielkanoc"; "achtel"; "archiprezbiter"; "arcydzięgiel"; "bedel"; "ber"; "białagłowa"; "białodrzew"; "ceter"; "deszcz";
 "drama"; "dziób:s1"; "dzięgiel"; "dżemper"; "falafel"; "grubodziób"; "harbajtel"; "harbejtel"; "harmider"; "imćpan";
 "iściec"; "jarząb:s2"; "kierdel"; "kimel"; "kiper:s1"; "klaster"; "kliper"; "kosodrzew"; "kureń"; "manczester";
 "nadpiersień"; "osep"; "otrząs"; "pedel"; "piksel"; "podpiersień"; "podziem"; "prezbiter"; "protokół"; "przedpiersień";
 "ratel"; "rondel:s2"; "rozpiór:s1"; "rozpiór:s2"; "rzeczpospolita"; "rzep:s2"; "rzepień"; "rzewień"; "rąb"; "sosrąb";
 "srebrnodrzew"; "swąd"; "szmermel"; "szpiegierz"; "ulster"; "wab:s2"; "wermiszel"; "wilczełyko"; "woleoczko"; "włosień:s2";
 "zew"; "złotogłów"; "świreń"; "źreb"; "żółtodziób";
 "człowiek"; "półczłowiek"; "przedczłowiek"; "praczłowiek"; "nadczłowiek"; "git-człowiek"; ""; ""; ""; ""; ""; ""; ""; "";
 "przechrzest"; "chrzest"; "półdziecko"; "roczek:s2"; "rok:s1"; "tydzień"; ""; ""; ""; ""; ""; "";
 (* inflection alternations *)
 "niekażdy"; "każdy"; "niektóry:a"; "który"; "tenże"; "ten"; "tamten"; "kijże";
 "ucho:s2"; "dziecko"; "oko:s2"; "imię"; "nozdrze";
 "ZHR"; "WAT"; "VAT"; "PAT"; "FAT"; "DAT"; "PAGART"; "PIT:s2"; "PIT:s1"; "OIT:s2"; "OIT:s1"; "CIT";
 "NOT"; "LOT"; "KRRiT"; ""; ""; ""; ""; ""; ""; ""; ""; "";
 "być"; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
 (* left unchanged *)
 "czyjże"; "czyjś"; "czyjkolwiek"; "kiż"; "ów"; "ow"; "on:a"; "ki";
 "Pia"; "jo-jo"; "+lecie"; "";
 "zagrząźć"; "zrzeć";
 (* incomplete paradigm *)
 "zróść"; "zląc"; "zaróść"; "zaprząc"; "zaprzysiąc"; "zanieść:v2"; "zaląc"; "wzróść"; "wyróść"; "wyprząc"; "wyprzysiąc";
 "róść"; "sprzysiąc"; "sprząc"; "ugrząźć"; "uląc"; "upiec:v2"; "uprząc"; "uróść"; "wieść:v2"; "wprząc"; "wróść"; "wyląc";
 "powieść:v2"; "posiąc"; "przeląc"; "przeprząc"; "przeróść"; "przyprząc"; "przysiąc"; "przyróść"; "prząc"; "pójść:v2"; "rozprząc"; "rozróść";
 "krzywoprzysiąc"; "ląc"; "naróść"; "obróść"; "odprzysiąc"; "odprząc"; "odróść"; "oprzysiąc"; "podróść"; "pogrząźć"; "poprzysiąc"; "poróść";
 "dojść:v2"; "doprząc"; "doróść"; "dosiąc"; "grząźć"; "iść:v2";
 (* multiple stems *)
 "uwlec"; "wewlec"; "wlec"; "wwlec"; "wywlec"; "wyżec"; "zawlec"; "zażec"; "zewlec"; "zwlec"; "zżec"; "żec";
 "podwlec"; "podżec"; "powlec:v1"; "powlec:v2"; "przeoblec"; "przewlec"; "przeżec"; "przyoblec"; "przywlec"; "przyżec"; "rozwlec"; "rozżec";
 "dowlec"; "nawlec"; "oblec:v2"; "obwlec"; "odwlec"; "owlec"; "zeżreć";
 (* other *)
 "liźć"; "iść:v1"; "wyniść"; "wynijść"; "wyjść"; "wniść"; "wnijść"; "wejść"; "ujść"; "rozejść"; "pójść:v1"; "przyjść"; "przejść:v2"; "przejść:v1"; "podejść"; "odejść"; "obejść:v2"; "obejść:v1"; "najść:v2"; "najść:v1"; "nadejść"; "dojść:v1";
 "roztworzyć:v2"; "przetworzyć:v2"; "otworzyć";
 "zsiąść:v2"; "zsiąść:v1"; "zesiąść"; "zasiąść"; "wysiąść"; "współposiąść"; "wsiąść"; "usiąść"; "siąść"; "rozsiąść"; "przysiąść"; "przesiąść"; "powsiąść"; "posiąść"; "podsiąść"; "osiąść"; "obsiąść"; "nasiąść"; "dosiąść";
 "źreć:v1"; "zniść"; "znijść"; "znajść"; "zejść"; "zejść"; "zajść:v2"; "zajść:v1"; "wzniść"; "wznijść"; "wzejść"
(*
 "moi"; "twoi";
 (*"AIDS"; "BGŻ"; "BWZ"; "BZ";*) (*"Bandtkie";*) (*"CRZZ"; "FPŻ";*) (*"Jokai"; "Jókai"; "Linde";*)(* "MSZ"; "MWGzZ"; *)
 (*"NSZ"; "OPZZ";*) "Radetzky"; "Tagore"; (*"UNZ"; "URz"; "WBZ"; "ZSZ"; "ZWZ"; "ZZ";*) "aids";
 "arcyksiężna"; "cornflakes"; "księżna"; (*"scrabble";*) "sms"; "teścina";
 "Wielkanoc"; "białagłowa"; "rzeczpospolita"; "imćpan";
 "Ob"; "podziem"; "Pia"; "woleoczko"; "wilczełyko"; "jo-jo"; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
 "Omulew"; "drama"; (*"Kayah";*) "ratel"; "grubodziób"; "rozpiór:s1"; "ceter"; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
 "DJ"; "FIFA"; (*"manicure"; "Greenpeace"; "Google";*) ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
 "włosień:s2"; "deszcz"; "falafel"; "Krasnystaw";
 "Różanystok"; "Białystok"; "ZHR"; "rzep:s2"; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
 "IKEA"; "makao"; "macao"; "kakao"; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
 "dziecko"; "oko:s2"; "ucho:s2"; "półdziecko"; "b-cia"; ""; ""; ""; ""; ""; ""; ""; ""; "";
 "idea"; "ręka"; "cześć:s"; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
 "ABBA"; "UEFA"; "FAMA"; "SABENA"; "MENA"; "APA"; "NASA"; "ANSA";
 "NAFTA"; "LETTA"; "ETA"; "ELTA"; "EFTA"; "CEFTA";
 "WAT"; "VAT"; "PAT"; "FAT"; "DAT"; "PAGART";
 "PIT:s2"; "PIT:s1"; "OIT:s2"; "OIT:s1"; "CIT"; "NOT"; "LOT"; "KRRiT";
 "człowiek"; "półczłowiek"; "przedczłowiek"; "praczłowiek"; "nadczłowiek"; "git-człowiek"; ""; ""; ""; ""; ""; ""; ""; "";
 "szwa"; "hawanna"; "butaforia"; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
 "Skopie"; "Mathea"; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
 "poema:s1"; "klima:s1"; "dylema"; "dilemma"; "apoftegma"; "aksjoma"; ""; ""; ""; ""; ""; ""; ""; "";
 "burgrabia"; "gograbia"; "grabia"; "hrabia"; "margrabia"; "murgrabia"; "sędzia:s1"; "wicehrabia"; "współsędzia";
 "cieśla"; "bibliopola"; "świszczypałka"; "śwircałka"; "świerczałka"; "ścierciałka"; "tatka"; "sługa:s1"; "stupajka:s1"; "stepka"; "starowinka:s2"; "skurczypałka"; "mężczyzna"; "klecha";
 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";*)
 ] @ File.load_lines "data/obce.tab" @ File.load_lines "data/validated_adj.tab" @ File.load_lines "data/validated_noun.tab" @ File.load_lines "data/validated_verb.tab" @ File.load_lines "data/adv_nieodprzymiotnikowe.tab")
  398 +
(* Drop every lemma listed in [exceptional_lemmata] from [dict]. *)
let remove_exceptional_lemmata dict =
  StringMap.fold dict StringMap.empty (fun kept lemma interps ->
    if not (StringSet.mem exceptional_lemmata lemma) then
      StringMap.add kept lemma interps
    else kept)
  403 +
(* Run stem generation over every (cleaned) entry of [path ^ filename]
   purely for its side effects/failures; the generated stems are discarded. *)
let check_stem_generation stem_sel path filename =
  let dict = load_tab (path ^ filename) in
  let dict = map_of_tab dict in
  let dict = remove_exceptional_lemmata dict in
  let dict = find_kolwiek_suffixes dict in
  StringMap.iter dict (fun lemma forms ->
    ignore (Stem.generate_stem stem_sel lemma forms))
  412 +
(* Check that stemming works (currently disabled). *)
let _ =
(* check_stem_generation Stem.adj_stem_sel morfeusz_path ("odm_adj_" ^ sgjp_filename); *)
(* check_stem_generation Stem.noun_stem_sel morfeusz_path ("odm_noun_" ^ sgjp_filename); *)
 ()
  418 +
(* Keep only entries whose interpretation does not end in ":com" or ":sup"
   (comparative/superlative degrees); original order is preserved. *)
let remove_com_sup dict =
  List.filter
    (fun (_, _, interp) ->
      not (Rules.check_sufix ":com" interp || Rules.check_sufix ":sup" interp))
    dict
  422 +
(* Generate positive-degree adjective rules from [dict] and dump them to
   [rules_filename], one "@RULES interp" section per interpretation with a
   count and sample lemmas per rule. *)
let generate_adj_pos_rules rules_filename dict =
 let dict = map_of_tab dict in
 let dict = find_kolwiek_suffixes dict in
 let dict = remove_exceptional_lemmata dict in
 let rules = StringMap.fold dict StringMap.empty (RuleGenerator.generate_rules_entry Stem.adj_stem_sel) in
 File.file_out rules_filename (fun file ->
 StringMap.iter rules (fun interp rules2 ->
 fprintf file "\n@RULES %s\n" interp;
 StringMap.iter rules2 (fun rule (q,l) ->
 fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l))))
  433 +
(* Partition dictionary entries into groups keyed by [interp_sel]
   (interp -> group name, see [load_interp_sel]).
   Fix: the original destructured the tuples as (lemma,orth,interp) although
   [load_tab] produces (orth,lemma,interp); the values were only passed
   through so behavior was unchanged, but the names were misleading.
   Raises [Failure] when an interpretation has no group. *)
let split_into_groups interp_sel dict =
  Xlist.fold dict StringMap.empty (fun groups (orth, lemma, interp) ->
    let group =
      try StringMap.find interp_sel interp
      with Not_found -> failwith ("split_into_groups: " ^ interp)
    in
    StringMap.add_inc groups group [orth, lemma, interp]
      (fun entries -> (orth, lemma, interp) :: entries))
  440 +
(* Load data/interps.tab-style rows (group, interp, label) into a map
   interp -> group; the label column is read but unused here. *)
let load_interp_sel filename =
 File.fold_tab filename StringMap.empty (fun interp_sel -> function
(* let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in
 Xlist.fold l StringMap.empty (fun interp_sel line ->
 if String.length line = 0 then interp_sel else
 if String.get line 0 = '#' then interp_sel else
 match Str.split (Str.regexp "\t") line with*)
(* [group;interp;label] -> StringMap.add_inc interp_sel group [interp,label] (fun l -> (interp,label) :: l) *)
 [group;interp;label] -> StringMap.add interp_sel interp group
 | line -> failwith ("load_interp_sel: " ^ (String.concat "\t" line)))
  451 +
(* Load the adjective dictionary, split it by group and generate rules for
   the "adj" (positive-degree) group; expects exactly three groups and
   fails listing the groups found otherwise. *)
let generate_adj_rules path filename adj_pos_rules_filename =
 let interp_sel = load_interp_sel "data/interps.tab" in
 let dict = load_tab (path ^ filename) in
 let dict = split_into_groups interp_sel dict in
 if StringMap.size dict <> 3 then failwith ("generate_adj_rules: " ^
 String.concat " " (StringMap.fold dict [] (fun l s _ -> s :: l))) else
 generate_adj_pos_rules adj_pos_rules_filename (StringMap.find dict "adj");
 ()
  460 +
(* Generate comparative-degree adjective rules and dump them to
   [adj_com_rules_filename]; exceptional lemmas are deliberately kept
   (the removal is commented out). *)
let generate_adj_com_rules path filename adj_com_rules_filename =
 let dict = load_tab (path ^ filename) in
 let dict = map_of_tab dict in
 let dict = find_kolwiek_suffixes dict in
(* let dict = remove_exceptional_lemmata dict in *)
 let rules = StringMap.fold dict StringMap.empty (RuleGenerator.generate_rules_com_entry Stem.adj_stem_sel) in
 File.file_out adj_com_rules_filename (fun file ->
 StringMap.iter rules (fun interp rules2 ->
 fprintf file "\n@RULES %s\n" interp;
 StringMap.iter rules2 (fun rule (q,l) ->
 fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l))))
  472 +
(* Collect the lemmas of [dict] whose full set of forms passes
   [Rules.validate_entry]. *)
let find_validated_lemata_adj_pos dict =
  let grouped = map_of_tab dict in
  StringMap.fold grouped [] (fun validated lemma forms ->
    match Rules.validate_entry lemma forms with
    | true -> lemma :: validated
    | false -> validated)
  477 +
(* Load an adjective dictionary, split it into groups and return the
   validated lemmas of the "adj" group; expects exactly three groups.
   Fix: the failure message was copy-pasted from [generate_adj_rules] and
   misattributed the error to that function. *)
let find_validated_lemata_adj path filename =
 let interp_sel = load_interp_sel "data/interps.tab" in
 let dict = load_tab (path ^ filename) in
 let dict = split_into_groups interp_sel dict in
 if StringMap.size dict <> 3 then failwith ("find_validated_lemata_adj: " ^
 String.concat " " (StringMap.fold dict [] (fun l s _ -> s :: l))) else
 find_validated_lemata_adj_pos (StringMap.find dict "adj")
  485 +
(* Generate positive-degree adverb rules (lemma used as stem) and dump
   them to [rules_filename] in the same @RULES format as the adjectives. *)
let generate_adv_pos_rules rules_filename dict =
 let dict = map_of_tab dict in
 let dict = find_kolwiek_suffixes dict in
 let dict = remove_exceptional_lemmata dict in
 let rules = StringMap.fold dict StringMap.empty (RuleGenerator.generate_rules_entry_lemma_as_stem Stem.adv_stem_sel StringMap.empty) in
 File.file_out rules_filename (fun file ->
 StringMap.iter rules (fun interp rules2 ->
 fprintf file "\n@RULES %s\n" interp;
 StringMap.iter rules2 (fun rule (q,l) ->
 fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l))))
  496 +
(* Load the adverb dictionary, split it by group and generate rules for the
   "adv" group; expects exactly three groups, fails listing them otherwise. *)
let generate_adv_rules path filename adv_pos_rules_filename =
 let interp_sel = load_interp_sel "data/interps.tab" in
 let dict = load_tab (path ^ filename) in
 let dict = split_into_groups interp_sel dict in
 if StringMap.size dict <> 3 then failwith ("generate_adv_rules: " ^
 String.concat " " (StringMap.fold dict [] (fun l s _ -> s :: l))) else
 generate_adv_pos_rules adv_pos_rules_filename (StringMap.find dict "adv");
 ()
  505 +
(* Generate comparative-degree adverb rules (lemma used as stem) and dump
   them to [adv_com_rules_filename]. *)
let generate_adv_com_rules path filename adv_com_rules_filename =
 let dict = load_tab (path ^ filename) in
 let dict = map_of_tab dict in
 let dict = find_kolwiek_suffixes dict in
 let dict = remove_exceptional_lemmata dict in
 let rules = StringMap.fold dict StringMap.empty (RuleGenerator.generate_rules_com_entry_lemma_as_stem Stem.adv_stem_sel) in
 File.file_out adv_com_rules_filename (fun file ->
 StringMap.iter rules (fun interp rules2 ->
 fprintf file "\n@RULES %s\n" interp;
 StringMap.iter rules2 (fun rule (q,l) ->
 fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l))))
  517 +
  518 +
(* Generate noun rules from [dict] and dump them to [rules_filename];
   the large commented-out block is an earlier inline implementation kept
   for reference, now superseded by [RuleGenerator.generate_rules_entry]. *)
let generate_noun_rules2 rules_filename dict =
 let dict = map_of_tab dict in
 let dict = find_kolwiek_suffixes dict in
 let dict = remove_exceptional_lemmata dict in
 let rules = StringMap.fold dict StringMap.empty (RuleGenerator.generate_rules_entry Stem.noun_stem_sel) in
(* let rules = StringMap.fold dict StringMap.empty (fun rules lemma l ->
 let interps = Xlist.fold l StringMap.empty (fun map (orth,interp) ->
 StringMap.add_inc map interp [orth] (fun l -> orth :: l)) in
 let stem(*,_*) = generate_stem (*0*) interps lemma noun_stem_sel in
 let cl = classify_noun lemma stem interps noun_classes in
 if cl <> "A" && cl <> "II" (*&& cl <> "Ę" && cl <> "Ą"*) then rules else
 if has_known_inflexion_noun stem interps then rules else
 let interps = select_inflexion rules_a stem interps in
 let stem2 = cut_stem_sufix stem in
 StringMap.fold interps rules (fun rules interp orths ->
 Xlist.fold orths rules (fun rules orth ->
 let n = find_common_prefix_length [stem2;orth] in
 let rules2 = try StringMap.find rules interp with Not_found -> StringMap.empty in
 let a = cut_prefixn n orth in
 let b = cut_prefixn n stem in
 let c,f = rule_code (a,b) in
(* let rule = sprintf "%s\t%s\t%s\t%s" cl c a b in *)
 let rule = cl ^ "\t" ^ if f then "\t" ^ c else sprintf "%s\t%s\t%s" c a b in
 let rules2 = StringMap.add_inc rules2 rule (1,[lemma]) (fun (q,l) -> q+1, if q < 20 then lemma :: l else l) in
 StringMap.add rules interp rules2))) in*)
 File.file_out rules_filename (fun file ->
 StringMap.iter rules (fun interp rules2 ->
 fprintf file "\n@RULES %s\n" interp;
 StringMap.iter rules2 (fun rule (q,l) ->
 fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l))))
  549 +
(* Load the noun dictionary, split it by group and generate rules for the
   "noun" group; expects exactly one group, fails listing them otherwise. *)
let generate_noun_rules path filename noun_rules_filename =
 let interp_sel = load_interp_sel "data/interps.tab" in
 let dict = load_tab (path ^ filename) in
 let dict = split_into_groups interp_sel dict in
 if StringMap.size dict <> 1 then failwith ("generate_noun_rules: " ^
 String.concat " " (StringMap.fold dict [] (fun l s _ -> s :: l))) else
 generate_noun_rules2 noun_rules_filename (StringMap.find dict "noun");
 ()
  558 +
(* Generate verb rules (lemma used as stem, with a secondary stem selector)
   and dump them to [rules_filename] in the @RULES format. *)
let generate_verb_rules2 rules_filename dict =
 let dict = map_of_tab dict in
 let dict = remove_exceptional_lemmata dict in
 let rules = StringMap.fold dict StringMap.empty (RuleGenerator.generate_rules_entry_lemma_as_stem Stem.verb_stem_sel Stem.verb_stem_sel2) in
 File.file_out rules_filename (fun file ->
 StringMap.iter rules (fun interp rules2 ->
 fprintf file "\n@RULES %s\n" interp;
 StringMap.iter rules2 (fun rule (q,l) ->
 fprintf file "\t%s\t# %d %s\n" rule q (String.concat " " l))))
  568 +
(* Load the verb dictionary, split it by group and generate rules for the
   "verb" group; expects exactly two groups, fails listing them otherwise. *)
let generate_verb_rules path filename rules_filename =
 let interp_sel = load_interp_sel "data/interps.tab" in
 let dict = load_tab (path ^ filename) in
 let dict = split_into_groups interp_sel dict in
 if StringMap.size dict <> 2 then failwith ("generate_verb_rules: " ^
 String.concat " " (StringMap.fold dict [] (fun l s _ -> s :: l))) else
 generate_verb_rules2 rules_filename (StringMap.find dict "verb");
 ()
  577 +
(* Collect every noun lemma whose listed forms pass [Rules.validate_entry].
   Returns the lemmata in no particular order. *)
let find_validated_lemata_noun path filename =
  let entries = map_of_tab (load_tab (path ^ filename)) in
  StringMap.fold entries [] (fun acc lemma forms ->
    if Rules.validate_entry lemma forms then lemma :: acc else acc)
  583 +
(* For every noun entry, print each orth form whose tag lookup via
   [Rules.find_tags_entry] is ambiguous (not exactly one candidate),
   together with all candidate attribute lists. *)
let find_tags_lemata_noun path filename =
  let entries = map_of_tab (load_tab (path ^ filename)) in
  StringMap.iter entries (fun lemma forms ->
    Xlist.iter (Rules.find_tags_entry lemma forms) (fun (orth, found, _interp) ->
      if Xlist.size found <> 1 then
        let descr =
          Xlist.map found (fun (_, kvs) ->
            String.concat " " (Xlist.map kvs (fun (k, v) -> k ^ "=" ^ v))) in
        printf "%s\t%s\t%d\n %s\n" orth lemma (Xlist.size found)
          (String.concat "\n " descr)))
  593 +
(* As [find_validated_lemata_noun], but starting from an already loaded
   tab dictionary rather than a file path. *)
let find_validated_lemata_verb2 dict =
  let entries = map_of_tab dict in
  StringMap.fold entries [] (fun acc lemma forms ->
    if Rules.validate_entry lemma forms then lemma :: acc else acc)
  598 +
(* Collect validated verb lemmata from the dictionary at
   [path ^ filename], which must split into exactly two POS groups (one
   of them "verb").
   Raises [Failure] listing the group names when the split is unexpected. *)
let find_validated_lemata_verb path filename =
  let interp_sel = load_interp_sel "data/interps.tab" in
  let dict = load_tab (path ^ filename) in
  let dict = split_into_groups interp_sel dict in
  if StringMap.size dict <> 2 then
    (* was "generate_verb_rules: " — a copy-paste error that attributed
       the failure to the wrong function *)
    failwith ("find_validated_lemata_verb: " ^
      String.concat " " (StringMap.fold dict [] (fun l s _ -> s :: l)))
  else
    find_validated_lemata_verb2 (StringMap.find dict "verb")
  606 +
(* As [find_tags_lemata_noun], but starting from an already loaded tab
   dictionary: print each form whose tag lookup is ambiguous, with all
   candidate attribute lists. *)
let find_tags_lemata_verb2 dict =
  let entries = map_of_tab dict in
  StringMap.iter entries (fun lemma forms ->
    Xlist.iter (Rules.find_tags_entry lemma forms) (fun (orth, found, _interp) ->
      if Xlist.size found <> 1 then
        let descr =
          Xlist.map found (fun (_, kvs) ->
            String.concat " " (Xlist.map kvs (fun (k, v) -> k ^ "=" ^ v))) in
        printf "%s\t%s\t%d\n %s\n" orth lemma (Xlist.size found)
          (String.concat "\n " descr)))
  615 +
(* Report ambiguous tag lookups for the verb group of the dictionary at
   [path ^ filename], which must split into exactly two POS groups.
   Raises [Failure] listing the group names when the split is unexpected. *)
let find_tags_lemata_verb path filename =
  let interp_sel = load_interp_sel "data/interps.tab" in
  let dict = load_tab (path ^ filename) in
  let dict = split_into_groups interp_sel dict in
  if StringMap.size dict <> 2 then
    (* was "generate_verb_rules: " — a copy-paste error that attributed
       the failure to the wrong function *)
    failwith ("find_tags_lemata_verb: " ^
      String.concat " " (StringMap.fold dict [] (fun l s _ -> s :: l)))
  else
    find_tags_lemata_verb2 (StringMap.find dict "verb")
  623 +
(* Entry point: rule-file generation.  Every generator is currently
   disabled; uncomment the relevant line to regenerate a rule file. *)
let _ =
(* generate_adj_rules morfeusz_path ("odm_adj_" ^ sgjp_filename) "rules/ADJ-FLEX6.dic"; *)
(* generate_noun_rules morfeusz_path ("odm_noun_" ^ sgjp_filename) "rules/NOUN-FLEX6.dic"; *)
(* generate_adj_com_rules morfeusz_path ("odm_adj_" ^ sgjp_filename) "rules/ADJ-FLEX-COM6.dic"; *)
(* generate_adv_rules morfeusz_path ("adv_" ^ sgjp_filename) "rules/ADV-FLEX6.dic"; *)
(* generate_adv_com_rules morfeusz_path ("adv_" ^ sgjp_filename) "rules/ADV-FLEX-COM6.dic"; *)
(* generate_verb_rules morfeusz_path ("verb_" ^ sgjp_filename) "rules/VERB-FLEX6.dic"; *)
(* generate_verb_rules "data/" "verbs_ex.tab" "rules/VERB-FLEX6.dic"; *)
  ()
  633 +
(* Entry point: paradigm-validation reports.  All variants are disabled;
   uncomment one [find_validated_lemata_*] line together with the final
   print loop to list validated lemmata. *)
let _ =
(* let l = find_validated_lemata_adj morfeusz_path ("odm_adj_" ^ sgjp_filename) in *)
(* let l = find_validated_lemata_noun morfeusz_path ("odm_noun_" ^ sgjp_filename) in *)
(* let l = find_validated_lemata_verb morfeusz_path ("verb_" ^ sgjp_filename) in *)
(* let l = find_validated_lemata_noun "data/" "nouns_ex.tab" in *)
(* let l = find_validated_lemata_verb "data/" "verbs_ex.tab" in *)
(* Xlist.iter l print_endline; *)
  ()
  642 +
(* Entry point: report forms with ambiguous tag lookup.  Currently runs
   on the example noun dictionary only. *)
let _ =
(* find_tags_lemata_verb "data/" "verbs_ex.tab"; *)
  find_tags_lemata_noun "data/" "nouns_ex.tab";
  ()
  647 +
(* Entry point: dump of the rule tables, currently disabled. *)
let _ =
(* Rules.print "results/rules/"; *)
  ()
  651 +
  652 +
  653 +
  654 +(***
  655 +let expand_tags tags =
  656 + if tags = "" then [] else
  657 + List.flatten (Xlist.map (Str.split (Str.regexp "|") tags) (fun tags ->
  658 + let tags = Xlist.map (Str.split (Str.regexp ":") tags) (Str.split (Str.regexp "\\.")) in
  659 + Xlist.map (Xlist.multiply_list tags) (String.concat ":")))
  660 +
  661 +let prepare_rules l =
  662 + Xlist.fold l [] (fun rules rule_set_name ->
  663 + let rule_set = StringMap.find rule_map rule_set_name in
  664 + Xlist.fold rule_set rules (fun rules (alternation_name, sufix, tags) ->
  665 + let alternation = StringMap.find alternation_map alternation_name in
  666 + Xlist.fold alternation rules (fun rules (a,b) ->
  667 + (a ^ sufix, b, expand_tags tags) :: rules)))
  668 +
  669 +let prepare_rules_simple l =
  670 + Xlist.fold l [] (fun rules rule_set_name ->
  671 + let rule_set = StringMap.find rule_map rule_set_name in
  672 + Xlist.fold rule_set rules (fun rules (alternation_name, sufix, tags) ->
  673 + let alternation = StringMap.find alternation_map alternation_name in
  674 + Xlist.fold alternation rules (fun rules (a,b) ->
  675 + (a ^ sufix, b, [tags]) :: rules)))
  676 +
  677 +let rules_adj_flex = prepare_rules_simple ["ADJ-FLEX"]
  678 +let rules_adj_lemma = prepare_rules ["ADJ-LEMMA"]
  679 +
  680 +let rules_a = prepare_rules ["NOUN-FLEX-GENERAL";"NOUN-FLEX-A"]
  681 +let rules_noun_as_adj = prepare_rules ["NOUN-FLEX-GENERAL";"NOUN-ADJ-FLEX"]
  682 +let rules_noun_lemma = prepare_rules ["NOUN-LEMMA"]
  683 +
  684 +let is_applicable_rule (a,_,_) s = check_sufix a s
  685 +
  686 +let apply_rule (a,b,_) s =
  687 + (cut_sufix a s) ^ b
  688 +
  689 +let match_interp (_,_,l) s =
  690 + Xlist.mem l s
  691 +
  692 +let get_interps (_,_,l) = l
  693 +
  694 +let apply_rules rules s =
  695 + Xlist.fold rules [] (fun l rule ->
  696 + if is_applicable_rule rule s then
  697 + (apply_rule rule s, get_interps rule) :: l
  698 + else l)
  699 +
  700 +let check_inflexion rules stem interps =
  701 + StringMap.fold interps true (fun b interp orths ->
  702 + Xlist.fold orths b (fun b orth ->
  703 + let c = Xlist.fold rules false (fun c rule ->
  704 + if is_applicable_rule rule orth && match_interp rule interp then
  705 + if apply_rule rule orth = stem then true else c
  706 + else c) in
  707 + if c then b else false))
  708 +
  709 +let has_known_inflexion_noun stem interps =
  710 + let b1 = check_inflexion rules_a stem interps in
  711 + let b2 = check_inflexion rules_noun_as_adj stem interps in
  712 + b1 || b2
  713 +
  714 +let has_known_inflexion_adj stem interps =
  715 + let b = check_inflexion rules_adj_flex stem interps in
  716 + b
  717 +
  718 +let select_inflexion rules stem interps =
  719 + StringMap.fold interps StringMap.empty (fun interps interp orths ->
  720 + let orths = Xlist.fold orths [] (fun orths orth ->
  721 + let c = Xlist.fold rules false (fun c rule ->
  722 + if is_applicable_rule rule orth && match_interp rule interp then
  723 + if apply_rule rule orth = stem then true else c
  724 + else c) in
  725 + if c then orths else orth :: orths) in
  726 + if orths = [] then interps else StringMap.add interps interp orths)
  727 +***)
... ...
guesser/makefile 0 → 100755
# Build rules for the guesser tools (generate, morfeusz, inflexion, ...).
OCAMLC=ocamlc
OCAMLOPT=ocamlopt
OCAMLDEP=ocamldep
INCLUDES=-I +xml-light -I +xlib -I ../../9WalLCGslo
OCAMLFLAGS=$(INCLUDES) -g
OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa xlib.cmxa
#OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa xlib.cmxa tokenizer.cmxa

MODS= morf.ml morfeusz.ml
MODS2= morf.ml inflexionConv.ml
MODS3= morf.ml inflexion.ml
MODS4= morf.ml morphemes.ml

all:
	$(OCAMLOPT) -o generate $(OCAMLOPTFLAGS) schemata.ml rules.ml stem.ml ruleGenerator.ml generate.ml
# $(OCAMLOPT) -o morfeusz $(OCAMLOPTFLAGS) $(MODS)
# $(OCAMLOPT) -o inflexion $(OCAMLOPTFLAGS) $(MODS2)
# $(OCAMLOPT) -o inflexion_test $(OCAMLOPTFLAGS) $(MODS3)
# $(OCAMLOPT) -o morphemes $(OCAMLOPTFLAGS) $(MODS4)

lib:
	$(OCAMLOPT) -linkall -a -o inflexion.cmxa $(INCLUDES) $(MODS3)


.SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx

.mll.ml:
	ocamllex $<

.mly.mli:
	ocamlyacc $<

.mly.ml:
	ocamlyacc $<

.ml.cmo:
	$(OCAMLC) $(OCAMLFLAGS) -c $<

# Fixed typo: $(OCAMLFALGS) -> $(OCAMLFLAGS).  The misspelled variable
# expanded to nothing, so interfaces were compiled without INCLUDES/-g.
.mli.cmi:
	$(OCAMLC) $(OCAMLFLAGS) -c $<

.ml.cmx:
	$(OCAMLOPT) $(OCAMLOPTFLAGS) -c $<

clean:
	rm -f *~ *.cm[oix] *.o morfeusz inflexion inflexion_test morphemes
... ...
guesser/ruleGenerator.ml 0 → 100644
  1 +open Xstd
  2 +open Printf
  3 +
(* Local alias for the shared alternation table defined in [Rules]. *)
let alternation_map = Rules.alternation_map
  5 +
(* [rule_types] maps a textual rule "orth-suffix\tstem-suffix" to a short
   class code such as "{x}ych\t{x}".  Every candidate entry below is
   commented out, so the fold currently runs over an empty list and the
   resulting map is empty; the commented bank documents the suffix
   classes that [rule_code] is meant to recognise.  On a duplicate rule
   a diagnostic is printed and the previously stored code is kept
   (assuming [StringMap.add_inc] passes the existing value — see its use
   elsewhere in this codebase). *)
let rule_types = Xlist.fold [
(* Xlist.map (StringMap.find alternation_map "obce_ch") (fun (_,s,t) -> sprintf "%sch\t%s" s t), "{x}ych\t{x}";
  Xlist.map (StringMap.find alternation_map "obce_ch") (fun (_,s,t) -> sprintf "%smi\t%s" s t), "{x}ymi\t{x}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_iy") (fun (_,s,t) -> sprintf "%s\t%s" s t), "{'}y\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_iy") (fun (_,s,t) -> sprintf "%sch\t%s" s t), "{'}ych\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_iy") (fun (_,s,t) -> sprintf "%sm\t%s" s t), "{'}ym\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_iy") (fun (_,s,t) -> sprintf "%smi\t%s" s t), "{'}ymi\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_ae") (fun (_,s,t) -> sprintf "%se\t%s" s t), "{'}e\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_ae") (fun (_,s,t) -> sprintf "%sego\t%s" s t), "{'}ego\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_ae") (fun (_,s,t) -> sprintf "%sej\t%s" s t), "{'}ej\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_ae") (fun (_,s,t) -> sprintf "%semu\t%s" s t), "{'}emu\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_ae") (fun (_,s,t) -> sprintf "%sa\t%s" s t), "{'}a\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_ae") (fun (_,s,t) -> sprintf "%są\t%s" s t), "{'}ą\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_ae") (fun (_,s,t) -> sprintf "%so\t%s" s t), "{'}o\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_ae") (fun (_,s,t) -> sprintf "%sę\t%s" s t), "{'}ę\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_ae") (fun (_,s,t) -> sprintf "%su\t%s" s t), "{'}u\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_ae") (fun (_,s,t) -> sprintf "%sów\t%s" s t), "{'}ów\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_ae") (fun (_,s,t) -> sprintf "%som\t%s" s t), "{'}om\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_ae") (fun (_,s,t) -> sprintf "%sami\t%s" s t), "{'}ami\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_ae") (fun (_,s,t) -> sprintf "%sach\t%s" s t), "{'}ach\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_ae") (fun (_,s,t) -> sprintf "%sowi\t%s" s t), "{'}owi\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_ae") (fun (_,s,t) -> sprintf "%sowie\t%s" s t), "{'}owie\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_ae") (fun (_,s,t) -> sprintf "%sum\t%s" s t), "{'}um\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_ae") (fun (_,s,t) -> sprintf "%sem\t%s" s t), "{'}em\t{'}";
(* Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_ii") (fun (_,s,t) -> sprintf "%s\t%s" s t), "{'}ii\t{'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_yj") (fun (_,s,t) -> sprintf "%s\t%s" s t), "{'}yj\t{'}";*)
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_wyglos") (fun (_,s,t) -> sprintf "%s\t%s" s t), "{'}ε\t{'}";
(* Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%s\t%s" s t), "{v'}y\t{v'}"; *)
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%sch\t%s" s t), "{v'}ych\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%sm\t%s" s t), "{v'}ym\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%smi\t%s" s t), "{v'}ymi\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%se\t%s" s t), "{v'}e\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%sego\t%s" s t), "{v'}ego\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%sej\t%s" s t), "{v'}ej\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%semu\t%s" s t), "{v'}emu\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%sa\t%s" s t), "{v'}a\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%są\t%s" s t), "{v'}ą\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%so\t%s" s t), "{v'}o\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%sę\t%s" s t), "{v'}ę\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%su\t%s" s t), "{v'}u\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%sów\t%s" s t), "{v'}ów\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%som\t%s" s t), "{v'}om\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%sami\t%s" s t), "{v'}ami\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%sach\t%s" s t), "{v'}ach\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%sowi\t%s" s t), "{v'}owi\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%sowie\t%s" s t), "{v'}owie\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%sum\t%s" s t), "{v'}um\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe") (fun (_,s,t) -> sprintf "%sem\t%s" s t), "{v'}em\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_miekkie_nowe_wyglos") (fun (_,s,t) -> sprintf "%s\t%s" s t), "{v'}ε\t{v'}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_y") (fun (_,s,t) -> sprintf "%s\t%s" s t), "{}y\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_y") (fun (_,s,t) -> sprintf "%sch\t%s" s t), "{}ych\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_y") (fun (_,s,t) -> sprintf "%sm\t%s" s t), "{}ym\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_y") (fun (_,s,t) -> sprintf "%smi\t%s" s t), "{}ymi\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_e") (fun (_,s,t) -> sprintf "%se\t%s" s t), "{}e\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_e") (fun (_,s,t) -> sprintf "%sego\t%s" s t), "{}ego\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_e") (fun (_,s,t) -> sprintf "%sej\t%s" s t), "{}ej\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_e") (fun (_,s,t) -> sprintf "%semu\t%s" s t), "{}emu\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_a") (fun (_,s,t) -> sprintf "%sa\t%s" s t), "{}a\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_a") (fun (_,s,t) -> sprintf "%są\t%s" s t), "{}ą\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_a") (fun (_,s,t) -> sprintf "%so\t%s" s t), "{}o\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_a") (fun (_,s,t) -> sprintf "%sę\t%s" s t), "{}ę\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_a") (fun (_,s,t) -> sprintf "%su\t%s" s t), "{}u\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_a") (fun (_,s,t) -> sprintf "%sów\t%s" s t), "{}ów\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_a") (fun (_,s,t) -> sprintf "%som\t%s" s t), "{}om\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_a") (fun (_,s,t) -> sprintf "%sami\t%s" s t), "{}ami\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_a") (fun (_,s,t) -> sprintf "%sach\t%s" s t), "{}ach\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_a") (fun (_,s,t) -> sprintf "%sowi\t%s" s t), "{}owi\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_a") (fun (_,s,t) -> sprintf "%sowie\t%s" s t), "{}owie\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_a") (fun (_,s,t) -> sprintf "%sum\t%s" s t), "{}um\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_e") (fun (_,s,t) -> sprintf "%sem\t%s" s t), "{}em\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_i") (fun (_,s,t) -> sprintf "%s\t%s" s t), "{}'i\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_ie") (fun (_,s,t) -> sprintf "%s\t%s" s t), "{}'ie\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_wyglos") (fun (_,s,t) -> sprintf "%s\t%s" s t), "{}ε\t{}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe_y") (fun (_,s,t) -> sprintf "%s\t%s" s t), "{v}y\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe_y") (fun (_,s,t) -> sprintf "%sch\t%s" s t), "{v}ych\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe_y") (fun (_,s,t) -> sprintf "%sm\t%s" s t), "{v}ym\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe_y") (fun (_,s,t) -> sprintf "%smi\t%s" s t), "{v}ymi\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe") (fun (_,s,t) -> sprintf "%se\t%s" s t), "{v}e\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe") (fun (_,s,t) -> sprintf "%sego\t%s" s t), "{v}ego\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe") (fun (_,s,t) -> sprintf "%sej\t%s" s t), "{v}ej\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe") (fun (_,s,t) -> sprintf "%semu\t%s" s t), "{v}emu\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe") (fun (_,s,t) -> sprintf "%sa\t%s" s t), "{v}a\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe") (fun (_,s,t) -> sprintf "%są\t%s" s t), "{v}ą\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe") (fun (_,s,t) -> sprintf "%so\t%s" s t), "{v}o\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe") (fun (_,s,t) -> sprintf "%sę\t%s" s t), "{v}ę\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe") (fun (_,s,t) -> sprintf "%su\t%s" s t), "{v}u\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe") (fun (_,s,t) -> sprintf "%sów\t%s" s t), "{v}ów\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe") (fun (_,s,t) -> sprintf "%som\t%s" s t), "{v}om\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe") (fun (_,s,t) -> sprintf "%sami\t%s" s t), "{v}ami\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe") (fun (_,s,t) -> sprintf "%sach\t%s" s t), "{v}ach\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe") (fun (_,s,t) -> sprintf "%sowi\t%s" s t), "{v}owi\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe") (fun (_,s,t) -> sprintf "%sowie\t%s" s t), "{v}owie\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe") (fun (_,s,t) -> sprintf "%sum\t%s" s t), "{v}um\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe_ie") (fun (_,s,t) -> sprintf "%s\t%s" s t), "{v}'ie\t{v}";
  Xlist.map (StringMap.find alternation_map "funkcjonalnie_twarde_nowe_wyglos") (fun (_,s,t) -> sprintf "%s\t%s" s t), "{v}ε\t{v}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_y") (fun (_,s,t) -> sprintf "%s\t%s" s t), "{-}y\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_y") (fun (_,s,t) -> sprintf "%sch\t%s" s t), "{-}ych\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_y") (fun (_,s,t) -> sprintf "%sm\t%s" s t), "{-}ym\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_y") (fun (_,s,t) -> sprintf "%smi\t%s" s t), "{-}ymi\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_a") (fun (_,s,t) -> sprintf "%se\t%s" s t), "{-}e\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_a") (fun (_,s,t) -> sprintf "%sego\t%s" s t), "{-}ego\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_a") (fun (_,s,t) -> sprintf "%sej\t%s" s t), "{-}ej\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_a") (fun (_,s,t) -> sprintf "%semu\t%s" s t), "{-}emu\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_a") (fun (_,s,t) -> sprintf "%sa\t%s" s t), "{-}a\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_a") (fun (_,s,t) -> sprintf "%są\t%s" s t), "{-}ą\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_a") (fun (_,s,t) -> sprintf "%so\t%s" s t), "{-}o\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_a") (fun (_,s,t) -> sprintf "%sę\t%s" s t), "{-}ę\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_a") (fun (_,s,t) -> sprintf "%su\t%s" s t), "{-}u\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_a") (fun (_,s,t) -> sprintf "%sów\t%s" s t), "{-}ów\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_a") (fun (_,s,t) -> sprintf "%som\t%s" s t), "{-}om\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_a") (fun (_,s,t) -> sprintf "%sem\t%s" s t), "{-}em\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_a") (fun (_,s,t) -> sprintf "%sami\t%s" s t), "{-}ami\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_a") (fun (_,s,t) -> sprintf "%sach\t%s" s t), "{-}ach\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_a") (fun (_,s,t) -> sprintf "%sowi\t%s" s t), "{-}owi\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_a") (fun (_,s,t) -> sprintf "%sowie\t%s" s t), "{-}owie\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_a") (fun (_,s,t) -> sprintf "%sum\t%s" s t), "{-}um\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_ie") (fun (_,s,t) -> sprintf "%s\t%s" s t), "{-}'ie\t{-}";
  Xlist.map (StringMap.find alternation_map "kapitaliki_wyglos") (fun (_,s,t) -> sprintf "%s\t%s" s t), "{-}ε\t{-}";*)
  ] StringMap.empty (fun map (l,code) ->
    Xlist.fold l map (fun map rule -> StringMap.add_inc map rule code (fun code2 ->
      print_endline ("rule_types: " ^ rule ^ " " ^ code ^ " " ^ code2); code2)))
  127 +
(* [cut_prefix_list c ll] removes the leading element of every list in
   [ll], provided each head equals [c]; returns [] for an empty [ll].
   @raise Not_found if any list is empty or starts with a different
   element.
   Fixes: the original was declared [let rec] without any recursive
   call (compiler warning 39); the spurious [rec] is dropped, and the
   equivalent stdlib [List.map] replaces [Xlist.map]. *)
let cut_prefix_list c ll =
  List.map
    (function
      | [] -> raise Not_found
      | x :: tl -> if x = c then tl else raise Not_found)
    ll
  132 +
(* Accumulate in [n] the byte length of the longest common prefix of the
   character lists given (each "character" is a multi-byte UTF-8 chunk,
   hence [String.length]).  Fails on an empty list of lists. *)
let rec find_common_prefix_length_rec n = function
  | [] -> failwith "find_common_prefix_length_rec"
  | [] :: _ -> n
  | (c :: rest) :: others ->
      (try
         let others = cut_prefix_list c others in
         find_common_prefix_length_rec (n + String.length c) (rest :: others)
       with Not_found -> n)
  141 +
(* Byte length of the longest common prefix of the UTF-8 strings in [l]. *)
let find_common_prefix_length l =
  let char_lists = Xlist.map l Xunicode.utf8_chars_of_utf8_string (*Stem.text_to_chars*) in
  find_common_prefix_length_rec 0 char_lists
  145 +
(* [cut_prefixn i s] drops the first [i] bytes of [s]; returns "" when
   [i] reaches or exceeds the length of [s].
   @raise Failure when [String.sub] rejects the arguments (e.g. [i] < 0). *)
let cut_prefixn i s =
  let len = String.length s in
  if i >= len then ""
  else
    try String.sub s i (len - i)
    with _ -> failwith ("cut_prefixn: " ^ s ^ " " ^ string_of_int i)
  150 +
(* Encode an (orth-suffix, stem-suffix) pair as a rule code.  Pairs
   present in [rule_types] yield their registered code and flag [true];
   otherwise a synthetic code is built from the suffix remainder plus
   the reversed UTF-8 characters of [b] (flag [false]), or "???" when
   [b] is not a prefix of [a]. *)
let rule_code (a, b) =
  let key = sprintf "%s\t%s" a b in
  try StringMap.find rule_types key, true
  with Not_found ->
    if Rules.check_prefix b a then
      let suf = Rules.cut_prefix b a in
      let rev_chars =
        List.rev (Xunicode.utf8_chars_of_utf8_string(*Stem.text_to_chars*) b) in
      suf ^ "_" ^ String.concat "_" rev_chars, false
    else "???", false
  158 +
(* Build the textual rule mapping [orth] to [stem]: strip their common
   prefix (computed against [stem_pref]) and encode the remaining
   suffix pair via [rule_code].  Known codes are emitted alone, unknown
   ones together with both raw suffixes. *)
let generate_rule stem stem_pref orth =
  let cut = find_common_prefix_length [stem_pref; orth] in
  let orth_suf = cut_prefixn cut orth in
  let stem_suf = cut_prefixn cut stem in
  let code, known = rule_code (orth_suf, stem_suf) in
  if known then "\t" ^ code
  else sprintf "%s\t%s\t%s" code orth_suf stem_suf
  165 +
(* [classify_entry lemma stem forms class_sel] walks the selector list:
   the first selector (interp, suffix, code) for which some form carries
   that interpretation AND some such form ends with the suffix yields the
   class code; "X" when no selector matches.  [lemma] and [stem] are
   threaded through for the commented-out variants below. *)
let rec classify_entry lemma stem forms = function
    (class_interp,suf,cl) :: class_sel ->
      (* orth forms whose interpretation equals the selector's *)
      let l = Xlist.fold forms [] (fun l (orth,interp) ->
        if interp = class_interp then orth :: l else l) in
      (* true iff at least one of them ends with the selector's suffix *)
      let b = Xlist.fold l false (fun b orth ->
        if Rules.check_sufix suf orth then true else b) in
      if b then cl else classify_entry lemma stem forms class_sel
(* let l = StringSet.to_list (Xlist.fold l StringSet.empty (fun set orth ->
    if check_prefix stem orth then
      StringSet.add set (cut_prefix stem orth)
    else set)) in
  if Xlist.mem l suf then cl else classify_noun lemma stem interps class_sel
  let l = StringSet.to_list (Xlist.fold l StringSet.empty (fun set orth ->
    if check_prefix stem orth then
      StringSet.add set (cut_prefix stem orth)
    else set)) in
  if Xlist.mem l suf then cl else classify_noun lemma stem interps class_sel*)
(* (match l with
    [] -> classify_noun lemma stem interps class_sel
  | [s] -> if s = suf then cl else classify_noun lemma stem interps class_sel
  | _ -> print_endline ("classify_noun multiple class: " ^ lemma ^ " " ^ String.concat " " l);
    classify_noun lemma stem interps class_sel)*)
  | [] -> (*print_endline ("classify_noun unknown class: " ^ lemma);*) "X"
  189 +
(* Selector list consumed by [classify_entry]: triples of
   (full interpretation, orth suffix, class code).  E.g. an entry whose
   plural-genitive form ends in -ii/-ji/-yj is classified "II". *)
let entry_classes =
  List.flatten (Xlist.map ["m1";"m2";"m3";"n1";"n2";"f";"p1";"p2";"p3"] (fun gender ->
    Xlist.map ["ii";"ji";"yj"] (fun sufix ->
      "subst:pl:gen:" ^ gender, sufix,"II"))) @
  List.flatten (Xlist.map ["m1";"m2";"m3";"n1";"n2";"f"] (fun gender ->
    Xlist.map ["a"] (fun sufix ->
      "subst:sg:nom:" ^ gender, sufix,"A"))) @
  List.flatten (Xlist.map ["m1";"m2";"m3";"n1";"n2";"f"] (fun gender ->
    Xlist.map ["ę"] (fun sufix ->
      "subst:sg:acc:" ^ gender, sufix,"Ę"))) @
  List.flatten (Xlist.map ["m1";"m2";"m3";"n1";"n2";"f"] (fun gender ->
    Xlist.map ["ą"] (fun sufix ->
      "subst:sg:inst:" ^ gender, sufix,"Ą"))) @
(* List.flatten (Xlist.map ["m1";"m2";"m3";"n1";"n2";"f";"p1";"p2";"p3"] (fun gender ->
    Xlist.map ["ym";"im";"m"] (fun sufix ->
      "subst:pl:dat:" ^ gender, sufix,"ADJ"))) @
  List.flatten (Xlist.map ["m1";"m2";"m3";"n1";"n2";"f"] (fun gender ->
    Xlist.map ["a","A";"o","O";"e","E"] (fun (sufix,s) ->
      "subst:sg:nom:" ^ gender, sufix,s))) @*)
  [
    "subst:sg:nom:n2","um","UM";
  ]
  212 +
(* Fold one dictionary entry into the rule accumulator [rules]: derive a
   stem, classify the entry, and for every not-yet-validated form record
   a classified rule under the form's interpretation.  Each rule keeps a
   frequency count and at most 20 sample lemmata. *)
let generate_rules_entry stem_sel rules lemma forms =
  let stem = Stem.generate_stem stem_sel lemma forms in
  let stem_pref = Stem.cut_stem_sufix stem in
  let cl = classify_entry lemma stem forms entry_classes in
  let remaining = Rules.select_not_validated lemma forms in
  Xlist.fold remaining rules (fun acc (orth, interp) ->
    let rule = cl ^ "\t" ^ generate_rule stem stem_pref orth in
    let per_interp =
      try StringMap.find acc interp with Not_found -> StringMap.empty in
    let per_interp =
      StringMap.add_inc per_interp rule (1, [lemma])
        (fun (q, l) -> q + 1, if q < 20 then lemma :: l else l) in
    StringMap.add acc interp per_interp)
  223 +
(* Comparative-degree variant of [generate_rules_entry]: only forms
   whose interpretation ends in ":com" produce rules, and no entry
   class is prepended. *)
let generate_rules_com_entry stem_sel rules lemma forms =
  let stem = Stem.generate_stem stem_sel lemma forms in
  let stem_pref = Stem.cut_stem_sufix stem in
(* let cl = classify_entry lemma stem forms entry_classes in *)
  let remaining = Rules.select_not_validated lemma forms in
  Xlist.fold remaining rules (fun acc (orth, interp) ->
    if not (Rules.check_sufix ":com" interp) then acc
    else
      let rule = "\t" ^ generate_rule stem stem_pref orth in
      let per_interp =
        try StringMap.find acc interp with Not_found -> StringMap.empty in
      let per_interp =
        StringMap.add_inc per_interp rule (1, [lemma])
          (fun (q, l) -> q + 1, if q < 20 then lemma :: l else l) in
      StringMap.add acc interp per_interp)
  235 +
(* As [generate_rules_entry], but the stem is first derived from the
   lemma itself via [stem_sel]; when that yields "", [stem_sel2] is
   applied to the forms as a fallback.  Entries with no stem at all are
   skipped unchanged. *)
let generate_rules_entry_lemma_as_stem stem_sel stem_sel2 rules lemma forms =
  let primary = Stem.generate_stem_lemma_as_stem stem_sel lemma in
  let stem =
    if primary = "" then Stem.generate_stem stem_sel2 lemma forms
    else primary in
  if stem = "" then rules
  else
    let stem_pref = Stem.cut_stem_sufix stem in
    let cl = classify_entry lemma stem forms entry_classes in
    let remaining = Rules.select_not_validated lemma forms in
    Xlist.fold remaining rules (fun acc (orth, interp) ->
      let rule = cl ^ "\t" ^ generate_rule stem stem_pref orth in
      let per_interp =
        try StringMap.find acc interp with Not_found -> StringMap.empty in
      let per_interp =
        StringMap.add_inc per_interp rule (1, [lemma])
          (fun (q, l) -> q + 1, if q < 20 then lemma :: l else l) in
      StringMap.add acc interp per_interp)
  248 +
(* Comparative-degree variant of [generate_rules_entry_lemma_as_stem]:
   only forms whose interpretation ends in ":com" produce rules, no
   entry class is prepended, and no fallback stem selector is used.
   NOTE(review): unlike [generate_rules_entry_lemma_as_stem] there is no
   guard for an empty stem; if [generate_stem_lemma_as_stem] can return
   "" here, [cut_stem_sufix] is applied to "" — confirm that is safe. *)
let generate_rules_com_entry_lemma_as_stem stem_sel rules lemma forms =
  let stem = Stem.generate_stem_lemma_as_stem stem_sel lemma in
  let stem_pref = Stem.cut_stem_sufix stem in
(* let cl = classify_entry lemma stem forms entry_classes in *)
  (* forms already covered by validated paradigms are skipped *)
  let forms = Rules.select_not_validated lemma forms in
  Xlist.fold forms rules (fun rules (orth,interp) ->
    if not (Rules.check_sufix ":com" interp) then rules else
    let rule = "\t" ^ generate_rule stem stem_pref orth in
    let rules2 = try StringMap.find rules interp with Not_found -> StringMap.empty in
    let rules2 = StringMap.add_inc rules2 rule (1,[lemma]) (fun (q,l) -> q+1, if q < 20 then lemma :: l else l) in
    StringMap.add rules interp rules2)
  260 +
  261 +
  262 +
  263 +
  264 +
  265 +
  266 +
  267 +
  268 +
0 269 \ No newline at end of file
... ...
guesser/rules.ml 0 → 100644
  1 +open Xstd
  2 +open Printf
  3 +
(* [check_prefix pat s] is true iff [s] starts with [pat]. *)
let check_prefix pat s =
  let plen = String.length pat in
  plen <= String.length s && String.sub s 0 plen = pat
  8 +
(* [cut_prefix pat s] drops the first [String.length pat] bytes of [s];
   returns "" when the prefix is at least as long as [s].  Note: does
   NOT verify that [s] actually begins with [pat]. *)
let cut_prefix pat s =
  let drop = String.length pat in
  let len = String.length s in
  if drop >= len then ""
  else
    try String.sub s drop (len - drop)
    with _ -> failwith ("cut_prefix: " ^ s ^ " " ^ string_of_int drop)
  14 +
(* [check_sufix pat s] is true iff [s] ends with [pat]. *)
let check_sufix pat s =
  let plen = String.length pat in
  let slen = String.length s in
  plen <= slen && String.sub s (slen - plen) plen = pat
  20 +
(* [cut_sufix pat s] drops the last [String.length pat] bytes of [s].
   Note: does NOT verify that [s] actually ends with [pat].
   @raise Failure when [pat] is longer than [s]. *)
let cut_sufix pat s =
  let keep = String.length s - String.length pat in
  try String.sub s 0 keep
  with _ -> failwith ("cut_sufix: " ^ s)
  25 +
  26 +
  27 +
(* Swap the two suffix sides of every (class, lhs, rhs) alternation triple,
   turning a form->lemma table into a lemma->form table (and vice versa). *)
let revert_alternations alternation =
  Xlist.map alternation (fun (cls, lhs, rhs) -> cls, rhs, lhs)
  30 +
(* [Schemata.alternations] indexed by alternation name.  Each value is a list
   of string triples; judging by their use in [prepare_rules], the components
   are (tag class, matched suffix, replacement suffix) - confirm against the
   Schemata tables. *)
let alternation_map = Xlist.fold Schemata.alternations StringMap.empty (fun map (k,v) ->
  StringMap.add map k v)
  33 +
(* The same tables indexed by name, but with the two suffix sides of each
   triple swapped ([revert_alternations]); consumed by [prepare_rev_rules]. *)
let rev_alternation_map = Xlist.fold Schemata.alternations StringMap.empty (fun map (k,v) ->
  StringMap.add map k (revert_alternations v))
  36 +
(* Translate a positional Morfeusz-style tag list into an explicit
   attribute-value list (inverse of [retranslate_tags]).
   @raise Failure on an unsupported tag shape. *)
let translate_tags = function
  | ["adj"; num; cs; gen; grad] ->
      ["cat","adj"; "number",num; "case",cs; "gender",gen; "grad",grad]
  | ["adja"] -> ["cat","adja"]
  | ["adjc"] -> ["cat","adjc"]
  | ["adjp"] -> ["cat","adjp"]
  | ["subst"; num; cs; gen] ->
      ["cat","subst"; "number",num; "case",cs; "gender",gen]
  | ["depr"; num; cs; gen] ->
      ["depr","subst"; "number",num; "case",cs; "gender",gen]
  | other -> failwith ("translate_tags: " ^ String.concat ":" other)
  45 +
(* Inverse of [translate_tags]: turn an attribute-value list back into the
   positional Morfeusz-style tag list.  The attribute order must be exactly
   the one [translate_tags] produces.
   @raise Failure on an unrecognized attribute pattern. *)
let retranslate_tags = function
  | ["cat","adj"; "number",num; "case",cs; "gender",gen; "grad",grad] ->
      ["adj"; num; cs; gen; grad]
  | ["cat","adja"] -> ["adja"]
  | ["cat","adjc"] -> ["adjc"]
  | ["cat","adjp"] -> ["adjp"]
  | ["cat","subst"; "number",num; "case",cs; "gender",gen] ->
      ["subst"; num; cs; gen]
  | ["depr","subst"; "number",num; "case",cs; "gender",gen] ->
      ["depr"; num; cs; gen]
  | other ->
      failwith ("retranslate_tags: " ^ String.concat " " (Xlist.map other (fun (k,v) -> k ^ "=" ^ v)))
  54 +
(* Expand one [Schemata] tag template into a list of attribute-value lists.
   [x] is the value substituted for [Schemata.A] attributes.
   - [M tags]: a compact tag string; "|" separates alternatives, ":" separates
     positions and "." separates values within a position; the cartesian
     product of the positions is translated by [translate_tags].
   - [T (k, v)]: a single fixed attribute.
   - [A k]: attribute [k] bound to the supplied value [x]. *)
let expand_tags x = function
    Schemata.M tags ->
      List.flatten (Xlist.map (Str.split (Str.regexp "|") tags) (fun tags ->
        let tags = Xlist.map (Str.split (Str.regexp ":") tags) (Str.split (Str.regexp "\\.")) in
        Xlist.map (Xlist.multiply_list tags) translate_tags))
  | Schemata.T(k,v) -> [[k,v]]
  | Schemata.A k -> [[k,x]]
(* | Schemata.N -> [[]] *)
  63 +
(* Expand every template in [l] with [expand_tags] and take the cartesian
   product, yielding every concrete attribute-value list the template list
   can denote. *)
let expand_tag_list x l =
  Xlist.map (Xlist.multiply_list (Xlist.map l (expand_tags x))) List.flatten
  66 +
(* Compile rule descriptions (alternation name, suffix, tag templates) into
   concrete rewrite rules (matched-suffix, replacement, tag lists): for each
   (class, in, out) triple of the named alternation, the rule matches
   [in ^ sufix] and rewrites it to [out].
   @raise Failure if an alternation name is unknown. *)
let prepare_rules l =
  Xlist.fold l [] (fun rules (alternation_name, sufix, tags) ->
    let alternation = try StringMap.find alternation_map alternation_name with Not_found -> failwith ("prepare_rules " ^ alternation_name) in
    Xlist.fold alternation rules (fun rules (c,a,b) ->
      (a ^ sufix, b, expand_tag_list c tags) :: rules))
  72 +
(* Like [prepare_rules] but based on the reversed alternation tables
   ([rev_alternation_map]) and appending [sufix] to the replacement side
   rather than the matched side, producing lemma->form rules.
   @raise Failure if an alternation name is unknown. *)
let prepare_rev_rules l =
  Xlist.fold l [] (fun rules (alternation_name, sufix, tags) ->
    let alternation =
      try StringMap.find rev_alternation_map alternation_name
      (* Fix: the error message used to say "prepare_rules", misattributing
         a missing alternation to the wrong function. *)
      with Not_found -> failwith ("prepare_rev_rules " ^ alternation_name) in
    Xlist.fold alternation rules (fun rules (c,a,b) ->
      (a, b ^ sufix, expand_tag_list c tags) :: rules))
  78 +
(* All compiled rule sets indexed by name: forward rules from
   [Schemata.rules] extended with reverse rules from [Schemata.rev_rules].
   NOTE(review): a name occurring in both lists would presumably be
   overwritten by the reverse set - confirm the name spaces are disjoint. *)
let rule_map =
  let map = Xlist.fold Schemata.rules StringMap.empty (fun map (k,v) -> StringMap.add map k (prepare_rules v)) in
  Xlist.fold Schemata.rev_rules map (fun map (k,v) -> StringMap.add map k (prepare_rev_rules v))
  82 +
(* A rule applies to [word] when the word ends with the rule's matched suffix. *)
let is_applicable_rule (match_suf, _, _) word = check_sufix match_suf word
  84 +
(* Rewrite [word] by the rule: strip the matched suffix, append the
   replacement suffix. *)
let apply_rule (match_suf, out_suf, _) word =
  cut_sufix match_suf word ^ out_suf
  87 +
(* Projection: the tag lists carried by a rule triple. *)
let get_tags (_, _, tag_lists) = tag_lists
  89 +
(* [extract_tag key acc l] removes the first binding of [key] from the
   association list [l], returning its value and the remaining pairs in
   their original order; ("", l) when [key] is absent.  [acc] holds the
   already-scanned pairs in reverse. *)
let rec extract_tag key acc l =
  match l with
  | (k, v) :: rest when k = key -> v, List.rev acc @ rest
  | pair :: rest -> extract_tag key (pair :: acc) rest
  | [] -> "", List.rev acc
  93 +
(* Analyse [orth]: for each schema (a pipeline of rule-set names from
   [Schemata.schemata]) apply every applicable rule of every stage to every
   intermediate candidate, accumulating (candidate-lemma, tag list) pairs.
   A candidate with no applicable rule at some stage is dropped.  Finally a
   collected "suf" attribute, if any, is popped from the tags and appended
   back to the candidate's orthographic form.
   @raise Failure if a schema names an unknown rule set. *)
let apply_rules orth =
  let found = Xlist.fold Schemata.schemata [] (fun found schema ->
    let interps = Xlist.fold schema [orth,[]] (fun interps rule_set_name ->
      let rules = try StringMap.find rule_map rule_set_name with Not_found -> failwith ("apply_rules: " ^ rule_set_name) in
      (* Each stage rebuilds the candidate list from scratch: a candidate
         survives only via the rules that apply to it. *)
      Xlist.fold interps [] (fun interps (orth,tags) ->
        Xlist.fold rules interps (fun interps rule ->
          if is_applicable_rule rule orth then
            let orth = apply_rule rule orth in
            (* One rule may carry several tag lists; fan out over all. *)
            Xlist.fold (get_tags rule) interps (fun interps new_tags ->
              (orth, new_tags @ tags) :: interps)
          else interps))) in
    interps @ found) in
  Xlist.rev_map found (fun (orth,tags) ->
    let suf,tags = extract_tag "suf" [] tags in
    orth ^ suf, tags)
  109 +
(* Expand a compact tag string into all of its concrete readings:
   "|" separates whole alternatives, ":" separates positions and "."
   separates alternative values within a position
   (e.g. "sg.pl:nom" -> ["sg:nom"; "pl:nom"]).
   NOTE: this shadows the earlier [expand_tags] that works on [Schemata]
   templates; below this point only the string version is visible. *)
let expand_tags tags =
  if tags = "" then [] else
  List.flatten (Xlist.map (Str.split (Str.regexp "|") tags) (fun tags ->
    let tags = Xlist.map (Str.split (Str.regexp ":") tags) (Str.split (Str.regexp "\\.")) in
    Xlist.map (Xlist.multiply_list tags) (String.concat ":")))
  115 +
(* [validate_form orth lemma interp] checks whether the rules can derive
   [lemma] from the surface form [orth].  [interp] is currently unused: the
   stricter variant that also compares the generated tags against the
   declared interpretation is kept below, commented out. *)
let validate_form orth lemma interp =
(* printf "form %s %s %s%!" orth lemma interp; *)
  let found = apply_rules orth in
  (* True as soon as any candidate lemma equals [lemma]. *)
  let b = Xlist.fold found false (fun b (orth,_) ->
    if orth = lemma then true else b) in
(* let found = StringSet.of_list (Xlist.fold found [] (fun found (orth,tags) ->
  if lemma = orth then (String.concat ":" (retranslate_tags tags)) :: found else found)) in
  let b = Xlist.fold (expand_tags interp) true (fun b interp ->
    if StringSet.mem found interp then b else false) in*)
(* if b then printf " validated\n%!" else printf " not validated\n%!"; *)
  b
  127 +
(* Strip a homonym marker from a lemma: "kot:S" -> "kot".
   @raise Failure when the lemma has more than two ":"-separated fields
   (or none, i.e. the empty string). *)
let simplify_lemma lemma =
  match Str.split (Str.regexp ":") lemma with
  | [base] | [base; _] -> base
  | _ -> failwith "simplify_lemma"
  133 +
(* An entry passes when every one of its forms can be derived back to the
   simplified lemma by [validate_form].  Every form is still checked after
   the first failure (no short-circuit). *)
let validate_entry lemma forms =
  let lemma = simplify_lemma lemma in
  let b = Xlist.fold forms true (fun b (orth,interp) ->
    if validate_form orth lemma interp then b else false) in
(* if b then printf "entry %s validated\n%!" lemma else printf "entry %s not validated\n%!" lemma; *)
  b
  140 +
(* Keep only the forms of an entry that the rules cannot reduce to its
   simplified lemma; the result is in reverse input order. *)
let select_not_validated lemma forms =
  let base = simplify_lemma lemma in
  Xlist.fold forms [] (fun rejected ((orth, interp) as form) ->
    if validate_form orth base interp then rejected else form :: rejected)
  145 +
(* Candidate analyses of [orth] that reduce to [lemma], in reverse order of
   [apply_rules]'s output.  The interpretation argument is accepted for
   interface compatibility but unused. *)
let find_tags_form orth lemma _interp =
  let candidates = apply_rules orth in
  Xlist.fold candidates [] (fun matching (cand, tags) ->
    if cand = lemma then (cand, tags) :: matching else matching)
  151 +
(* Pair every form of the entry with the analyses that link it to the
   simplified lemma, as (orth, matching analyses, original interp) triples;
   the result is in reverse input order. *)
let find_tags_entry lemma forms =
  let lemma = simplify_lemma lemma in
  let l = Xlist.fold forms [] (fun l (orth,interp) ->
    let found = find_tags_form orth lemma interp in
    (orth,found,interp) :: l) in
(* if b then printf "entry %s validated\n%!" lemma else printf "entry %s not validated\n%!" lemma; *)
  l
  159 +
(* Canonical rendering of an attribute-value list: pairs sorted, printed as
   "key=value" and joined with single spaces. *)
let string_of_tags tags =
  let sorted = List.sort compare tags in
  String.concat " " (Xlist.map sorted (fun (key, value) -> key ^ "=" ^ value))
  162 +
(* [select_tag tag acc l] removes the first binding of [tag] from the
   association list [l], returning its value and the remaining pairs.
   Unlike [extract_tag], the scanned prefix is NOT re-reversed, so the
   remaining pairs may come back out of order; ("", scanned-in-reverse)
   when [tag] is absent. *)
let rec select_tag tag acc = function
  | (k, v) :: rest when k = tag -> v, acc @ rest
  | pair :: rest -> select_tag tag (pair :: acc) rest
  | [] -> "", acc
  166 +
(* Dump every compiled rule set to [path ^ name ^ ".dic"], grouping rules by
   their tag signature (with the "con" attribute split off and printed per
   rule line), then write the schema pipelines to [path ^ "SCHEMATA.dic"]. *)
let print path =
  StringMap.iter rule_map (fun name rules ->
    File.file_out (path ^ name ^ ".dic") (fun file ->
      (* Group the (match, replacement, con) triples by their remaining tags. *)
      let map = Xlist.fold rules StringMap.empty (fun map (a,b,tagsl) ->
        Xlist.fold tagsl map (fun map tags ->
          let con,tags = select_tag "con" [] tags in
          StringMap.add_inc map (string_of_tags tags) [a,b,con] (fun l -> (a,b,con) :: l))) in
      StringMap.iter map (fun tags l ->
        fprintf file "@RULES\t%s\n" tags;
        Xlist.iter l (fun (a,b,con) ->
          (* "#" terminates a rule line; "con=" is emitted only when set. *)
          if con = "" then fprintf file "\t%s\t%s\t#\n" a b
          else fprintf file "\t%s\t%s\tcon=%s\t#\n" a b con);
        fprintf file "\n")));
  File.file_out (path ^ "SCHEMATA.dic") (fun file ->
    Xlist.iter Schemata.schemata (fun schema ->
      fprintf file "@SCHEMA\t%s\n\n" (String.concat " " schema)))
  183 +
0 184 \ No newline at end of file
... ...
guesser/schemata.ml 0 → 100644
  1 +let alternations = [
  2 + "dowolne", ["", "", ""];
  3 +
  4 + "funkcjonalnie_miekkie_iy", [
  5 + "b'", "bi", "b'"; "ć", "ci", "ć"; "dź", "dzi", "dź"; "f'", "fi", "f'"; "ḿ", "mi", "ḿ"; "ń", "ni", "ń"; "ṕ", "pi", "ṕ"; "ś", "si", "ś"; "ẃ", "wi", "ẃ"; "ź", "zi", "ź";
  6 + "l", "li", "l";
  7 + "c", "cy", "c"; "cz", "czy", "cz"; "dz", "dzy", "dz"; "dż", "dży", "dż"; "rz", "rzy", "rz"; "sz", "szy", "sz"; "ż", "ży", "ż";
  8 + "aj", "ai", "aj"; "ej", "ei", "ej"; "ij", "ii", "ij"; "oj", "oi", "oj"; "ój", "ói", "ój"; "uj", "ui", "uj"; "yj", "yi", "yj";
  9 + ];
  10 + "funkcjonalnie_miekkie_ae", [
  11 + "b'", "bi", "b'"; "ć", "ci", "ć"; "dź", "dzi", "dź"; "f'", "fi", "f'"; "ḿ", "mi", "ḿ"; "ń", "ni", "ń"; "ṕ", "pi", "ṕ"; "ś", "si", "ś"; "ẃ", "wi", "ẃ"; "ź", "zi", "ź";
  12 + "l", "l", "l";
  13 + "c", "c", "c"; "cz", "cz", "cz"; "dz", "dz", "dz"; "dż", "dż", "dż"; "rz", "rz", "rz"; "sz", "sz", "sz"; "ż", "ż", "ż";
  14 + "aj", "aj", "aj"; "ej", "ej", "ej"; "ij", "ij", "ij"; "oj", "oj", "oj"; "ój", "ój", "ój"; "uj", "uj", "uj"; "yj", "yj", "yj";
  15 + "c->cz", "cz", "c";
  16 + ];
  17 + "funkcjonalnie_miekkie_wyglos", [
  18 + "b'", "b", "b'"; "b'", "ąb", "ęb'"; "b'", "ób", "ob'";
  19 + "ć", "ć", "ć"; "ć", "óć", "oć"; "ć", "eć", "ć"; "ć", "ieć", "ć";
  20 + "dź", "dź", "dź"; "dź", "ódź", "odź"; "dź", "ądź", "ędź"; "dź", "óźdź", "oźdź";
  21 + "f'", "f", "f'";
  22 + "ḿ", "m", "ḿ";
  23 + "ń", "ń", "ń"; "ń", "eń", "ń"; "ń", "ień", "ń"; "ń", "cień", "tń"; "ń", "dzień", "dń"; "ń", "sień", "śń"; "ń", "zień", "źń"; "ń", "dzień", "edń";
  24 + "ṕ", "p", "ṕ";
  25 + "ś", "ś", "ś"; "ś", "ieś", "ś";
  26 + "ẃ", "w", "ẃ"; "ẃ", "ew", "ẃ"; "ẃ", "iew", "ẃ"; "ẃ", "ów", "oẃ";
  27 + "ź", "ź", "ź"; "ź", "óź", "oź"; "ź", "ąź", "ęź";
  28 + "l", "l", "l"; "l", "el", "l"; "l", "iel", "l"; "l", "ól", "ol"; "l", "ódl", "odl";
  29 + "c", "c", "c"; "c", "ec", "c"; "c", "iec", "c"; "c", "niec", "ńc"; "c", "rzec", "rc"; "c", "siec", "śc"; "c", "ciec", "ćc"; "c", "dziec", "dc"; "c", "dziec", "dźc"; "c", "niec", "ieńc"; "c", "ziec", "źc";
  30 + "cz", "cz", "cz"; "cz", "ecz", "cz"; "cz", "ócz", "ocz";
  31 + "dz", "dz", "dz"; "dz", "ódz", "odz";
  32 + "dż", "dż", "dż";
  33 + "rz", "rz", "rz"; "rz", "erz", "rz"; "rz", "ierz", "rz"; "rz", "órz", "orz"; "rz", "ójrz", "ojrz";
  34 + "sz", "sz", "sz";
  35 + "ż", "ż", "ż"; "ż", "eż", "ż"; "ż", "óż", "oż"; "ż", "ąż", "ęż";
  36 + "aj", "aj", "aj";
  37 + "ej", "ej", "ej";
  38 + "ij", "ij", "ij";
  39 + "oj", "oj", "oj"; "oj", "ój", "oj";
  40 + "ój", "ój", "ój";
  41 + "uj", "uj", "uj";
  42 + "yj", "yj", "yj";
  43 + ];
  44 +
  45 + "funkcjonalnie_twarde_y", [
  46 + "b", "by", "b"; "ch", "chy", "ch"; "d", "dy", "d"; "f", "fy", "f"; "h", "hy", "h"; "ł", "ły", "ł"; "m", "my", "m"; "n", "ny", "n";
  47 + "p", "py", "p"; "r", "ry", "r"; "s", "sy", "s"; "sz", "szy", "sz"; "t", "ty", "t"; "v", "vy", "v"; "w", "wy", "w"; "z", "zy", "z";
  48 + "g", "gi", "g"; "k", "ki", "k";
  49 + "a", "ay", "a"; "e", "ey", "e"; "o", "oy", "o"; "u", "uy", "u";
  50 + ];
  51 + "funkcjonalnie_twarde_e", [
  52 + "b", "b", "b"; "ch", "ch", "ch"; "d", "d", "d"; "f", "f", "f"; "h", "h", "h"; "ł", "ł", "ł"; "m", "m", "m"; "n", "n", "n";
  53 + "p", "p", "p"; "r", "r", "r"; "s", "s", "s"; "sz", "sz", "sz"; "t", "t", "t"; "v", "v", "v"; "w", "w", "w"; "z", "z", "z";
  54 + "g", "gi", "g"; "k", "ki", "k";
  55 + "a", "a", "a"; "e", "e", "e"; "o", "o", "o"; "u", "u", "u";
  56 + ];
  57 + "funkcjonalnie_twarde_a", [
  58 + "b", "b", "b"; "ch", "ch", "ch"; "d", "d", "d"; "f", "f", "f"; "h", "h", "h"; "ł", "ł", "ł"; "m", "m", "m"; "n", "n", "n";
  59 + "p", "p", "p"; "r", "r", "r"; "s", "s", "s"; "sz", "sz", "sz"; "t", "t", "t"; "v", "v", "v"; "w", "w", "w"; "z", "z", "z";
  60 + "g", "g", "g"; "k", "k", "k";
  61 + "a", "a", "a"; "e", "e", "e"; "o", "o", "o"; "u", "u", "u";
  62 + ];
  63 + "funkcjonalnie_twarde_i", [
  64 + "b", "bi", "b"; "ch", "si", "ch"; "d", "dzi", "d"; "d", "edzi", "ad"; "f", "fi", "f"; "h", "zi", "h";
  65 + "ł", "li", "ł"; "ł", "eli", "oł"; "ł", "śli", "sł"; "ł", "źli", "zł"; "ł", "rźli", "rzł";
  66 + "m", "mi", "m"; "m", "śmi", "sm";
  67 + "n", "ni", "n"; "n", "eni", "on"; "n", "eni", "ion"; "n", "śni", "sn"; "n", "źni", "zn";
  68 + "p", "pi", "p"; "r", "rzy", "r"; "s", "si", "s"; "sz", "si", "sz";
  69 + "t", "ci", "t"; "t", "ści", "st"; "t", "eci", "ot";
  70 + "v", "vi", "v"; "w", "wi", "w"; "z", "zi", "z"; "ż", "zi", "ż";
  71 + "g", "dzy", "g"; "k", "cy", "k";
  72 + "a", "ai", "a"; "e", "ei", "e"; "o", "oi", "o"; "u", "ui", "u";
  73 +(* "", "rzy", "er";*)
  74 + ];
  75 +
  76 + "funkcjonalnie_twarde_ie", [
  77 + "b", "bie", "b"; "ch", "sze", "ch";
  78 + "d", "dzie", "d"; "d", "ździe", "zd"; "d", "edzie", "ad"; "d", "edzie", "od"; "d", "eździe", "azd";
  79 + "f", "fie", "f"; "h", "sze", "h"; "h", "że", "h";
  80 + "ł", "le", "ł"; "ł", "śle", "sł"; "ł", "źle", "zł"; "ł", "ele", "ał"; "ł", "ele", "oł"; "ł", "etle", "atł"; "ł", "lle", "łł";
  81 + "m", "mie", "m"; "m", "śmie", "sm";
  82 + "n", "nie", "n"; "n", "enie", "on"; "n", "śnie", "sn"; "n", "źnie", "zn";
  83 + "p", "pie", "p";
  84 + "r", "rze", "r"; "r", "erze", "ar"; "r", "etrze", "atr"; "r", "rze", "rr";
  85 + "s", "sie", "s"; "s", "esie", "as";
  86 + "t", "cie", "t"; "t", "ecie", "at"; "t", "ecie", "ot"; "t", "ście", "st"; "t", "eście", "ast";
  87 + "v", "vie", "v"; "w", "wie", "w"; "z", "zie", "z";
  88 + "g", "dze", "g"; "k", "ce", "k";
  89 + ];
  90 +
  91 + "funkcjonalnie_twarde_wyglos", [
  92 + "b", "b", "b"; "b", "eb", "b"; "b", "ób", "ob"; "b", "ąb", "ęb"; "b", "óśb", "ośb"; "b", "óźb", "oźb";
  93 + "ch","ch","ch";"ch","ech","ch";
  94 + "d", "d", "d"; "d", "ed", "d"; "d", "ód", "od"; "d", "ąd", "ęd";
  95 + "f", "f", "f";
  96 + "h", "h", "h";
  97 + "ł", "ł", "ł"; "ł", "eł", "ł"; "ł", "ieł", "ł"; "ł", "el", "oł"; "ł", "ół", "oł"; "ł", "ioł", "ł"; "ł", "rzeł", "rł"; "ł", "cieł", "tł"; "ł", "cioł", "tł";
  98 + "m", "m", "m"; "m", "em", "m"; "m", "ciem", "ćm";
  99 + "n", "n", "n"; "n", "en", "n"; "n", "ien", "n"; "n", "dzien", "dn"; "n", "zien", "źn"; "n", "cien", "tn"; "n", "sien", "śn";
  100 + "p", "p", "p"; "p", "ep", "p"; "p", "iep", "p"; "p", "óp", "op";
  101 + "r", "r", "r"; "r", "er", "r"; "r", "ier", "r"; "r", "ór", "or"; "r", "cer", "kr"; "r", "óbr", "obr"; "r", "óstr", "ostr";
  102 + "s", "s", "s"; "s", "ies", "s";
  103 + "sz","sz","sz";"sz","esz","sz";
  104 + "t", "t", "t"; "t", "et", "t"; "t", "ót", "ot"; "t", "ąt", "ęt";
  105 + "v", "v", "v";
  106 + "w", "w", "w"; "w", "ew", "w"; "w", "iew", "w"; "w", "ów", "ow";
  107 + "x", "x", "ks";
  108 + "z", "z", "z"; "z", "ez", "z"; "z", "iez", "z"; "z", "óz", "oz"; "z", "ąz", "ęz";
  109 + "g", "g", "g"; "g", "eg", "g"; "g", "óg", "og"; "g", "órg", "org"; "g", "ąg", "ęg";
  110 + "k", "k", "k"; "k", "ek", "k"; "k", "ciek", "ćk"; "k", "dziek", "dźk"; "k", "niek", "ńk"; "k", "siek", "śk"; "k", "ziek", "źk"; "k", "ąk", "ęk";
  111 + "a", "a", "a"; "e", "e", "e"; "o", "o", "o"; "u", "u", "u";
  112 + ];
  113 +
  114 + "funkcjonalnie_miekkie_ii", [
  115 + "ai", "ai", "ai";
  116 + "bi", "bi", "bi";
  117 +(* "ci", "ci", "ci"; *)
  118 + "chi", "chi", "chi";
  119 +(* "czi", "czi", "czi"; *)
  120 + "di", "di", "di";
  121 + "dżi", "dżi", "dżi";
  122 + "fi", "fi", "fi";
  123 + "gi", "gi", "gi";
  124 + "ki", "ki", "ki";
  125 + "li", "li", "li";
  126 + "mi", "mi", "mi";
  127 + "ni", "ni", "ni";
  128 + "pi", "pi", "pi";
  129 + "qui", "qui", "qui";
  130 + "ri", "ri", "ri";
  131 + "ti", "ti", "ti";
  132 + "vi", "vi", "vi";
  133 + "wi", "wi", "wi";
  134 + "xi", "xi", "xi";
  135 + "cj", "cj", "cj";
  136 + "czj", "czj", "czj";
  137 + "sj", "sj", "sj";
  138 + "szj", "szj", "szj";
  139 + "zj", "zj", "zj";
  140 + "żi", "żi", "żi";
  141 + ];
  142 +
  143 + "funkcjonalnie_miekkie_ii_wyglos", [
  144 + "bi", "bij", "bi";
  145 + "ci", "cyj", "ci";
  146 + "chi", "chij", "chi";
  147 +(* "czi", "czi", "czi"; *)
  148 + "di", "dyj", "di";
  149 + "dżi", "dżij", "dżi";
  150 + "fi", "fij", "fi";
  151 + "gi", "gij", "gi";
  152 + "ki", "kij", "ki";
  153 + "li", "lij", "li";
  154 + "mi", "mij", "mi";
  155 + "ni", "nij", "ni";
  156 + "pi", "pij", "pi";
  157 + "qui", "quij", "qui";
  158 + "ri", "ryj", "ri";
  159 + "ti", "tyj", "ti";
  160 + "vi", "vij", "vi";
  161 + "wi", "wij", "wi";
  162 + "cj", "cyj", "cj";
  163 + "czj", "czyj", "czj";
  164 + "sj", "syj", "sj";
  165 + "szj", "szyj", "szj";
  166 + "zj", "zyj", "zj";
  167 + ];
  168 +
  169 + "kontrakcje", [
  170 + "t", "ę", "ęt";
  171 + "t", "ęci", "ęt";
  172 + "t", "ęci", "ążęt";
  173 + "n", "ę", "on";
  174 + "n", "eni", "on";
  175 + "di", "dion", "di";
  176 + "n", "anin", "an";
  177 + "n", "o", "on";
  178 + "t", "a", "at";
  179 + ];
  180 +
  181 + "kapitaliki_y", [
  182 + "B", "B-y", "B"; "C", "C-i", "C"; "D", "D-y", "D"; "F", "F-y", "F"; "G", "G-i", "G"; "H", "H-y", "H";
  183 + "I", "I-i", "I"; "J", "J-i", "J"; "J", "J-oty", "J"; "K", "K-i", "K"; "L", "L-i", "L"; "M", "M-y", "M"; "N", "N-y", "N"; "P", "P-y", "P"; "R", "R-y", "R"; "S", "S-y", "S";
  184 + "T", "T-y", "T"; "V", "V-y", "V"; "W", "W-y", "W"; "X", "X-y", "X"; "Z", "Z-y", "Z"; "Z", "Z-ety", "Z";
  185 + "z", "z-ety", "z"; "Ż", "Ż-ety", "Ż"; "f", "f-y", "f"; "m", "m-y", "m"; "r", "r-y", "r";
  186 + "s", "s-y", "s"; "z", "z-y", "z"; "Ł", "Ł-y", "Ł"; "Ś", "Ś-y", "Ś"; "Ż", "Ż-y", "Ż"; "l", "l-i", "l";
  187 + ];
  188 +
  189 + "kapitaliki_a", [
  190 + "B", "B-", "B"; "C", "C-", "C"; "D", "D-", "D"; "E", "E-", "E"; "F", "F-", "F"; "G", "G-", "G"; "H", "H-", "H"; "I", "I-", "I";
  191 + "J", "J-", "J"; "J", "J-ot", "J"; "K", "K-", "K"; "L", "L-", "L"; "M", "M-", "M"; "N", "N-", "N"; "P", "P-", "P"; "R", "R-", "R"; "S", "S-", "S";
  192 + "T", "T-", "T"; "V", "V-", "V"; "W", "W-", "W"; "X", "X-", "X"; "Z", "Z-", "Z"; "Z", "Z-et", "Z";
  193 + "z", "z-et", "z"; "Ż", "Ż-et", "Ż"; "f", "f-", "f"; "l", "l-", "l"; "m", "m-", "m"; "r", "r-", "r";
  194 + "s", "s-", "s"; "z", "z-", "z"; "Ł", "Ł-", "Ł"; "Ś", "Ś-", "Ś"; "Ż", "Ż-", "Ż";
  195 + ];
  196 +
  197 + "kapitaliki_e", [
  198 + "B", "B-", "B"; "C", "C-", "C"; "C", "C-i", "C"; "D", "D-", "D"; "E", "E-", "E"; "F", "F-", "F"; "G", "G-i", "G"; "H", "H-", "H"; "I", "I-", "I";
  199 + "J", "J-", "J"; "J", "J-ot", "J"; "K", "K-i", "K"; "L", "L-", "L"; "M", "M-", "M"; "N", "N-", "N"; "P", "P-", "P"; "R", "R-", "R"; "S", "S-", "S";
  200 + "T", "T-", "T"; "V", "V-", "V"; "W", "W-", "W"; "X", "X-", "X"; "Z", "Z-", "Z"; "Z", "Z-et", "Z";
  201 + "z", "z-et", "z"; "Ż", "Ż-et", "Ż"; "f", "f-", "f"; "l", "l-", "l"; "m", "m-", "m"; "r", "r-", "r";
  202 + "s", "s-", "s"; "z", "z-", "z"; "Ł", "Ł-", "Ł"; "Ś", "Ś-", "Ś"; "Ż", "Ż-", "Ż";
  203 + ];
  204 +
  205 + "kapitaliki_ie", [
  206 + "B", "B-ie", "B"; "D", "D-zie", "D"; "F", "F-ie", "F"; "J", "J-ocie", "J"; "M", "M-ie", "M"; "N", "N-ie", "N"; "P", "P-ie", "P"; "R", "R-ze", "R"; "S", "S-ie", "S";
  207 + "T", "-cie", "T"; "V", "V-ie", "V"; "W", "W-ie", "W"; "X", "X-ie", "X"; "Z", "Z-ie", "Z"; "Z", "Z-ecie", "Z";
  208 + "z", "z-ecie", "z"; "Ż", "Ż-ecie", "Ż"; "f", "f-ie", "f"; "m", "m-ie", "m"; "s", "s-ie", "s"; "r", "r-ze", "r";
  209 + ];
  210 +
  211 + "kapitaliki_wyglos", [
  212 + "B", "B", "B"; "C", "C", "C"; "D", "D", "D"; "E", "E", "E"; "F", "F", "F"; "G", "G", "G"; "H", "H", "H"; "I", "I", "I";
  213 + "J", "J", "J"; "K", "K", "K"; "L", "L", "L"; "M", "M", "M"; "N", "N", "N"; "P", "P", "P"; "R", "R", "R"; "S", "S", "S";
  214 + "T", "T", "T"; "V", "V", "V"; "W", "W", "W"; "X", "X", "X"; "Z", "Z", "Z";
  215 + "Ł", "Ł", "Ł"; "Ś", "Ś", "Ś"; "Ż", "Ż", "Ż";
  216 + (*"z", "z", "z"; "f", "f", "f"; "l", "l", "l"; "m", "m", "m"; "r", "r", "r";
  217 + "s", "s", "s"; "z", "z", "z";*)
  218 + ];
  219 +
  220 +(*
  221 + "funkcjonalnie_miekkie_nowe", [
  222 + "chi", "chi"; "czi", "czi"; "di", "di"; "dżi", "dżi"; "gi", "gi";
  223 + "ki", "ki"; "li", "li"; "ri", "ri"; "ti", "ti"; "vi", "vi";
  224 + "cj", "cj"; "czj", "czj"; "sj", "sj";
  225 + "szj", "szj"; "zj", "zj"; "ui", "ui"; "ai", "ai";
  226 + "żi", "żi"; (*"nj", "nj"; "lj", "lj"; "pj", "pj";*)
  227 + (*"dhi", "dhi";*) "xi", "xi"; (*"yi", "yi";*) (*"ři", "ři"; "şi", "şi"; *)
  228 + (*"í", "í";*) (*"", "";
  229 + "", ""; "", ""; "", ""; "", ""; "", ""; "", ""; "", ""; "", ""; *)
  230 + ];
  231 +
  232 +
  233 + "funkcjonalnie_miekkie_nowe_wyglos", [
  234 + "chi", "chi";
  235 + "czi", "czi";
  236 + "di", "di";
  237 +(* "dhi", "dhi"; *)
  238 + "dżi", "dżi";
  239 + "li", "li";
  240 + "ri", "ri";
  241 + "ti", "ti";
  242 + "vi", "vi";
  243 + "xi", "xi";
  244 + "yi", "yi";
  245 +(* "ři", "ři";
  246 + "şi", "şi";
  247 + "ij", "ij";
  248 + "t", "ti"; *)
  249 + ];
  250 +
  251 + "samogloski", [
  252 + "a", "a"; "e", "e"; "o", "o"; "u", "u";
  253 + ];
  254 +
  255 + "funkcjonalnie_twarde_nowe", [
  256 + (*"dh", "dh";*) "dź", "dź";
  257 + (*"rh", "rh";*) "v", "v"; (*"gh", "gh"; "nh", "nh"; *)
  258 + (*"q", "q";*) "x", "x"; (*"", ""; "", ""; "", ""; "", ""; "", ""; "", "";
  259 + "", ""; "", ""; "", ""; "", "";
  260 + "", ""; "", ""; "", ""; "", ""; "", ""; "", ""; "", ""; "", "";
  261 + "", ""; "", ""; "", ""; "", ""; "", ""; "", ""; "", ""; "", ""; *)
  262 + ];
  263 +
  264 + "funkcjonalnie_twarde_nowe_y", [
  265 + "ay", "a"; (*"dhy", "dh"; "dźy", "dź";*) "ey", "e";
  266 + "oy", "o"; (*"rhy", "rh";*) "uy", "u"; "vy", "v"; (*"ghi", "gh"; "nhy", "nh"; *)
  267 + (*"qy", "q";*) "xy", "x"; (*"ki", "c";*) "ai", "a"; (*"dźi", "dź";*) "ei", "e";
  268 + "oi", "o"; "ui", "u";
  269 + (*"", ""; "", ""; "", ""; "", "";
  270 + "", ""; "", ""; "", ""; "", ""; "", ""; "", ""; "", ""; "", "";
  271 + "", ""; "", ""; "", ""; "", ""; "", ""; "", ""; "", ""; "", ""; *)
  272 + ];
  273 +
  274 + "funkcjonalnie_twarde_nowe_ie", [ (* FIXME: przejrzeć czy faktycznie nowe *)
  275 + "ksie", "x"; "dzie", "dh"; "dze", "h"; "rsze", "rh"; "rże", "rh"; "dze", "gh";
  276 + "cie", "te"; "nie", "nh"; "rze", "rh"; "cie", "th";
  277 +(* "cie", "tes";
  278 + "cie", "the";
  279 + "cie", "thes"; *)
  280 + "cie", "tt";
  281 + "dzie", "de";
  282 + "edzie", "ad";
  283 +(* "fie", "ph"; *)
  284 +(* "fie", "phe"; *)
  285 + "obie", "ób";
  286 + "rze", "er";
  287 + "rze", "re";
  288 +(* "rze", "res"; *)
  289 + "rze", "rre";
  290 + "sie", "ce";
  291 +(* "sie", "th"; *)
  292 + "ąbie", "ębi";
  293 + "ście", "ste";
  294 +(* "ście", "stes"; *)
  295 + "śnie", "sne";
  296 + "ecie", "at";
  297 + "edzie", "od";
  298 + "esie", "as";
  299 + "etrze", "atr";
  300 + "kcie", "ct";
  301 + "ole", "ół"; (* wątpliwe *)
  302 + "orze", "ór"; (* wątpliwe *)
  303 + "oździe", "ózd"; (* wątpliwe *)
  304 + "ębie", "ąb";
  305 + "ędzie", "ąd";
  306 + "ęsie", "ąs";
  307 + "rze", "rs";
  308 + "ele", "ał";
  309 + "etle", "atł";
  310 + "śmie", "sm";
  311 + "vie", "v";
  312 + "rzie", "rz";
  313 + ];
  314 +
  315 + "funkcjonalnie_twarde_nowe_wyglos", [
  316 + "x", "ks";
  317 + "x", "s";
  318 +(* "dh", "dh";
  319 + "gh", "gh";
  320 + "nh", "nh";
  321 + "q", "q"; *)
  322 + "v", "v";
  323 +(* "rh", "rh"; *)
  324 + "dź", "dź";
  325 + ];
  326 +
  327 +
  328 + "obce_ch", [
  329 +(* "u", "u"; "y", "y"; "ee", "e";
  330 + "die", "di"; "pie", "pi"; "rie", "ri"; "tie", "ti"; "ne", "n";
  331 + "nii", "ni"; "rii", "ri"; "oji", "oj"; "zi", "z";
  332 + "gie", "g"; "kie", "k";
  333 + "sze", "ch"; "cze", "c"; "rze", "r"; "esie", "os"; (*"", ""; "", "";
  334 + "", ""; "", ""; "", ""; "", ""; "", ""; "", "";
  335 + "", ""; "", ""; "", ""; "", ""; "", ""; "", ""; *)*)
  336 + ];*)
  337 +
  338 + "sz", [
  339 + "sz", "sz", "sz";
  340 + ];
  341 +
  342 + "sz_i", [
  343 + "sz", "si", "sz";
  344 + ];
  345 +
  346 + "adj_grad_sz", [
  347 + "b", "b", "b";
  348 + "k", "b", "bk";
  349 + "k", "b", "bok";
  350 + "c", "ęt", "ąc";
  351 + "d", "d", "d";
  352 + "d", "ed", "ad";
  353 + "k", "d", "dk";
  354 + "h", "h", "h";
  355 + "g", "ż", "g";
  356 + "k", "k", "k";
  357 + "k", "k", "kk";
  358 + "ł", "l", "ł";
  359 + "ł", "el", "oł";
  360 + "ł", "el", "ał";
  361 + "k", "el", "ałk";
  362 + "k", "l", "lek";
  363 + "m", "m", "m";
  364 + "n", "ń", "n";
  365 + "n", "eń", "an";
  366 + "ń", "ń", "ń";
  367 + "k", "ń", "nk";
  368 + "n", "eń", "on";
  369 + "n", "ień", "on";
  370 + "p", "p", "p";
  371 + "ṕ", "p", "ṕ";
  372 + "k", "p", "pk";
  373 + "r", "r", "r";
  374 + "r", "ędr", "ądr";
  375 + "k", "r", "rok";
  376 + "k", "ż", "sk";
  377 + "k", "ż", "sok";
  378 + "k", "ęż", "ąsk";
  379 + "t", "t", "t";
  380 + "k", "t", "tk";
  381 + "k", "t", "ck";
  382 + "w", "w", "w";
  383 + "ż", "ż", "ż";
  384 + "k", "ż", "żk";
  385 + ];
  386 +
  387 + "adj_grad_iejsz", [
  388 + "d", "dzi", "d";
  389 + "k", "ci", "k";
  390 + "k", "ż", "ekk";
  391 + "ł", "l", "ł";
  392 + "ł", "śl", "sł";
  393 + "ł", "źl", "zł";
  394 + "m", "mi", "m";
  395 + "n", "ni", "n";
  396 + "n", "śni", "sn";
  397 + "n", "źni", "zn";
  398 + "ń", "ni", "ń";
  399 + "r", "rz", "r";
  400 + "t", "ci", "t";
  401 + "t", "ści", "st";
  402 + "w", "wi", "w";
  403 + ];
  404 +
  405 + "adv_grad", [
  406 + "k", "c", "kk";
  407 + "k", "c", "tk";
  408 + "k", "ci", "k";
  409 + "k", "ci", "tk";
  410 + "k", "cz", "k";
  411 + "k", "dz", "dk";
  412 + "k", "dzi", "dk";
  413 + "k", "ż", "żk";
  414 + "k", "ni", "nk";
  415 + "k", "bi", "bok";
  416 + "n", "ieni", "on";
  417 + "k", "l", "lek";
  418 + "k", "rz", "rok";
  419 + "k", "ęzi", "ąsk";
  420 + "c", "ęc", "ąc";
  421 + "k", "ż", "ekk";
  422 + "g", "ż", "g";
  423 + "k", "ż", "sk";
  424 + "k", "ż", "sok";
  425 + "cz", "cz", "cz";
  426 + "ḿ", "mi", "ḿ";
  427 + "ń", "ni", "ń";
  428 + "ṕ", "pi", "ṕ";
  429 + "ż", "ż", "ż";
  430 + ];
  431 +
  432 + "ger", [
  433 + "ć", "ci", "ć";
  434 + "ń", "ni", "ń";
  435 + ];
  436 + "ger_wyglos", [
  437 + "ć", "ć", "ć";
  438 + "ń", "ń", "ń";
  439 + ];
  440 + "pact", [
  441 + "c", "c", "c";
  442 + ];
  443 + "ppas", [
  444 + "t", "t", "t";
  445 + "n", "n", "n";
  446 + ];
  447 + "ppas_i", [
  448 + "t", "ci", "t";
  449 + "n", "ni", "n";
  450 +(* "y", "", "y"; *)
  451 + "n", "eni", "on";
  452 + ];
  453 + "praet", [
  454 + "ł", "ł", "ł";
  455 + ];
  456 + "praet_i", [
  457 + "ł", "li", "ł";
  458 + "ł", "edli", "adł";
  459 + "ł", "etli", "otł";
  460 + "ł", "eśli", "osł";
  461 + "ł", "eźli", "azł";
  462 + "ł", "eźli", "ozł";
  463 + "ł", "śli", "sł";
  464 + "ł", "źli", "zł";
  465 + "ł", "eli", "ał";
  466 + ];
  467 + "ae", [
  468 + "", "a", "a";
  469 + "", "e", "e";
  470 + ];
  471 +
  472 + "verb_inf_ć", [
  473 + "b", "ś", "b";
  474 + "d", "ś", "d";
  475 + "d", "eś", "ad";
  476 + "d", "eś", "od";
  477 + "d", "óś", "od";
  478 + "d", "ąś", "ęd";
  479 + "s", "ś", "s";
  480 + "s", "eś", "os";
  481 + "s", "ąś", "ęs";
  482 + "s", "óś", "os";
  483 + "t", "eś", "ot";
  484 + "z", "ź", "z";
  485 + "z", "eź", "az";
  486 + "z", "eź", "oz";
  487 + "z", "ąź", "ęz";
  488 +(* "n", "nię", "n"; *)
  489 +(* "ą", "óś", "osną";
  490 + "ą", "ąź", "ęzną";
  491 + "d", "dną", "d";
  492 + "g", "gną", "g";
  493 + "k", "kną", "k";*)
  494 + ];
  495 + "verb_inf_c", [
  496 + "g", "", "g";
  497 + "g", "ó", "og";
  498 + "g", "ą", "ęg";
  499 + "k", "", "k";
  500 + "k", "ą", "ęk";
  501 +(* "ą", "ąc", "ęgną";
  502 + "ą", "ąc", "ękną";*)
  503 + ];
  504 +
  505 + "verb_łszy", [
  506 + "", "ad", "ed";
  507 + "", "ód", "od";
  508 + "", "óg", "og";
  509 + "", "ók", "ok";
  510 + "", "ós", "os";
  511 + "", "ót", "ot";
  512 + "", "óz", "oz";
  513 + "", "ąb", "ęb";
  514 + "", "ąd", "ęd";
  515 + "", "ąg", "ęg";
  516 + "", "ąk", "ęk";
  517 + "", "ąs", "ęs";
  518 + "", "ązg", "ęzg";
  519 + "", "ąz", "ęz";
  520 + "", "b", "b";
  521 + "", "d", "d";
  522 + "", "g", "g";
  523 + "", "h", "h";
  524 + "", "k", "k";
  525 + "", "p", "p";
  526 + "", "s", "s";
  527 + "", "t", "t";
  528 + "", "z", "z";
  529 + "", "ż", "ż";
  530 + "", "r", "r";
  531 + "", "ł", "ł";
  532 + (* ruchome e *)
  533 + "", "ech", "ch";
  534 + "", "sech", "esch";
  535 + "", "szed", "esz";
  536 + ];
  537 +
  538 + "patal_j", [
  539 + "", "szcz", "ść";
  540 + "", "żdż", "źdź";
  541 + "", "ż", "ź";
  542 + "", "cz", "ć";
  543 + "", "sz", "ś";
  544 + "", "c", "ć";
  545 + "", "n", "ń";
  546 + "", "dz", "dź";
  547 + ];
  548 +
  549 + "verb_nie", [ (* wyjątki *)
  550 + "", "gn", "gi";
  551 + "", "mn", "mi";
  552 + "", "pn", "pi";
  553 + "", "tn", "ci";
  554 + "", "śn", "s";
  555 + "", "źn", "z";
  556 + "", "dm", "d";
  557 + "", "jm", "j";
  558 + "", "żm", "ż";
  559 + "", "źm", "zi";
  560 + (* ruchome e *)
  561 + "", "bejm", "bj";
  562 + "", "dejm", "dj";
  563 + "", "eczn", "cz";
  564 + "", "edm", "d";
  565 + "", "egn", "gi";
  566 + "", "ekln", "kl";
  567 + "", "emn", "mi";
  568 + "", "epn", "pi";
  569 + "", "espn", "spi";
  570 + "", "eszczn", "szcz";
  571 + "", "etn", "ci";
  572 + "", "ezdm", "zd";
  573 + "", "eźm", "zi";
  574 + "", "eżm", "ż";
  575 + "", "eżn", "ż";
  576 + "", "zejm", "zj";
  577 + "", "zekln", "skl";
  578 + "", "zepn", "spi";
  579 + "", "zetn", "ści";
  580 + ];
  581 +
  582 + "verb_nię", [
  583 + "", "ś", "s";
  584 + "", "ź", "z";
  585 + "", "ęd", "ad";
  586 + ];
  587 +
  588 + "verb_ie", [
  589 + "", "bierze", "ebr";
  590 + "", "ce", "t";
  591 + "", "cze", "k";
  592 + "", "cze", "t";
  593 + "", "dzie", "ch";
  594 + "", "esze", "os";
  595 + "", "jdzie", "laz";
  596 + "", "szcze", "sk";
  597 + "", "szcze", "st";
  598 + "", "ędzie", "ad";
  599 + "", "ęże", "eg";
  600 + "", "ście", "s";
  601 + "", "żdże", "zd";
  602 + "", "żdże", "zg";
  603 + "", "że", "g";
  604 + "", "że", "z";
  605 + "", "tanie", "t";
  606 + "", "aje", "aj";
  607 + "", "le", "l";
  608 + "", "sze", "s";
  609 + "", "owie", "w";
  610 + "", "źre", "ziar";
  611 + "", "orzy", "ar";
  612 + "", "re", "ar";
  613 + "", "rze", "ar";
  614 + (* ruchome e *)
  615 + "", "ierze", "r";
  616 + "", "pierze", "epr";
  617 + "", "spierze", "zepr";
  618 + "", "ściele", "esł";
  619 + "", "ściele", "sł";
  620 + "", "edrze", "dar";
  621 + "", "emrze", "mar";
  622 + "", "eprze", "par";
  623 + "", "esprze", "spar";
  624 + "", "etrze", "tar";
  625 + "", "ewrze", "war";
  626 + "", "eźre", "ziar";
  627 + "", "eźrze", "ziar";
  628 + "", "eżre", "żar";
  629 + "", "iele", "eł";
  630 + "", "zeprze", "spar";
  631 + "", "zetrze", "star";
  632 + ];
  633 +
  634 + "r", [
  635 + "", "r", "r";
  636 + ];
  637 +
  638 + "nieregularne", [
  639 + "e", "je", "jad";
  640 + "e", "re", "rz";
  641 + "e", "iele", "eł";
  642 + "ie", "wie", "wiedzi";
  643 + "i", "śpi", "sp";
  644 + "a", "ma", "mi";
  645 + "eje", "reje", "r";
  646 + "e", "ce", "ci";
  647 + "i", "oi", "";
  648 + "ie", "będzie", "b";
  649 + ];
  650 +
  651 + "verb_impt", [
  652 + "", "cz", "k";
  653 + "", "cz", "t";
  654 + "", "dź", "ch";
  655 + "", "edz", "ad";
  656 + "", "edź", "od";
  657 + "", "esz", "os";
  658 + "", "eć", "ot";
  659 + "", "eś", "os";
  660 + "", "eź", "az";
  661 + "", "eź", "oz";
  662 + "", "gnij", "gi";
  663 + "", "iel", "eł";
  664 + "", "jdź", "laz";
  665 + "", "mnij", "mi";
  666 + "", "pnij", "pi";
  667 + "", "ryj", "ar";
  668 + "", "sz", "ch";
  669 + "", "tnij", "ci";
  670 + "", "zcz", "k";
  671 + "", "zcz", "t";
  672 + "", "ódź", "od";
  673 + "", "órz", "or";
  674 + "", "óż", "og";
  675 + "", "ądź", "ęd";
  676 + "", "ąś", "ęs";
  677 + "", "ędź", "ad";
  678 + "", "ęż", "eg";
  679 + "", "ś", "s";
  680 + "", "ścij", "s";
  681 + "", "ślij", "sł";
  682 + "", "śnij", "s";
  683 + "", "śpij", "sp";
  684 + "", "ź", "z";
  685 + "", "źnij", "z";
  686 + "", "ż", "g";
  687 + "", "ż", "z";
  688 + "", "żdż", "zd";
  689 + "", "żdż", "zg";
  690 + "", "żyj", "g";
  691 + "", "z", "z";
  692 + "", "laj", "l";
  693 + "", "rej", "r";
  694 + "", "sij", "s";
  695 + "", "wij", "w";
  696 + "", "rz", "r";
  697 + "", "sz", "s";
  698 + "", "bój", "b";
  699 + "", "tój", "t";
  700 + "", "bądź", "b";
  701 + "", "dź", "d";
  702 + (* ruchome e *)
  703 + "", "bejmij", "bj";
  704 + "", "bierz", "ebr";
  705 + "", "dejmij", "dj";
  706 + "", "ecznij", "cz";
  707 + "", "edmij", "d";
  708 + "", "egnij", "gi";
  709 + "", "eklnij", "kl";
  710 + "", "emnij", "mi";
  711 + "", "epnij", "pi";
  712 + "", "espnij", "spi";
  713 + "", "eszcznij", "szcz";
  714 + "", "etnij", "ci";
  715 + "", "ezdmij", "zd";
  716 + "", "eź", "zi";
  717 + "", "eźmij", "zi";
  718 + "", "eźryj", "ziar";
  719 + "", "eżmij", "ż";
  720 + "", "eżnij", "ż";
  721 + "", "eżryj", "żar";
  722 + "", "ierz", "r";
  723 + "", "pierz", "epr";
  724 + "", "spierz", "zepr";
  725 + "", "zejmij", "zj";
  726 + "", "zeklnij", "skl";
  727 + "", "zepnij", "spi";
  728 + "", "zetnij", "ści";
  729 + "", "ściel", "esł";
  730 + "", "ściel", "sł";
  731 + ];
  732 +
  733 + "verb_ną", [ (* wyjątki *)
  734 + "", "gn", "gi";
  735 + "", "mn", "mi";
  736 + "", "pn", "pi";
  737 + "", "tn", "ci";
  738 + "", "dm", "d";
  739 + "", "jm", "j";
  740 + "", "ajm", "aj";
  741 + "", "ejm", "ej";
  742 + "", "ojm", "oj";
  743 + "", "ujm", "uj";
  744 + "", "yjm", "yj";
  745 + "", "żm", "ż";
  746 + (* ruchome e *)
  747 + "", "bejm", "bj";
  748 + "", "dejm", "dj";
  749 + "", "eczn", "cz";
  750 + "", "edm", "d";
  751 + "", "egn", "gi";
  752 + "", "ekln", "kl";
  753 + "", "emn", "mi";
  754 + "", "epn", "pi";
  755 + "", "espn", "spi";
  756 + "", "eszczn", "szcz";
  757 + "", "etn", "ci";
  758 + "", "ezdm", "zd";
  759 + "", "ezm", "zi";
  760 + "", "eżm", "ż";
  761 + "", "eżn", "ż";
  762 + "", "zejm", "zj";
  763 + "", "zekln", "skl";
  764 + "", "zepn", "spi";
  765 + "", "zetn", "ści";
  766 + ];
  767 +
  768 + "verb_j", [
  769 + "", "ad", "ech";
  770 + "", "cz", "k";
  771 + "", "cz", "t";
  772 + "", "c", "t";
  773 + "", "ec", "ot";
  774 + "", "d", "ch";
  775 + "", "edz", "ad";
  776 + "", "ez", "az";
  777 + "", "ior", "r";
  778 + "", "jd", "laz";
  779 + "", "jd", "sz";
  780 + "", "nid", "esz";
  781 + "", "nid", "sz";
  782 + "", "nijd", "esz";
  783 + "", "nijd", "sz";
  784 + "", "sz", "ch";
  785 + "", "szcz", "sk";
  786 + "", "szcz", "st";
  787 + "", "ójd", "osz";
  788 + "", "ąd", "ad";
  789 + "", "ęd", "ad";
  790 + "", "śl", "sł";
  791 + "", "śpi", "sp";
  792 + "", "żdż", "zd";
  793 + "", "żdż", "zg";
  794 + "", "ż", "g";
  795 + "", "ęż", "eg";
  796 + "", "ż", "z";
  797 + "", "dadz", "d";
  798 + "", "bi", "b";
  799 + "", "mi", "m";
  800 + "", "pi", "p";
  801 + "", "st", "s";
  802 + "", "rz", "r";
  803 + "", "sz", "s";
  804 + "", "b", "b";
  805 + "", "d", "d";
  806 + "", "g", "g";
  807 + "", "aj", "aj";
  808 + "", "k", "k";
  809 + "", "l", "l";
  810 + "", "s", "s";
  811 + "", "t", "t";
  812 + "", "w", "w";
  813 + "", "owi", "w";
  814 + "", "ow", "w";
  815 + "", "z", "z";
  816 + "", "cz", "cz";
  817 + "", "źr", "ziar";
  818 + "", "iel", "eł";
  819 + "", "orz", "ar";
  820 + "", "r", "ar";
  821 + "", "dz", "d";
  822 + (* ruchome e *)
  823 + "", "bior", "ebr";
  824 + "", "pior", "epr";
  825 + "", "spior", "zepr";
  826 + "", "ściel", "esł";
  827 + "", "ściel", "sł";
  828 + "", "edr", "dar";
  829 + "", "emr", "mar";
  830 + "", "epr", "par";
  831 + "", "espr", "spar";
  832 + "", "etr", "tar";
  833 + "", "ewr", "war";
  834 + "", "eźr", "ziar";
  835 + "", "eżr", "żar";
  836 + "", "zepr", "spar";
  837 + "", "zetr", "star";
  838 + ];
  839 +
  840 + "nieregularne_j", [
  841 + "a", "ma", "mi";
  842 + "aj", "maj", "mi";
  843 + "ej", "rej", "r";
  844 + "oj", "oj", "";
  845 + "ε", "będ", "b";
  846 + "e", "je", "jad";
  847 + "ie", "wie", "wiedzi";
  848 +(* "", "", "";
  849 + "", "", "";*)
  850 + ];
  851 +
  852 + "inf_e", [
  853 + (* ruchome e *)
  854 + "", "edrz", "dar";
  855 + "", "eml", "meł";
  856 + "", "emrz", "mar";
  857 + "", "epl", "peł";
  858 + "", "eprz", "par";
  859 + "", "esprz", "spar";
  860 + "", "etrz", "tar";
  861 + "", "ewrz", "war";
  862 + "", "eźr", "ziar";
  863 + "", "eźrz", "ziar";
  864 + "", "eżr", "żar";
  865 + "", "l", "eł";
  866 + "", "r", "ar";
  867 + "", "rz", "ar";
  868 + "", "zeprz", "spar";
  869 + "", "zetrz", "star";
  870 + "", "źr", "ziar";
  871 + ];
  872 +(*
  873 +
  874 + "verb_ń", [
  875 + "", "ń", "";
  876 + ];
  877 +
  878 + "verb_fin", [
  879 + "i", "i", "i";
  880 + "y", "y", "y";
  881 + "y", "ędzie", "y";
  882 + "e", "e", "e";
  883 + "e", "i", "ie";
  884 + "e", "y", "e";
  885 + "e", "ma", "mie";
  886 + "e", "ce", "cie";
  887 + "e", "iele", "le";
  888 + "e", "li", "le";
  889 + "e", "miele", "emle";
  890 + "e", "e", "edzie";
  891 + "e", "piele", "eple";
  892 + "e", "re", "rze";
  893 + ];
  894 + "verb_fin_ter", [
  895 + "i", "i", "i";
  896 + "i", "aj","ai";
  897 + "i", "ej","ei";
  898 + "i", "oj","oi";
  899 + "i", "uj","ui";
  900 + "i", "c","ci";
  901 + "i", "cz","ci";
  902 + "i", "dz","dzi";
  903 + "i", "l","li";
  904 + "i", "rż","rzi";
  905 + "i", "szcz","ści";
  906 + "i", "sz","si";
  907 + "i", "żdż","ździ";
  908 + "i", "ż","zi";
  909 + "y", "cz", "czy";
  910 + "y", "ż", "ży";
  911 + "y", "sz", "szy";
  912 + "y", "rz", "rzy";
  913 + "y", "ęd", "y";
  914 + "e", "sz", "sze";
  915 + "e", "cz", "cze";
  916 + "e", "rz", "rze";
  917 + "e", "r", "re";
  918 + "e", "ż", "że";
  919 + "e", "r", "rze";
  920 + "e", "szcz", "ście";
  921 + "e", "sz", "sie";
  922 + "e", "żr", "zrz";
  923 + "e", "l", "le";
  924 + "e", "dz", "dzie";
  925 + "e", "maj", "mie";
  926 + "e", "iel", "le";
  927 + "e", "miel", "emle";
  928 + "e", "piel", "eple";
  929 + "e", "bi", "bie";
  930 + "e", "mi", "mie";
  931 + "e", "pi", "pie";
  932 + "e", "ni", "nie";
  933 + "e", "wi", "wie";
  934 + "e", "si", "sie";
  935 + "e", "fi", "fie";
  936 + "e", "ci", "cie";
  937 + "e", "zi", "zie";
  938 + "e", "dzi", "dzie";
  939 + "e", "n", "nie";
  940 + "e", "c", "cie";
  941 + "e", "j", "je";
  942 + ];
  943 + "verb_fin_pri", [
  944 + "e", "a", "ie";
  945 + "d", "e", "ad";
  946 + "e", "e", "edzie";
  947 + "a", "a", "a";
  948 + "e", "e", "e";
  949 + ];
  950 + "verb_ger_n", [
  951 + "a", "a", "a";
  952 + "a", "ściele", "sła";
  953 + "e", "e", "e";
  954 + "e", "e", "ie";
  955 + "e", "szcze", "ście";
  956 + "e", "sze", "sie";
  957 + "i", "ce", "ci";
  958 + "i", "cze", "ci";
  959 + "i", "dze", "dzi";
  960 + "i", "rże", "rzi";
  961 + "i", "szcze", "ści";
  962 + "i", "sze", "si";
  963 + "i", "żdże", "ździ";
  964 + "i", "że", "zi";
  965 + "i", "aje", "ai";
  966 + "i", "eje", "ei";
  967 + "i", "oje", "oi";
  968 + "i", "bie", "bi";
  969 + "i", "cie", "ci";
  970 + "i", "fie", "fi";
  971 + "i", "le", "li";
  972 + "i", "mie", "mi";
  973 + "i", "nie", "ni";
  974 + "i", "pie", "pi";
  975 + "i", "sie", "si";
  976 + "i", "wie", "wi";
  977 + "i", "zie", "zi";
  978 + "i", "le", "li";
  979 + "y", "e", "y";
  980 + "k", "cze", "k";
  981 + "t", "ece", "ot";
  982 + "d", "edze", "ad";
  983 + "d", "edze", "od";
  984 + "g", "że", "g";
  985 + "d", "dze", "d";
  986 + "s", "esie", "os";
  987 + "z", "ezie", "az";
  988 + "z", "ezie", "oz";
  989 + "b", "bie", "b";
  990 + "s", "sie", "s";
  991 + "z", "zie", "z";
  992 + ];
  993 + "verb_ger_c", [
  994 + "ą", "ię", "ą";
  995 + "ą", "ę", "ą";
  996 + "ą", "śnię", "sną";
  997 + "ą", "źnię", "zną";
  998 + "ą", "rźnię", "rzną";
  999 + "e", "ar", "re";
  1000 + "e", "ar", "rze";
  1001 + "e", "eł", "le";
  1002 + "e", "ziar", "źre";
  1003 + "i", "bi", "bi";
  1004 + "i", "ni", "ni";
  1005 + "i", "pi", "pi";
  1006 + "i", "wi", "wi";
  1007 +(* "u", "u", "u"; *)
  1008 + "y", "y", "y";
  1009 + "g", "gnię", "g";
  1010 + "k", "knię", "k";
  1011 + ];
  1012 + "verb_u", [
  1013 + "e", "e", "e";
  1014 + "i", "i", "i";
  1015 + "u", "u", "u";
  1016 + "y", "y", "y";
  1017 + ];
  1018 +
  1019 + "verb_'ε", [
  1020 + "", "eź", "zi";
  1021 + ];
  1022 +
  1023 +*)
  1024 +]
  1025 +
(* Output tag attached by a guessing rule when it matches:
   - [M s]      : a complete morphosyntactic tag given verbatim as [s]
                  (e.g. "subst:sg:gen:n1" in the commented-out entries below);
   - [T (k, v)] : attribute [k] set to the literal value [v]; keys used in
                  this file are "suf", "flex", "flex2", "cat", "grad",
                  "group" and "lemma";
   - [A k]      : attribute [k] whose value is not given here but filled in
                  elsewhere.  NOTE(review): used as [A "con"] / [A "group"]
                  in the rule tables; the value source is the rule engine
                  outside this chunk -- confirm against the applying code. *)
type tags =
    M of string
  | T of string * string
  | A of string
  1030 +
(* Guessing rules: an association list from rule-set name (e.g. "NOUN-FLEX")
   to a list of triples
     (alternation class, orthographic suffix, tags emitted on match).
   The first component names a stem/alternation class ("dowolne" = any stem;
   "funkcjonalnie_miekkie_*" / "funkcjonalnie_twarde_*" = functionally
   soft/hard stems; "kontrakcje", "kapitaliki_*", "verb_*", etc. refer to
   classes defined elsewhere in this file).  The second component is the
   literal suffix consumed; "" consumes nothing and "ε" in tag values marks
   an empty ending.  NOTE(review): the matching semantics (how class names
   select stems, rule ordering, and how [A _] is resolved) live in the rule
   engine outside this chunk -- confirm there.  All data strings below are
   program data and are kept byte-for-byte. *)
let rules = [
  (* Particle suffixes (-kolwiek, -ż/-że, -ś, -si(k)) peeled off a form
     before the inflectional rule sets apply; the last entry matches the
     bare form with no extra tags. *)
  "KOLWIEK-SUFFIXES", [
    "dowolne","żkolwiek",[T("suf","żkolwiek")];
    "dowolne","żekolwiek",[T("suf","żkolwiek")];
    "dowolne","śkolwiek",[T("suf","śkolwiek")];
    "dowolne","kolwiek",[T("suf","kolwiek")];
    "dowolne","ż",[T("suf","ż")];
    "dowolne","że",[T("suf","ż")];
    "dowolne","ściś",[T("suf","ściś")];
    "dowolne","ciś",[T("suf","ciś")];
    "dowolne","ś",[T("suf","ś")];
    "dowolne","ści",[T("suf","ści")];
    "dowolne","sik",[T("suf","sik")];
    "dowolne","si",[T("suf","si")];
    "dowolne","",[];
  ];

  (* Adjectival endings; each entry records the full ending as "flex",
     copies the "con" attribute and fixes the category to "adj". *)
  "ADJ-FLEX", [
    "funkcjonalnie_miekkie_iy", "",    [T("flex","y");   A "con"; T("cat","adj")];
    "funkcjonalnie_twarde_y",   "",    [T("flex","y");   A "con"; T("cat","adj")];
    "funkcjonalnie_miekkie_iy", "ch",  [T("flex","ych"); A "con"; T("cat","adj")];
    "funkcjonalnie_twarde_y",   "ch",  [T("flex","ych"); A "con"; T("cat","adj")];
    "funkcjonalnie_miekkie_iy", "m",   [T("flex","ym");  A "con"; T("cat","adj")];
    "funkcjonalnie_twarde_y",   "m",   [T("flex","ym");  A "con"; T("cat","adj")];
    "funkcjonalnie_miekkie_iy", "mi",  [T("flex","ymi"); A "con"; T("cat","adj")];
    "funkcjonalnie_twarde_y",   "mi",  [T("flex","ymi"); A "con"; T("cat","adj")];
    "funkcjonalnie_miekkie_ae", "e",   [T("flex","e");   A "con"; T("cat","adj")];
    "funkcjonalnie_twarde_e",   "e",   [T("flex","e");   A "con"; T("cat","adj")];
    "funkcjonalnie_miekkie_ae", "ego", [T("flex","ego"); A "con"; T("cat","adj")];
    "funkcjonalnie_twarde_e",   "ego", [T("flex","ego"); A "con"; T("cat","adj")];
    "funkcjonalnie_miekkie_ae", "ej",  [T("flex","ej");  A "con"; T("cat","adj")];
    "funkcjonalnie_twarde_e",   "ej",  [T("flex","ej");  A "con"; T("cat","adj")];
    "funkcjonalnie_miekkie_ae", "emu", [T("flex","emu"); A "con"; T("cat","adj")];
    "funkcjonalnie_twarde_e",   "emu", [T("flex","emu"); A "con"; T("cat","adj")];
    "funkcjonalnie_miekkie_ae", "a",   [T("flex","a");   A "con"; T("cat","adj")];
    "funkcjonalnie_twarde_a",   "a",   [T("flex","a");   A "con"; T("cat","adj")];
    "funkcjonalnie_miekkie_ae", "ą",   [T("flex","ą");   A "con"; T("cat","adj")];
    "funkcjonalnie_twarde_a",   "ą",   [T("flex","ą");   A "con"; T("cat","adj")];
    "funkcjonalnie_miekkie_ae", "o",   [T("flex","o");   A "con"; T("cat","adj")];
    "funkcjonalnie_twarde_a",   "o",   [T("flex","o");   A "con"; T("cat","adj")];
    "funkcjonalnie_twarde_a",   "u",   [T("flex","u");   A "con"; T("cat","adj")];
    "funkcjonalnie_twarde_i",   "",    [T("flex","i");   A "con"; T("cat","adj")];
    "funkcjonalnie_miekkie_wyglos","", [T("flex","ε");   A "con"; T("cat","adj")];
    "funkcjonalnie_twarde_wyglos", "", [T("flex","ε");   A "con"; T("cat","adj")];
  ];
  (* Adjectival endings after the graded (comparative -sz-) stem. *)
  "ADJ-FLEX-GRAD", [
    "sz",   "y",   [T("flex","y");   T("cat","adj:grad")];
    "sz",   "ych", [T("flex","ych"); T("cat","adj:grad")];
    "sz",   "ym",  [T("flex","ym");  T("cat","adj:grad")];
    "sz",   "ymi", [T("flex","ymi"); T("cat","adj:grad")];
    "sz",   "e",   [T("flex","e");   T("cat","adj:grad")];
    "sz",   "ego", [T("flex","ego"); T("cat","adj:grad")];
    "sz",   "ej",  [T("flex","ej");  T("cat","adj:grad")];
    "sz",   "emu", [T("flex","emu"); T("cat","adj:grad")];
    "sz",   "a",   [T("flex","a");   T("cat","adj:grad")];
    "sz",   "ą",   [T("flex","ą");   T("cat","adj:grad")];
    "sz_i", "",    [T("flex","i");   T("cat","adj:grad")];
  ];

  (* Comparative-degree markers (-sz- / -ejsz-). *)
  "ADJ-GRAD", [
    "adj_grad_sz",    "sz",   [T("grad","sz");    A "con"];
    "adj_grad_iejsz", "ejsz", [T("grad","iejsz"); A "con"];
  ];

  (* Adverbial endings (-o / -ie / -iej). *)
  "ADV-FLEX", [
    "funkcjonalnie_miekkie_ae", "o",  [T("flex","o");   A "con"; T("cat","adv")];
    "funkcjonalnie_twarde_a",   "o",  [T("flex","o");   A "con"; T("cat","adv")];
    "funkcjonalnie_twarde_ie",  "",   [T("flex","ie");  A "con"; T("cat","adv")];
    "adv_grad",                 "ej", [T("flex","iej"); A "con"; T("cat","adv")];
    "funkcjonalnie_twarde_ie",  "j",  [T("flex","iej"); A "con"; T("cat","adv")];
  ];

  (* Nominal endings.  The numeric suffix on the "flex" value (y1, y2, ...)
     distinguishes homographic endings by alternation class. *)
  "NOUN-FLEX", [
    "funkcjonalnie_miekkie_iy", "",    [T("flex","y1");   A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_y",   "",    [T("flex","y2");   A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "",    [T("flex","y3");   A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_iy", "ch",  [T("flex","ych1"); A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_y",   "ch",  [T("flex","ych2"); A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "ch",  [T("flex","ych3"); A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_iy", "m",   [T("flex","ym1");  A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_y",   "m",   [T("flex","ym2");  A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "m",   [T("flex","ym3");  A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_iy", "mi",  [T("flex","ymi1"); A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_y",   "mi",  [T("flex","ymi2"); A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "mi",  [T("flex","ymi3"); A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ae", "e",   [T("flex","e1");   A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_e",   "e",   [T("flex","e2");   A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "e",   [T("flex","e3");   A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ae", "ego", [T("flex","ego1"); A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_e",   "ego", [T("flex","ego2"); A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "ego", [T("flex","ego3"); A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ae", "ej",  [T("flex","ej1");  A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_e",   "ej",  [T("flex","ej2");  A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ae", "em",  [T("flex","em1");  A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_e",   "em",  [T("flex","em2");  A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "em",  [T("flex","em3");  A "con"; T("cat","noun")];
    "kontrakcje",               "em",  [T("flex","em4");  A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ae", "emu", [T("flex","emu1"); A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_e",   "emu", [T("flex","emu2"); A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "emu", [T("flex","emu3"); A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ae", "a",   [T("flex","a1");   A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_a",   "a",   [T("flex","a2");   A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "a",   [T("flex","a3");   A "con"; T("cat","noun")];
    "kontrakcje",               "a",   [T("flex","a4");   A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ae", "ach", [T("flex","ach1"); A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_a",   "ach", [T("flex","ach2"); A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "ach", [T("flex","ach3"); A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ae", "ami", [T("flex","ami1"); A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_a",   "ami", [T("flex","ami2"); A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "ami", [T("flex","ami3"); A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_wyglos","mi", [T("flex","ami4"); A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_wyglos", "mi", [T("flex","ami5"); A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ae", "ą",   [T("flex","ą1");   A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_a",   "ą",   [T("flex","ą2");   A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "ą",   [T("flex","ą3");   A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ae", "ę",   [T("flex","ę1");   A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_a",   "ę",   [T("flex","ę2");   A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "ę",   [T("flex","ę3");   A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ae", "o",   [T("flex","o1");   A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_a",   "o",   [T("flex","o2");   A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "o",   [T("flex","o3");   A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ae", "om",  [T("flex","om1");  A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_a",   "om",  [T("flex","om2");  A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "om",  [T("flex","om3");  A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ae", "owi", [T("flex","owi1"); A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_a",   "owi", [T("flex","owi2"); A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "owi", [T("flex","owi3"); A "con"; T("cat","noun")];
    "kontrakcje",               "owi", [T("flex","owi4"); A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ae", "owie",[T("flex","owie1"); A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_a",   "owie",[T("flex","owie2"); A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "owie",[T("flex","owie3"); A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ae", "ów",  [T("flex","ów1");  A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_a",   "ów",  [T("flex","ów2");  A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "ów",  [T("flex","ów3");  A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ae", "u",   [T("flex","u1");   A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_a",   "u",   [T("flex","u2");   A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "u",   [T("flex","u3");   A "con"; T("cat","noun")];
    "kontrakcje",               "u",   [T("flex","u4");   A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_a",   "um",  [T("flex","um1");  A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "um",  [T("flex","um2");  A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_a",   "us",  [T("flex","us");   A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_i",   "",    [T("flex","i1");   A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii", "i",   [T("flex","i2");   A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_ie",  "",    [T("flex","ie1");  A "con"; T("cat","noun")];
    "kontrakcje",               "ie",  [T("flex","ie2");  A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_wyglos","", [T("flex","ε1");   A "con"; T("cat","noun")];
    "funkcjonalnie_twarde_wyglos", "", [T("flex","ε2");   A "con"; T("cat","noun")];
    "funkcjonalnie_miekkie_ii_wyglos","", [T("flex","ε3"); A "con"; T("cat","noun")];
    "kontrakcje",               "",    [T("flex","ε4");   A "con"; T("cat","noun")];
  ];
(* "ε->t","",M "subst:sg:nom.acc.voc:n1";
   "ci->t","a",M "subst:sg:gen:n1";
   "ci->t","u",M "subst:sg:dat.loc:n1";
   "ci->t","em",M "subst:sg:inst:n1";*)
  (* Nominal endings for stems written in capitals (e.g. acronyms). *)
  "NOUN-FLEX-CAP", [
    "kapitaliki_y","",      [T("flex","yC");    A "con"; T("cat","noun")];
    "kapitaliki_e","e",     [T("flex","eC");    A "con"; T("cat","noun")];
    "kapitaliki_e","em",    [T("flex","emC");   A "con"; T("cat","noun")];
    "kapitaliki_a","ach",   [T("flex","achC");  A "con"; T("cat","noun")];
    "kapitaliki_a","ami",   [T("flex","amiC");  A "con"; T("cat","noun")];
    "kapitaliki_a","a",     [T("flex","aC");    A "con"; T("cat","noun")];
    "kapitaliki_a","ą",     [T("flex","ąC");    A "con"; T("cat","noun")];
    "kapitaliki_a","ę",     [T("flex","ęC");    A "con"; T("cat","noun")];
    "kapitaliki_a","o",     [T("flex","oC");    A "con"; T("cat","noun")];
    "kapitaliki_a","om",    [T("flex","omC");   A "con"; T("cat","noun")];
    "kapitaliki_a","owi",   [T("flex","owiC");  A "con"; T("cat","noun")];
    "kapitaliki_a","owie",  [T("flex","owieC"); A "con"; T("cat","noun")];
    "kapitaliki_a","ów",    [T("flex","ówC");   A "con"; T("cat","noun")];
    "kapitaliki_a","u",     [T("flex","uC");    A "con"; T("cat","noun")];
    "kapitaliki_ie","",     [T("flex","ieC");   A "con"; T("cat","noun")];
    "kapitaliki_wyglos","", [T("flex","εC");    A "con"; T("cat","noun")];
    "kapitaliki_wyglos","A",[T("flex","AC");    A "con"; T("cat","noun")];
  ];

  (* Gerund endings. *)
  "GER-FLEX", [
    "ger",        "om",  [T("flex","om")];
    "ger",        "ami", [T("flex","ami")];
    "ger",        "ach", [T("flex","ach")];
    "ger",        "e",   [T("flex","e")];
    "ger",        "a",   [T("flex","a")];
    "ger",        "u",   [T("flex","u")];
    "ger",        "em",  [T("flex","em")];
    "ger_wyglos", "",    [T("flex","ε")];
  ];
  (* Active adjectival participle endings. *)
  "PACT-FLEX", [
    "pact", "ych", [T("flex","ych")];
    "pact", "ym",  [T("flex","ym")];
    "pact", "ymi", [T("flex","ymi")];
    "pact", "e",   [T("flex","e")];
    "pact", "o",   [T("flex","o")];
    "pact", "y",   [T("flex","y")];
    "pact", "ą",   [T("flex","ą")];
    "pact", "a",   [T("flex","a")];
    "pact", "ego", [T("flex","ego")];
    "pact", "emu", [T("flex","emu")];
    "pact", "ej",  [T("flex","ej")];
    "pact", "",    [T("flex","ε")];
  ];
  (* Passive adjectival participle endings. *)
  "PPAS-FLEX", [
    "ppas",   "ych", [T("flex","ych")];
    "ppas",   "ym",  [T("flex","ym")];
    "ppas",   "ymi", [T("flex","ymi")];
    "ppas",   "e",   [T("flex","e")];
    "ppas",   "y",   [T("flex","y")];
    "ppas",   "ą",   [T("flex","ą")];
    "ppas",   "a",   [T("flex","a")];
    "ppas",   "o",   [T("flex","o")];
    "ppas",   "ego", [T("flex","ego")];
    "ppas",   "emu", [T("flex","emu")];
    "ppas",   "ej",  [T("flex","ej")];
    "ppas_i", "",    [T("flex","i")];
  ];
  (* Past-tense (l-participle) endings, incl. agglutinated person markers. *)
  "PRAET-FLEX", [
    "praet",   "",       [T("flex","ε")];
    "praet",   "em",     [T("flex","em")];
    "praet",   "eś",     [T("flex","eś")];
    "praet",   "a",      [T("flex","a")];
    "praet",   "am",     [T("flex","am")];
    "praet",   "aś",     [T("flex","aś")];
    "praet",   "o",      [T("flex","o")];
    "praet",   "om",     [T("flex","om")];
    "praet",   "oś",     [T("flex","oś")];
    "praet",   "y",      [T("flex","y")];
    "praet",   "yśmy",   [T("flex","yśmy")];
    "praet",   "yście",  [T("flex","yście")];
    "praet",   "ego",    [T("flex","ego")];
    "praet",   "emu",    [T("flex","emu")];
    "praet_i", "",       [T("flex","i")];
    "praet_i", "śmy",    [T("flex","iśmy")];
    "praet_i", "ście",   [T("flex","iście")];
  ];
  (* Finite-verb endings. *)
  "FIN-FLEX", [
    "dowolne", "",    [T("flex","ε")];
    "dowolne", "my",  [T("flex","my")];
    "dowolne", "cie", [T("flex","cie")];
    "dowolne", "sz",  [T("flex","sz")];
  ];
  (* Finite-verb endings for the j-conjugation. *)
  "FIN-FLEX-J", [
    "ae",      "m", [T("flex","m")];
    "dowolne", "ą", [T("flex","ą")];
    "dowolne", "ę", [T("flex","ę")];
  ];

  (* Secondary verbal formatives between group and ending ("flex2"). *)
  "VERB-FLEX2", [
    "verb_inf_ć", "ć",    [T("flex2","ć")];
    "verb_inf_c", "c",    [T("flex2","c")];
    "dowolne",    "ć",    [T("flex2","ć")];
(*  "eai",        "",     [T("flex2","ε?")]; *)
    "dowolne",    "",     [T("flex2","ε")];
    "dowolne",    "t",    [T("flex2","t")];
    "dowolne",    "wszy", [T("flex2","wszy")];
    "dowolne",    "ł",    [T("flex2","ł")];
    "verb_łszy",  "ł",    [T("flex2","ł")];
    "verb_łszy",  "łszy", [T("flex2","łszy")];
  ];
  "VERB-FLEX2-J", [
    "dowolne", "",   [T("flex2","ε")];
    "dowolne", "ń",  [T("flex2","ń")];
    "dowolne", "n",  [T("flex2","n")];
    "dowolne", "ąc", [T("flex2","ąc")];
  ];

  (* Thematic-suffix verb groups (-a-, -u-, -y-, -uje-, ...). *)
  "VERB-GROUP-SUFIX", [
    "dowolne","a",[T("group","a"); T("cat","verb")];
(*  "dowolne","owa",[T("group","owa"); T("cat","verb")];
    "dowolne","iwa",[T("group","iwa"); T("cat","verb")];
    "dowolne","ywa",[T("group","ywa"); T("cat","verb")];
    "dowolne","awa",[T("group","awa"); T("cat","verb")];
    "dowolne","owywa",[T("group","owywa"); T("cat","verb")];*)
    "dowolne","u",[T("group","u"); T("cat","verb")];
    "funkcjonalnie_twarde_y","",[T("group","y"); T("cat","verb")];

    "dowolne","uje",[T("group","uje"); T("cat","verb")];
    "dowolne","eje",[T("group","eje"); T("cat","verb")];
    "dowolne","aje",[T("group","aje"); T("cat","verb")];
(*  "dowolne","owuje",[T("group","owuje"); T("cat","verb")]; *)
    "funkcjonalnie_twarde_y","je",[T("group","yje"); T("cat","verb")];

    "dowolne","uj",[T("group","uj"); T("cat","verb")];
    "dowolne","ej",[T("group","ej"); T("cat","verb")];
    "dowolne","aj",[T("group","aj"); T("cat","verb")];
(*  "dowolne","owuj",[T("group","owuj"); T("cat","verb")]; *)
    "funkcjonalnie_twarde_y","j",[T("group","yj"); T("cat","verb")];
  ];

  (* Verb groups over palatalized stems. *)
  "VERB-GROUP-PATAL", [
    "funkcjonalnie_miekkie_iy", "",    [T("group","y");   T("cat","verb")];
    "funkcjonalnie_miekkie_ae", "e",   [T("group","e");   T("cat","verb")];
    "funkcjonalnie_miekkie_ae", "eje", [T("group","eje"); T("cat","verb")];
    "funkcjonalnie_miekkie_iy", "je",  [T("group","yje"); T("cat","verb")];
    "funkcjonalnie_miekkie_ae", "ej",  [T("group","ej");  T("cat","verb")];
    "funkcjonalnie_miekkie_iy", "j",   [T("group","yj");  T("cat","verb")];
    "funkcjonalnie_miekkie_wyglos","", [T("group","ε");   T("cat","verb")];
    "funkcjonalnie_miekkie_ae", "a",   [T("group","a");   T("cat","verb")];
  ];

  (* j-conjugation groups over palatalized stems. *)
  "VERB-GROUP-J-PATAL", [
    "funkcjonalnie_miekkie_ae", "",   [T("group","J");  T("cat","verb")];
    "funkcjonalnie_miekkie_iy", "j",  [T("group","j");  T("cat","verb")];
    "funkcjonalnie_miekkie_ae", "ej", [T("group","ej"); T("cat","verb")];
    "patal_j",                  "",   [T("group","J");  T("cat","verb")];
    "funkcjonalnie_miekkie_ae", "e",  [T("group","Je"); T("cat","verb")];
    "patal_j",                  "e",  [T("group","Je"); T("cat","verb")];
    "funkcjonalnie_miekkie_ae", "a",  [T("group","Ja"); T("cat","verb")];
    "funkcjonalnie_miekkie_ae", "o",  [T("group","Jo"); T("cat","verb")];
    "patal_j",                  "o",  [T("group","Jo"); T("cat","verb")];
  ];

  (* -ną- conjugation groups. *)
  "VERB-GROUP-NĄ", [
    "dowolne","ną",[T("group","ną"); T("cat","verb")];
    "dowolne","ą",[T("group","ą"); T("cat","verb")];
    "dowolne","nie",[T("group","nie"); T("cat","verb")];
    "verb_nie","ie",[T("group","nie"); T("cat","verb")];
    "dowolne","nię",[T("group","nię"); T("cat","verb")];
    "verb_nię","nię",[T("group","nię"); T("cat","verb")];
    "dowolne","ę",[T("group","ę"); T("cat","verb")];
    "dowolne","nę",[T("group","nę"); T("cat","verb")];
    "dowolne","",[T("group","ε"); T("cat","verb")];
    "funkcjonalnie_twarde_ie","",[T("group","ie"); T("cat","verb")];
    "verb_ie","",[T("group","ie"); T("cat","verb")];
    "r","e",[T("group","e"); T("cat","verb")];
    "nieregularne","",[A "group"; T("cat","verb")];
    "dowolne","ń",[T("group","ń"); T("cat","verb")];
    "dowolne","nij",[T("group","nij"); T("cat","verb")];
    "dowolne","mij",[T("group","mij"); T("cat","verb")];
    "verb_impt","",[T("group","ε"); T("cat","verb")];
(*  "dowolne","nąć",[T("group","ną"); T("flex2","ć"); T("cat","verb")];
    "dowolne","ąć",[T("group","ą"); T("flex2","ć"); T("cat","verb")];
    "dowolne","nie",[T("group","nie"); T("flex2","ε"); T("cat","verb")];
    "verb_nie","ie",[T("group","nie"); T("flex2","ε"); T("cat","verb")];
    "dowolne","nięć",[T("group","nię"); T("flex2","ć"); T("cat","verb")];
    "verb_nię","nięć",[T("group","nię"); T("flex2","ć"); T("cat","verb")];
    "dowolne","ęć",[T("group","ę"); T("flex2","ć"); T("cat","verb")];
    "dowolne","nięt",[T("group","nię"); T("flex2","t"); T("cat","verb")];
    "verb_nię","nięt",[T("group","nię"); T("flex2","t"); T("cat","verb")];
    "dowolne","ęt",[T("group","ę"); T("flex2","t"); T("cat","verb")];
    "dowolne","nij",[T("group","ni"); T("flex2","j"); T("cat","verb")];
    "verb_nie","ij",[T("group","ni"); T("flex2","j"); T("cat","verb")];
    "dowolne","nąwszy",[T("group","ną"); T("flex2","wszy"); T("cat","verb")];
    "dowolne","ąwszy",[T("group","ą"); T("flex2","wszy"); T("cat","verb")];
    "dowolne","nęł",[T("group","nę"); T("flex2","ł"); T("cat","verb")];
    "dowolne","ęł",[T("group","ę"); T("flex2","ł"); T("cat","verb")]; *)
  ];

  (* j-conjugation -ną- groups. *)
  "VERB-GROUP-J-NĄ", [
    "dowolne","n",[T("group","n"); T("cat","verb")];
    "verb_ną","",[T("group","n"); T("cat","verb")];
    "verb_j","",[T("group","ε"); T("cat","verb")];
    "nieregularne_j","",[A "group"; T("cat","verb")];
    "verb_j","o",[T("group","o"); T("cat","verb")];
    "funkcjonalnie_twarde_i","o",[T("group","io"); T("cat","verb")];
    "verb_j","e",[T("group","e"); T("cat","verb")];
    "funkcjonalnie_twarde_i","e",[T("group","ie"); T("cat","verb")];
(*  "dowolne","ną",[T("group","n"); T("flex2","ε"); T("flex","ą"); T("cat","verb")];
    "verb_ną","ą",[T("group","n"); T("flex2","ε"); T("flex","ą"); T("cat","verb")];
    "dowolne","nę",[T("group","n"); T("flex2","ε"); T("flex","ę"); T("cat","verb")];
    "verb_ną","ę",[T("group","n"); T("flex2","ε"); T("flex","ę"); T("cat","verb")];
    "dowolne","nąc",[T("group","n"); T("flex2","ąc"); T("cat","verb")];
    "verb_ną","ąc",[T("group","n"); T("flex2","ąc"); T("cat","verb")];
    "verb_j","ą",[T("group","ε"); T("flex2","ą"); T("cat","verb")];
    "verb_j","ę",[T("group","ε"); T("flex2","ę"); T("cat","verb")];
    "verb_j","ąc",[T("group","ε"); T("flex2","ąc"); T("cat","verb")]; *)
  ];


]
  1397 +
(* Reverse rules: same (class, suffix, tags) triple shape as [rules], but
   every entry emits only a [T ("lemma", _)] tag -- i.e. they map a stem's
   alternation class plus a residual suffix to the ending of the base form.
   NOTE(review): combined with [rules] through [schemata] below; confirm
   the application order in the rule engine outside this chunk. *)
let rev_rules = [
  "ADJ-LEMMA", [
    "funkcjonalnie_miekkie_iy","",[T("lemma","y")];
    "funkcjonalnie_twarde_y","",[T("lemma","y")];
    "funkcjonalnie_miekkie_wyglos","",[T("lemma","ε")];
    "funkcjonalnie_twarde_wyglos","",[T("lemma","ε")];
  ];
  "ADV-LEMMA", [
    "funkcjonalnie_miekkie_ae", "o", [T("lemma","o")];
    "funkcjonalnie_twarde_a",   "o", [T("lemma","o")];
    "funkcjonalnie_twarde_ie",  "",  [T("lemma","ie")];
  ];
  "NOUN-LEMMA", [
    "funkcjonalnie_miekkie_iy", "",   [T("lemma","y")];
    "funkcjonalnie_twarde_y",   "",   [T("lemma","y")];
    "funkcjonalnie_miekkie_ii", "",   [T("lemma","y")];
    "funkcjonalnie_miekkie_ae", "e",  [T("lemma","e")];
    "funkcjonalnie_twarde_e",   "e",  [T("lemma","e")];
    "funkcjonalnie_miekkie_ii", "e",  [T("lemma","e")];
    "funkcjonalnie_miekkie_ae", "a",  [T("lemma","a")];
    "funkcjonalnie_twarde_a",   "a",  [T("lemma","a")];
    "funkcjonalnie_miekkie_ii", "a",  [T("lemma","a")];
    "funkcjonalnie_miekkie_ae", "o",  [T("lemma","o")];
    "funkcjonalnie_twarde_a",   "o",  [T("lemma","o")];
    "funkcjonalnie_miekkie_ii", "o",  [T("lemma","o")];
    "funkcjonalnie_twarde_a",   "um", [T("lemma","um")];
    "funkcjonalnie_miekkie_ii", "um", [T("lemma","um")];
    "funkcjonalnie_twarde_a",   "us", [T("lemma","us")];
    "funkcjonalnie_miekkie_wyglos","", [T("lemma","ε")];
    "funkcjonalnie_twarde_wyglos", "", [T("lemma","ε")];
    "kontrakcje",               "",   [T("lemma","ε")];
  ];
  "NOUN-LEMMA-CAP", [
    "kapitaliki_wyglos", "",  [T("lemma","ε")];
    "kapitaliki_wyglos", "A", [T("lemma","A")];
  ];
  "VERB-LEMMA-PATAL", [
    "funkcjonalnie_miekkie_iy","ć",[T("lemma","ć")];
    "funkcjonalnie_miekkie_ae","eć",[T("lemma","eć")];
  ];
  "VERB-LEMMA-SUFIX", [
    "dowolne","ować",[T("lemma","ować")];
    "dowolne","ywać",[T("lemma","ywać")];
    "dowolne","iwać",[T("lemma","iwać")];
    "dowolne","awać",[T("lemma","awać")];
    "dowolne","owywać",[T("lemma","owywać")];
    "dowolne","ać",[T("lemma","ać")];
    "dowolne","uć",[T("lemma","uć")];
    "funkcjonalnie_twarde_y","ć",[T("lemma","yć")];
  ];
  "VERB-LEMMA-NĄ", [
    "dowolne","nąć",[T("lemma","nąć")];
    "dowolne","ąć",[T("lemma","ąć")];
    "verb_inf_ć","ć",[T("lemma","ć")];
    "verb_inf_c","c",[T("lemma","c")];
    "dowolne","ać",[T("lemma","ać")];
    "dowolne","eć",[T("lemma","eć")];
    "dowolne","yć",[T("lemma","yć")];
    "inf_e","eć",[T("lemma","eć")];
(*  "pleć_mleć_lemma","eć",[T("lemma","eć")]; *)
  ];
]
  1460 +
(* Guessing schemata: each element is an ordered cascade of rule-group names
   (matching the group labels in the rule table above, e.g. "NOUN-FLEX",
   "NOUN-LEMMA-CAP") that are composed when analysing a word form.
   Only the two noun cascades are currently active; the adjective, adverb
   and verb cascades are kept commented out, presumably as work in
   progress — TODO confirm before enabling. *)
let schemata = [
(* ["KOLWIEK-SUFFIXES";"ADJ-FLEX";"ADJ-LEMMA"];
  ["ADJ-FLEX-GRAD";"ADJ-GRAD";"ADJ-LEMMA"];
  ["ADV-FLEX";"ADV-LEMMA"];*)
  ["NOUN-FLEX";"NOUN-LEMMA"];
  ["NOUN-FLEX-CAP";"NOUN-LEMMA-CAP"];
(* ["FIN-FLEX"; "VERB-FLEX2";"VERB-GROUP-SUFIX";"VERB-LEMMA-SUFIX"];
  ["GER-FLEX"; "VERB-FLEX2";"VERB-GROUP-SUFIX";"VERB-LEMMA-SUFIX"];
  ["PPAS-FLEX"; "VERB-FLEX2";"VERB-GROUP-SUFIX";"VERB-LEMMA-SUFIX"];
  ["PRAET-FLEX";"VERB-FLEX2";"VERB-GROUP-SUFIX";"VERB-LEMMA-SUFIX"];
  ["FIN-FLEX-J";"VERB-FLEX2-J";"VERB-GROUP-SUFIX";"VERB-LEMMA-SUFIX"];
  ["GER-FLEX"; "VERB-FLEX2-J";"VERB-GROUP-SUFIX";"VERB-LEMMA-SUFIX"];
  ["PACT-FLEX"; "VERB-FLEX2-J";"VERB-GROUP-SUFIX";"VERB-LEMMA-SUFIX"];
  ["PPAS-FLEX"; "VERB-FLEX2-J";"VERB-GROUP-SUFIX";"VERB-LEMMA-SUFIX"];
  ["FIN-FLEX"; "VERB-FLEX2";"VERB-GROUP-PATAL";"VERB-LEMMA-PATAL"];
  ["GER-FLEX"; "VERB-FLEX2";"VERB-GROUP-PATAL";"VERB-LEMMA-PATAL"];
  ["PPAS-FLEX"; "VERB-FLEX2";"VERB-GROUP-PATAL";"VERB-LEMMA-PATAL"];
  ["PRAET-FLEX";"VERB-FLEX2";"VERB-GROUP-PATAL";"VERB-LEMMA-PATAL"];
  ["FIN-FLEX-J";"VERB-FLEX2-J";"VERB-GROUP-J-PATAL";"VERB-LEMMA-PATAL"];
  ["GER-FLEX"; "VERB-FLEX2-J";"VERB-GROUP-J-PATAL";"VERB-LEMMA-PATAL"];
  ["PACT-FLEX"; "VERB-FLEX2-J";"VERB-GROUP-J-PATAL";"VERB-LEMMA-PATAL"];
  ["PPAS-FLEX"; "VERB-FLEX2-J";"VERB-GROUP-J-PATAL";"VERB-LEMMA-PATAL"];
  ["FIN-FLEX"; "VERB-FLEX2";"VERB-GROUP-NĄ";"VERB-LEMMA-NĄ"];
  ["GER-FLEX"; "VERB-FLEX2";"VERB-GROUP-NĄ";"VERB-LEMMA-NĄ"];
  ["PPAS-FLEX"; "VERB-FLEX2";"VERB-GROUP-NĄ";"VERB-LEMMA-NĄ"];
  ["PRAET-FLEX";"VERB-FLEX2";"VERB-GROUP-NĄ";"VERB-LEMMA-NĄ"];
  ["FIN-FLEX-J";"VERB-FLEX2-J";"VERB-GROUP-J-NĄ";"VERB-LEMMA-NĄ"];
  ["GER-FLEX"; "VERB-FLEX2-J";"VERB-GROUP-J-NĄ";"VERB-LEMMA-NĄ"];
  ["PACT-FLEX"; "VERB-FLEX2-J";"VERB-GROUP-J-NĄ";"VERB-LEMMA-NĄ"];
  ["PPAS-FLEX"; "VERB-FLEX2-J";"VERB-GROUP-J-NĄ";"VERB-LEMMA-NĄ"]; *)

(* ["NOUN-FLEX-GENERAL";"NOUN-LEMMA-GENERAL"];
  ["NOUN-FLEX-ADJ";"NOUN-LEMMA-ADJ"];
  ["NOUN-FLEX-A";"NOUN-LEMMA-A"];
  ["NOUN-FLEX-F-WYGŁOS";"NOUN-LEMMA-F-WYGŁOS"];
  ["NOUN-FLEX-N1";"NOUN-LEMMA-N1"];
  ["NOUN-FLEX-N2";"NOUN-LEMMA-N2"];*)
  ]
... ...
guesser/stem.ml 0 → 100644
  1 +open Xstd
  2 +open Printf
  3 +
(* Stem-selection rules for adjectives.
   Each entry is (priority, morphosyntactic tag, (pattern, replacement));
   a form carrying the tag is matched against the rule (via
   Rules.is_applicable_rule / Rules.apply_rule) to produce a stem candidate.
   Lower priority values win (see the min-priority selection in
   generate_stem below). *)
let adj_stem_sel = [
  1,"adj:sg:nom.voc:f:pos", ("a","");
  1,"adj:sg:nom:f:pos", ("a","");
  ]
  8 +
(* Stem-selection rules for nouns, instantiated for every SGJP gender value.
   Each entry is (priority, morphosyntactic tag, (pattern, replacement));
   lower priority values win (see generate_stem).  The gender-independent
   rules at the end cover personal-plural and masculine-personal cases. *)
let noun_stem_sel =
  List.flatten (Xlist.map ["m1";"m2";"m3";"n1";"n2";"f";"p1";"p2";"p3"] (fun gender -> [
  (* priority 1: apostrophe/hyphen-separated endings (foreign lemmas) *)
  1,"subst:pl:loc:" ^ gender, ("’ach","");
  1,"subst:pl:loc:" ^ gender, ("-ach","");
  1,"subst:pl:loc:" ^ gender, ("-etach","");
  1,"subst:pl:loc:" ^ gender, ("-otach","");
  (* priority 2: plural dative/locative endings *)
  2,"subst:pl:dat:" ^ gender, ("om","");
  2,"subst:pl:loc:" ^ gender, ("ach","");
  2,"subst:pl:loc:" ^ gender, ("ych","");
  2,"subst:pl:loc:" ^ gender, ("bich","bi");
  2,"subst:pl:loc:" ^ gender, ("cich","ci");
  2,"subst:pl:loc:" ^ gender, ("dzich","dzi");
  2,"subst:pl:loc:" ^ gender, ("fich","fi");
  2,"subst:pl:loc:" ^ gender, ("mich","mi");
  2,"subst:pl:loc:" ^ gender, ("nich","ni");
  2,"subst:pl:loc:" ^ gender, ("pich","pi");
  2,"subst:pl:loc:" ^ gender, ("sich","si");
  2,"subst:pl:loc:" ^ gender, ("wich","wi");
  2,"subst:pl:loc:" ^ gender, ("zich","zi");
  2,"subst:pl:loc:" ^ gender, ("kich","k");
  2,"subst:pl:loc:" ^ gender, ("gich","g");
  2,"subst:pl:loc:" ^ gender, ("lich","l");
  2,"subst:pl:loc:" ^ gender, ("żich","żi");
  (* priority 3: singular genitive (adjectival declension) endings *)
  3,"subst:sg:gen:" ^ gender, ("kiego","k");
  3,"subst:sg:gen:" ^ gender, ("ojego","oj");
  3,"subst:sg:gen:" ^ gender, ("nego","n");
  3,"subst:sg:gen:" ^ gender, ("tego","t");
  3,"subst:sg:gen:" ^ gender, ("wego","w");
  3,"subst:sg:gen:" ^ gender, ("siego","si");
  3,"subst:sg:gen:" ^ gender, ("ojej","oj");
  ])) @
  (* gender-specific fallback rules *)
  [3,"subst:pl:inst:p1", ("wem","w");
   3,"subst:pl:nom:m1", ("owie","");
   4,"subst:pl:gen:p1", ("oich","oj");
  ]
  44 +
(* Stem-selection rules for verbs, instantiated for every aspect value.
   Each entry is (priority, morphosyntactic tag, (pattern, replacement));
   lower priority values win (see generate_stem).
   Fix: the ("zła","z") praet rule was listed twice; the redundant copy
   was removed — it only produced the same stem candidate a second time. *)
let verb_stem_sel2 =
  List.flatten (Xlist.map ["imperf";"perf";"imperf.perf"] (fun aspect -> [
  (* priority 4: praet sg f forms ending in consonant + "ła" *)
  4,"praet:sg:f:" ^ aspect, ("kła","k");
  4,"praet:sg:f:" ^ aspect, ("gła","g");
  4,"praet:sg:f:" ^ aspect, ("zła","z");
  4,"praet:sg:f:" ^ aspect, ("sła","s");
  4,"praet:sg:f:" ^ aspect, ("dła","d");
  4,"praet:sg:f:" ^ aspect, ("tła","t");
  4,"praet:sg:f:" ^ aspect, ("bła","b");
  4,"praet:sg:f:" ^ aspect, ("łła","ł");
  4,"praet:sg:f:" ^ aspect, ("rła","r");
  (* priorities 5-6: infinitive endings, tried as a fallback *)
  5,"inf:" ^ aspect, ("ieć","");
  6,"inf:" ^ aspect, ("eć","");
(* 3,"ppas:sg:nom.voc:m1.m2.m3:" ^ aspect ^ ":aff", ("ty",""); *)
(* 3,"praaaet:sg:f:" ^ aspect, ("zła","z"); *)
  ]))
  62 +
(* Index a flat stem-selection rule list by its morphosyntactic tag.
   The resulting map sends each tag string to the list of
   (pattern, replacement, priority) triples declared for it. *)
let prepare_stem_sel stem_sel =
  Xlist.fold stem_sel StringMap.empty (fun acc (prio, tag, (pat, repl)) ->
    let rule = pat, repl, prio in
    StringMap.add_inc acc tag [rule] (fun rules -> rule :: rules))
  66 +
(* Shadow the raw rule lists above with their tag-indexed map form;
   from here on these three names denote StringMaps, not lists. *)
let adj_stem_sel = prepare_stem_sel adj_stem_sel
let noun_stem_sel = prepare_stem_sel noun_stem_sel
let verb_stem_sel2 = prepare_stem_sel verb_stem_sel2
  70 +
(* Stem-selection rules for adverbs.  Note the different shape from the
   lists above: flat (pattern, replacement, priority) triples with no
   morphosyntactic tag — the rules are applied directly to the lemma
   (presumably via generate_stem_lemma_as_stem below; verify at call site). *)
let adv_stem_sel = [
  "o","",1;
  "wie","w",1;
  "nie","n",1;
  "dze","g",1;
  "le","ł",1;
  "cie","t",1;
  "dzie","d",1;
  "mie","m",1;
  "rze","r",1;
  "ce","k",1;
  ]
  83 +
(* Stem-selection rules for verb lemmas (infinitive endings).  Same flat
   (pattern, replacement, priority) shape as adv_stem_sel: no tag, applied
   directly to the lemma (presumably via generate_stem_lemma_as_stem).
   Lower priority wins, so e.g. "ować" (1) is preferred over the more
   general "ać" (3) when both match. *)
let verb_stem_sel = [
  "ować","",1;
  "owywać","",1;
  "iwać","",1;
  "ywać","",2;
  "awać","",1;
  "ać","",3;
  "nąć","",1;
  "ąć","",2;
(* "eć","e",1; *)
  "ić","",1;
  "yć","",1;
  "uć","u",1;
(* "ć","",2; *)
  ]
  99 +
(* Pick a single stem for [lemma] from its inflected [forms].
   [stem_sel] maps an interpretation tag to candidate rules; every rule
   applicable to a form's orth yields a stem candidate whose priority is
   the minimum over all rules that produced it (Rules.get_tags is used as
   the priority here).  Among all candidates, only those with the lowest
   priority survive; a unique survivor is returned, otherwise "" is
   returned (with a diagnostic when the stem is ambiguous). *)
let generate_stem stem_sel lemma forms =
  let candidates =
    Xlist.fold forms StringMap.empty (fun acc (orth, interp) ->
      let rules = try StringMap.find stem_sel interp with Not_found -> [] in
      Xlist.fold rules acc (fun acc rule ->
        if not (Rules.is_applicable_rule rule orth) then acc
        else
          let prio = Rules.get_tags rule in
          StringMap.add_inc acc (Rules.apply_rule rule orth) prio
            (fun p -> min p prio))) in
  let best, _ =
    StringMap.fold candidates ([], max_int) (fun (best, best_p) stem p ->
      if p < best_p then [stem], p
      else if p > best_p then best, best_p
      else stem :: best, best_p) in
  match best with
    [] ->
      (* no rule matched any form; debug printing kept disabled *)
      ""
  | [stem] -> stem
  | many ->
      print_endline ("many stems found for " ^ lemma ^ ": " ^ String.concat " " many);
      ""
  118 +
(* Variant of generate_stem that derives the stem from the lemma itself:
   the lemma is simplified (Rules.simplify_lemma) and every applicable rule
   in the flat list [stem_sel] yields a candidate, keeping per-candidate the
   minimum priority (Rules.get_tags).  Only lowest-priority candidates
   survive; a unique survivor is returned, otherwise "" (with a diagnostic
   when the stem is ambiguous). *)
let generate_stem_lemma_as_stem stem_sel lemma =
  let orth = Rules.simplify_lemma lemma in
  let candidates =
    Xlist.fold stem_sel StringMap.empty (fun acc rule ->
      if not (Rules.is_applicable_rule rule orth) then acc
      else
        let prio = Rules.get_tags rule in
        StringMap.add_inc acc (Rules.apply_rule rule orth) prio
          (fun p -> min p prio)) in
  let best, _ =
    StringMap.fold candidates ([], max_int) (fun (best, best_p) stem p ->
      if p < best_p then [stem], p
      else if p > best_p then best, best_p
      else stem :: best, best_p) in
  match best with
    [] ->
      (* no rule matched the simplified lemma *)
      ""
  | [stem] -> stem
  | many ->
      print_endline ("many stems found for " ^ lemma ^ ": " ^ String.concat " " many);
      ""
  135 +
(* Fuse adjacent single-character strings that form a Polish digraph
   (or a consonant + apostrophe pair) into one element, scanning left to
   right greedily: ["c";"z";"a"] becomes ["cz";"a"].  Characters that do
   not start a known pair are passed through unchanged. *)
let rec merge_digraph chars =
  let is_pair a b =
    match a, b with
      ("b" | "f"), "'" -> true
    | ("c" | "d" | "g" | "n" | "r"), "h" -> true
    | ("c" | "d" | "r" | "s"), "z" -> true
    | "d", ("ź" | "ż") -> true
    | "q", "u" -> true
    | _ -> false in
  match chars with
    a :: b :: rest when is_pair a b -> (a ^ b) :: merge_digraph rest
  | c :: rest -> c :: merge_digraph rest
  | [] -> []
  153 +
  154 +(*let text_to_chars s =
  155 + (try UTF8.validate s with UTF8.Malformed_code -> failwith ("Invalid UTF8 string: " ^ s));
  156 + let r = ref [] in
  157 + UTF8.iter (fun c ->
  158 + r := (UTF8.init 1 (fun _ -> c)) :: (!r)) s;
  159 + merge_digraph (List.rev (!r))*)
  160 +
(* Drop the final UTF-8 character of [s]; when that final character is
   "i" or "j", drop the last two characters instead.  The empty string
   (and a lone "i"/"j" preceded by nothing) maps to "". *)
let cut_stem_sufix s =
  let chars = Xunicode.utf8_chars_of_utf8_string s in
  let kept =
    match List.rev chars with
      ("i" | "j") :: _ :: rest -> rest
    | _ :: rest -> rest
    | [] -> [] in
  String.concat "" (List.rev kept)
  169 +
0 170 \ No newline at end of file
... ...