Commit e0700d7b6ee4a00d085da7acc3256bc12e80eb29

Authored by Wojciech Jaworski
1 parent 9f38bc0d

biblioteka eniam-tokenizer-1.0 z poprawioną konfiguracją

tokenizer/ENIAMtokenizerTypes.ml
... ... @@ -64,10 +64,8 @@ type pat = L | CL | D of string | C of string | S of string | RD of string | O o
64 64 let empty_token = {
65 65 orth="";corr_orth="";beg=0;len=0;next=0; token=Symbol ""; attrs=[]}
66 66  
67   -let config =
68   - try File.load_attr_val_pairs "config-tokenizer"
69   - with _ -> (print_endline "ENIAMtokenizer config file not found"; [])
  67 +let resource_path =
  68 + try Sys.getenv "ENIAM_RESOURCE_PATH"
  69 + with Not_found -> "/usr/share/eniam"
70 70  
71   -let mte_filename =
72   - try Xlist.assoc config "MTE_FILENAME"
73   - with Not_found -> (print_endline "ENIAMtokenizer MTE_FILENAME config variable undefined"; "")
  71 +let mte_filename = resource_path ^ "/tokenizer/mte.tab"
... ...
tokenizer/README
... ... @@ -6,7 +6,7 @@ ENIAMtokenizer is a library that provides a tokenizer for Polish.
6 6 Install
7 7 -------
8 8  
9   -ENIAMtokenizer requires OCaml version 4.02.3 compiler
  9 +ENIAMtokenizer requires OCaml version 4.02.3 compiler
10 10 together with Xlib library version 3.1 or later.
11 11  
12 12 In order to install type:
... ... @@ -20,6 +20,10 @@ In order to test library type:
20 20 make test
21 21 ./test
22 22  
  23 +By default ENIAMtokenizer looks for resources in /usr/share/eniam directory.
  24 +However this behaviour may be changed by setting end exporting ENIAM_RESOURCE_PATH
  25 +environment variable.
  26 +
23 27 Credits
24 28 -------
25 29 Copyright © 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
... ... @@ -47,4 +51,3 @@ GNU General Public License for more details.
47 51  
48 52 You should have received a copy of the GNU Lesser General Public License
49 53 along with this program. If not, see <http://www.gnu.org/licenses/>.
50   -
... ...
tokenizer/config-tokenizer deleted
1   -# Localization of definitions of multi-token-expressions
2   -MTE_FILENAME=/usr/share/eniam/resources/SGJP/mte_20151215.tab
tokenizer/eniam-tokenizer-1.0.tar.bz2
No preview for this file type
tokenizer/eniam-tokenizer-1.0/ENIAMacronyms.ml deleted
1   -(*
2   - * ENIAMtokenizer, a tokenizer for Polish
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
5   - *
6   - * This library is free software: you can redistribute it and/or modify
7   - * it under the terms of the GNU Lesser General Public License as published by
8   - * the Free Software Foundation, either version 3 of the License, or
9   - * (at your option) any later version.
10   - *
11   - * This library is distributed in the hope that it will be useful,
12   - * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   - * GNU General Public License for more details.
15   - *
16   - * You should have received a copy of the GNU Lesser General Public License
17   - * along with this program. If not, see <http://www.gnu.org/licenses/>.
18   - *)
19   -
20   -open ENIAMtokenizerTypes
21   -
22   -let mte_patterns =
23   - let lines = try File.load_lines mte_filename
24   - with _ -> (print_endline ("ENIAMtokenizer mte file " ^ mte_filename ^ " not found"); []) in
25   - let l = List.rev (Xlist.rev_map lines (fun line ->
26   - match Str.split (Str.regexp "\t") line with
27   - [orths; lemma; interp] -> Str.split (Str.regexp " ") orths, lemma, interp
28   - | _ -> failwith ("mte_patterns: " ^ line))) in
29   - List.rev (Xlist.rev_map l (fun (orths,lemma,interp) ->
30   - Xlist.map orths (fun orth -> O orth), (fun (_:token_record list) -> ENIAMtokens.make_lemma (lemma,interp))))
31   -
32   -
33   -let compose_lemma t lemma_suf interp =
34   - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ lemma_suf, interp)
35   -
36   -let compose_lemma3 t1 t2 t3 lemma_suf interp =
37   - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t1.token ^ ENIAMtokens.get_orth t2.token ^ ENIAMtokens.get_orth t3.token ^ lemma_suf, interp)
38   -
39   -let concat_orths l =
40   - String.concat "" (Xlist.map l (fun t -> t.orth))
41   -
42   -let ct l lemma interp =
43   - let beg = (List.hd l).beg in
44   - let t = List.hd (List.rev l) in
45   - let len = t.beg + t.len - beg in
46   - Token{empty_token with
47   - orth=concat_orths l;
48   - beg=beg;
49   - len=len;
50   - next=t.next;
51   - token=ENIAMtokens.make_lemma (lemma,interp);
52   - attrs=ENIAMtokens.merge_attrs l}
53   -
54   -let rec get_orth_prefix i l =
55   - if i = 0 then "",l else
56   - match l with
57   - c :: l -> let s,l = get_orth_prefix (i-1) l in c ^ s, l
58   - | [] -> failwith "get_orth_prefix"
59   -
60   -let make_sub_tokens t l =
61   - let n = Xlist.fold l 0 (fun n (i,_,_) -> n + i) in
62   - let orth = Xunicode.utf8_chars_of_utf8_string t.orth in
63   - if Xlist.size orth <> n then failwith "make_sub_tokens: invalid orth length" else
64   - let l,_,_,_ = Xlist.fold l ([],t.beg,t.len,orth) (fun (l,beg,remaining_len,orth) (i,lemma,interp) ->
65   - let orth,remaining_orth = get_orth_prefix i orth in
66   - let len = if beg mod factor = 0 then i * factor else ((i-1) * factor) + (beg mod factor) in
67   - if remaining_len = 0 then failwith "make_sub_tokens: invalid remaining_len" else
68   - let len = if len > remaining_len then remaining_len else len in
69   - Token{empty_token with
70   - orth=orth;
71   - beg=beg;
72   - len=len;
73   - next=beg+len;
74   - token=ENIAMtokens.make_lemma (lemma,interp);
75   - attrs=t.attrs} :: l,
76   - beg+len, remaining_len-len, remaining_orth) in
77   - l
78   -
79   -let st t l =
80   - let l = make_sub_tokens t l in
81   - match l with
82   - Token s :: l -> List.rev (Token{s with next=t.next} :: l)
83   - | _ -> failwith "st"
84   -
85   -let std t d l =
86   - let l = make_sub_tokens t l in
87   - match l with
88   - Token s :: l -> List.rev (Token{s with orth=s.orth^d.orth; len=d.beg+d.len-s.beg; next=d.next} :: l)
89   - | _ -> failwith "std"
90   -
91   -let acronym_patterns = [
92   - [L; S "-"; O "owscy"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns");
93   - [L; S "-"; O "owska"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns");
94   - [L; S "-"; O "owski"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns");
95   - [L; S "-"; O "owski"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns");
96   - [L; S "-"; O "owskich"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns");
97   - [L; S "-"; O "owskich"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
98   - [L; S "-"; O "owskich"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
99   - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");
100   - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");
101   - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns");
102   - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns");
103   - [L; S "-"; O "owskiego"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns");
104   - [L; S "-"; O "owskiego"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
105   - [L; S "-"; O "owskiej"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns");
106   - [L; S "-"; O "owskiej"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns");
107   - [L; S "-"; O "owskiej"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns");
108   - [L; S "-"; O "owskiemu"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
109   - [L; S "-"; O "owskim"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
110   - [L; S "-"; O "owskim"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
111   - [L; S "-"; O "owskim"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
112   - [L; S "-"; O "owskimi"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
113   - [L; S "-"; O "owsko"], (function [x;_;_] -> compose_lemma x "-owski" "adja" | _ -> failwith "acronym_patterns");
114   - [L; S "-"; O "owsko"], (function [x;_;_] -> compose_lemma x "-owsko" "adv:pos" | _ -> failwith "acronym_patterns");
115   - [L; S "-"; O "owsku"], (function [x;_;_] -> compose_lemma x "-owski" "adjp" | _ -> failwith "acronym_patterns");
116   - [L; S "-"; O "owską"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns");
117   - [L; S "-"; O "owską"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns");
118   - [L; S "-"; O "wscy"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns");
119   - [L; S "-"; O "wska"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns");
120   - [L; S "-"; O "wski"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns");
121   - [L; S "-"; O "wski"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns");
122   - [L; S "-"; O "wskich"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns");
123   - [L; S "-"; O "wskich"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
124   - [L; S "-"; O "wskich"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
125   - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");
126   - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");
127   - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns");
128   - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns");
129   - [L; S "-"; O "wskiego"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns");
130   - [L; S "-"; O "wskiego"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
131   - [L; S "-"; O "wskiej"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns");
132   - [L; S "-"; O "wskiej"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns");
133   - [L; S "-"; O "wskiej"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns");
134   - [L; S "-"; O "wskiemu"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
135   - [L; S "-"; O "wskim"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
136   - [L; S "-"; O "wskim"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
137   - [L; S "-"; O "wskim"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
138   - [L; S "-"; O "wskimi"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
139   - [L; S "-"; O "wsko"], (function [x;_;_] -> compose_lemma x "-wski" "adja" | _ -> failwith "acronym_patterns");
140   - [L; S "-"; O "wsko"], (function [x;_;_] -> compose_lemma x "-wsko" "adv:pos" | _ -> failwith "acronym_patterns");
141   - [L; S "-"; O "wsku"], (function [x;_;_] -> compose_lemma x "-wski" "adjp" | _ -> failwith "acronym_patterns");
142   - [L; S "-"; O "wską"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns");
143   - [L; S "-"; O "wską"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns");
144   - [L; S "’"; O "owa"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns");
145   - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");
146   - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");
147   - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns");
148   - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns");
149   - [L; S "’"; O "owego"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns");
150   - [L; S "’"; O "owego"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
151   - [L; S "’"; O "owej"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns");
152   - [L; S "’"; O "owej"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns");
153   - [L; S "’"; O "owej"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns");
154   - [L; S "’"; O "owemu"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
155   - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns");
156   - [L; S "’"; O "owo"], (function [x;_;_] -> compose_lemma x "’owo" "adv:pos" | _ -> failwith "acronym_patterns");
157   - [L; S "’"; O "owo"], (function [x;_;_] -> compose_lemma x "’owy" "adja" | _ -> failwith "acronym_patterns");
158   - [L; S "’"; O "owy"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns");
159   - [L; S "’"; O "owy"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns");
160   - [L; S "’"; O "owych"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns");
161   - [L; S "’"; O "owych"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
162   - [L; S "’"; O "owych"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
163   - [L; S "’"; O "owym"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
164   - [L; S "’"; O "owym"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
165   - [L; S "’"; O "owym"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
166   - [L; S "’"; O "owymi"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
167   - [L; S "’"; O "ową"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns");
168   - [L; S "’"; O "ową"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns");
169   - [L; S "’"; O "owscy"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns");
170   - [L; S "’"; O "owska"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns");
171   - [L; S "’"; O "owski"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns");
172   - [L; S "’"; O "owski"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns");
173   - [L; S "’"; O "owskich"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns");
174   - [L; S "’"; O "owskich"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
175   - [L; S "’"; O "owskich"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
176   - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");
177   - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");
178   - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns");
179   - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns");
180   - [L; S "’"; O "owskiego"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns");
181   - [L; S "’"; O "owskiego"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
182   - [L; S "’"; O "owskiej"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns");
183   - [L; S "’"; O "owskiej"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns");
184   - [L; S "’"; O "owskiej"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns");
185   - [L; S "’"; O "owskiemu"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
186   - [L; S "’"; O "owskim"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
187   - [L; S "’"; O "owskim"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
188   - [L; S "’"; O "owskim"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
189   - [L; S "’"; O "owskimi"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
190   - [L; S "’"; O "owsko"], (function [x;_;_] -> compose_lemma x "’owski" "adja" | _ -> failwith "acronym_patterns");
191   - [L; S "’"; O "owsko"], (function [x;_;_] -> compose_lemma x "’owsko" "adv:pos" | _ -> failwith "acronym_patterns");
192   - [L; S "’"; O "owsku"], (function [x;_;_] -> compose_lemma x "’owski" "adjp" | _ -> failwith "acronym_patterns");
193   - [L; S "’"; O "owską"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns");
194   - [L; S "’"; O "owską"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns");
195   - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");
196   - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns");
197   - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");
198   - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns");
199   - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");
200   - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");
201   - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns");
202   - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns");
203   - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:n2" | _ -> failwith "acronym_patterns");
204   - [CL; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:loc:f" | _ -> failwith "acronym_patterns");
205   - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");
206   - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns");
207   - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns");
208   - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:n2" | _ -> failwith "acronym_patterns");
209   - [CL; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:inst:f" | _ -> failwith "acronym_patterns");
210   - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "T" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");
211   - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "T" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");
212   - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "TA" "subst:sg:dat:f" | _ -> failwith "acronym_patterns");
213   - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "TA" "subst:sg:loc:f" | _ -> failwith "acronym_patterns");
214   - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");
215   - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");
216   - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");
217   - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");
218   - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");
219   - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");
220   - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");
221   - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");
222   - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");
223   - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");
224   - [CL; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns");
225   - [CL; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns");
226   - [CL; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns");
227   - [L; S "-"; O "ecie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");
228   - [L; S "-"; O "ecie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");
229   - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");
230   - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns");
231   - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");
232   - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:n2" | _ -> failwith "acronym_patterns");
233   - [L; S "-"; O "etach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns");
234   - [L; S "-"; O "etami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns");
235   - [L; S "-"; O "etem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");
236   - [L; S "-"; O "etom"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns");
237   - [L; S "-"; O "etowi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns");
238   - [L; S "-"; O "etu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");
239   - [L; S "-"; O "ety"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");
240   - [L; S "-"; O "ety"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");
241   - [L; S "-"; O "ety"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");
242   - [L; S "-"; O "etów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");
243   - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");
244   - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");
245   - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");
246   - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");
247   - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");
248   - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");
249   - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");
250   - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");
251   - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");
252   - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:gen:f" | _ -> failwith "acronym_patterns");
253   - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns");
254   - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns");
255   - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns");
256   - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");
257   - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns");
258   - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");
259   - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");
260   - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns");
261   - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");
262   - [CL; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns");
263   - [CL; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns");
264   - [L; S "-"; O "iem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns");
265   - [L; S "-"; O "iem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");
266   - [CL; S "-"; O "o"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:voc:f" | _ -> failwith "acronym_patterns");
267   - [L; S "-"; O "ocie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");
268   - [L; S "-"; O "ocie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");
269   - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");
270   - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns");
271   - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns");
272   - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:n2" | _ -> failwith "acronym_patterns");
273   - [CL; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:dat:f" | _ -> failwith "acronym_patterns");
274   - [L; S "-"; O "otach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns");
275   - [L; S "-"; O "otami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns");
276   - [L; S "-"; O "otem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");
277   - [L; S "-"; O "otom"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns");
278   - [L; S "-"; O "otowi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns");
279   - [L; S "-"; O "otu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");
280   - [L; S "-"; O "oty"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");
281   - [L; S "-"; O "oty"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");
282   - [L; S "-"; O "oty"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");
283   - [L; S "-"; O "otów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");
284   - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");
285   - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns");
286   - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns");
287   - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:n2" | _ -> failwith "acronym_patterns");
288   - [L; S "-"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");
289   - [L; S "-"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");
290   - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns");
291   - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns");
292   - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");
293   - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:n2" | _ -> failwith "acronym_patterns");
294   - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");
295   - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns");
296   - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");
297   - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");
298   - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns");
299   - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");
300   - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");
301   - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");
302   - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");
303   - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");
304   - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:n2" | _ -> failwith "acronym_patterns");
305   - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");
306   - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");
307   - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:n2" | _ -> failwith "acronym_patterns");
308   - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");
309   - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");
310   - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:n2" | _ -> failwith "acronym_patterns");
311   - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns");
312   - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns");
313   - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns");
314   - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns");
315   - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns");
316   - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");
317   - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:n2" | _ -> failwith "acronym_patterns");
318   - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns");
319   - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");
320   - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:n2" | _ -> failwith "acronym_patterns");
321   - [L; S "-"; O "zie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");
322   - [L; S "-"; O "zie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");
323   - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");
324   - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");
325   - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns");
326   - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");
327   - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:n2" | _ -> failwith "acronym_patterns");
328   - [CL; S "-"; O "ą"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:inst:f" | _ -> failwith "acronym_patterns");
329   - [CL; S "-"; O "ę"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:acc:f" | _ -> failwith "acronym_patterns");
330   - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");
331   - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns");
332   - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");
333   - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns");
334   - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");
335   - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");
336   - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns");
337   - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns");
338   - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:loc:p3" | _ -> failwith "acronym_patterns");
339   - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");
340   - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns");
341   - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns");
342   - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:inst:p3" | _ -> failwith "acronym_patterns");
343   - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");
344   - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");
345   - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");
346   - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");
347   - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");
348   - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");
349   - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");
350   - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");
351   - [L; S "’"; O "ego"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");
352   - [L; S "’"; O "ego"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");
353   - [L; S "’"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");
354   - [L; S "’"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns");
355   - [L; S "’"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");
356   - [L; S "’"; O "emu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");
357   - [L; S "’"; O "go"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");
358   - [L; S "’"; O "go"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");
359   - [L; S "’"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns");
360   - [L; S "’"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");
361   - [L; S "’"; O "m"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");
362   - [L; S "’"; O "m"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");
363   - [L; S "’"; O "mu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");
364   - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");
365   - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns");
366   - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns");
367   - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:dat:p3" | _ -> failwith "acronym_patterns");
368   - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");
369   - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns");
370   - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns");
371   - [L; S "’"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");
372   - [L; S "’"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");
373   - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");
374   - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");
375   - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns");
376   - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");
377   - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");
378   - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns");
379   - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");
380   - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");
381   - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");
382   - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");
383   - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");
384   - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");
385   - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");
386   - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");
387   - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");
388   - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");
389   - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");
390   - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");
391   - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns");
392   - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");
393   - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:gen:p3" | _ -> failwith "acronym_patterns");
394   - [L; S "-"; O "ista"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns");
395   - [L; S "-"; O "istach"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");
396   - [L; S "-"; O "istami"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");
397   - [L; S "-"; O "isto"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");
398   - [L; S "-"; O "istom"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");
399   - [L; S "-"; O "isty"], (function [x;_;_] -> compose_lemma x "-ista" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");
400   - [L; S "-"; O "isty"], (function [x;_;_] -> compose_lemma x "-ista" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");
401   - [L; S "-"; O "isty"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");
402   - [L; S "-"; O "istów"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");
403   - [L; S "-"; O "istów"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");
404   - [L; S "-"; O "istą"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");
405   - [L; S "-"; O "istę"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");
406   - [L; S "-"; O "iści"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");
407   - [L; S "-"; O "iści"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");
408   - [L; S "-"; O "iście"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");
409   - [L; S "-"; O "iście"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");
410   - [L; S "-"; O "owca"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");
411   - [L; S "-"; O "owca"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");
412   - [L; S "-"; O "owcach"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");
413   - [L; S "-"; O "owcami"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");
414   - [L; S "-"; O "owce"], (function [x;_;_] -> compose_lemma x "-owiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");
415   - [L; S "-"; O "owce"], (function [x;_;_] -> compose_lemma x "-owiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");
416   - [L; S "-"; O "owcem"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");
417   - [L; S "-"; O "owcom"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");
418   - [L; S "-"; O "owcowi"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");
419   - [L; S "-"; O "owcu"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");
420   - [L; S "-"; O "owcu"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");
421   - [L; S "-"; O "owcy"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");
422   - [L; S "-"; O "owcy"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");
423   - [L; S "-"; O "owcze"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");
424   - [L; S "-"; O "owców"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");
425   - [L; S "-"; O "owców"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");
426   - [L; S "-"; O "owiec"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns");
427   - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:acc:f" | _ -> failwith "acronym_patterns");
428   - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:gen:f" | _ -> failwith "acronym_patterns");
429   - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:nom:f" | _ -> failwith "acronym_patterns");
430   - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:voc:f" | _ -> failwith "acronym_patterns");
431   - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:dat:f" | _ -> failwith "acronym_patterns");
432   - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:gen:f" | _ -> failwith "acronym_patterns");
433   - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:loc:f" | _ -> failwith "acronym_patterns");
434   - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:voc:f" | _ -> failwith "acronym_patterns");
435   - [L; S "-"; O "owskościach"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:loc:f" | _ -> failwith "acronym_patterns");
436   - [L; S "-"; O "owskościami"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:inst:f" | _ -> failwith "acronym_patterns");
437   - [L; S "-"; O "owskościom"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:dat:f" | _ -> failwith "acronym_patterns");
438   - [L; S "-"; O "owskością"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:inst:f" | _ -> failwith "acronym_patterns");
439   - [L; S "-"; O "owskość"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:acc:f" | _ -> failwith "acronym_patterns");
440   - [L; S "-"; O "owskość"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:nom:f" | _ -> failwith "acronym_patterns");
441   - [L; S "-"; O "wca"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");
442   - [L; S "-"; O "wca"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");
443   - [L; S "-"; O "wcach"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");
444   - [L; S "-"; O "wcami"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");
445   - [L; S "-"; O "wce"], (function [x;_;_] -> compose_lemma x "-wiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");
446   - [L; S "-"; O "wce"], (function [x;_;_] -> compose_lemma x "-wiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");
447   - [L; S "-"; O "wcem"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");
448   - [L; S "-"; O "wcom"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");
449   - [L; S "-"; O "wcowi"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");
450   - [L; S "-"; O "wcu"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");
451   - [L; S "-"; O "wcu"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");
452   - [L; S "-"; O "wcy"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");
453   - [L; S "-"; O "wcy"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");
454   - [L; S "-"; O "wców"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");
455   - [L; S "-"; O "wców"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");
456   - [L; S "-"; O "wiec"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns");
457   - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:acc:f" | _ -> failwith "acronym_patterns");
458   - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:gen:f" | _ -> failwith "acronym_patterns");
459   - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:nom:f" | _ -> failwith "acronym_patterns");
460   - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:voc:f" | _ -> failwith "acronym_patterns");
461   - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:dat:f" | _ -> failwith "acronym_patterns");
462   - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:gen:f" | _ -> failwith "acronym_patterns");
463   - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:loc:f" | _ -> failwith "acronym_patterns");
464   - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:voc:f" | _ -> failwith "acronym_patterns");
465   - [L; S "’"; O "owościach"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:loc:f" | _ -> failwith "acronym_patterns");
466   - [L; S "’"; O "owościami"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:inst:f" | _ -> failwith "acronym_patterns");
467   - [L; S "’"; O "owościom"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:dat:f" | _ -> failwith "acronym_patterns");
468   - [L; S "’"; O "owością"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:inst:f" | _ -> failwith "acronym_patterns");
469   - [L; S "’"; O "owość"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:acc:f" | _ -> failwith "acronym_patterns");
470   - [L; S "’"; O "owość"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:nom:f" | _ -> failwith "acronym_patterns");
471   -
472   - [L; S "-"; L; S "-"; O "owscy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns");
473   - [L; S "-"; L; S "-"; O "owska"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns");
474   - [L; S "-"; L; S "-"; O "owski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns");
475   - [L; S "-"; L; S "-"; O "owski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns");
476   - [L; S "-"; L; S "-"; O "owskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns");
477   - [L; S "-"; L; S "-"; O "owskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
478   - [L; S "-"; L; S "-"; O "owskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
479   - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");
480   - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");
481   - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns");
482   - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns");
483   - [L; S "-"; L; S "-"; O "owskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns");
484   - [L; S "-"; L; S "-"; O "owskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
485   - [L; S "-"; L; S "-"; O "owskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns");
486   - [L; S "-"; L; S "-"; O "owskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns");
487   - [L; S "-"; L; S "-"; O "owskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns");
488   - [L; S "-"; L; S "-"; O "owskiemu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
489   - [L; S "-"; L; S "-"; O "owskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
490   - [L; S "-"; L; S "-"; O "owskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
491   - [L; S "-"; L; S "-"; O "owskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
492   - [L; S "-"; L; S "-"; O "owskimi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
493   - [L; S "-"; L; S "-"; O "owsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adja" | _ -> failwith "acronym_patterns");
494   - [L; S "-"; L; S "-"; O "owsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owsko" "adv:pos" | _ -> failwith "acronym_patterns");
495   - [L; S "-"; L; S "-"; O "owsku"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adjp" | _ -> failwith "acronym_patterns");
496   - [L; S "-"; L; S "-"; O "owską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns");
497   - [L; S "-"; L; S "-"; O "owską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns");
498   - [L; S "-"; L; S "-"; O "wscy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns");
499   - [L; S "-"; L; S "-"; O "wska"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns");
500   - [L; S "-"; L; S "-"; O "wski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns");
501   - [L; S "-"; L; S "-"; O "wski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns");
502   - [L; S "-"; L; S "-"; O "wskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns");
503   - [L; S "-"; L; S "-"; O "wskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
504   - [L; S "-"; L; S "-"; O "wskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
505   - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");
506   - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");
507   - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns");
508   - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns");
509   - [L; S "-"; L; S "-"; O "wskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns");
510   - [L; S "-"; L; S "-"; O "wskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
511   - [L; S "-"; L; S "-"; O "wskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns");
512   - [L; S "-"; L; S "-"; O "wskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns");
513   - [L; S "-"; L; S "-"; O "wskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns");
514   - [L; S "-"; L; S "-"; O "wskiemu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
515   - [L; S "-"; L; S "-"; O "wskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
516   - [L; S "-"; L; S "-"; O "wskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
517   - [L; S "-"; L; S "-"; O "wskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");
518   - [L; S "-"; L; S "-"; O "wskimi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");
519   - [L; S "-"; L; S "-"; O "wsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adja" | _ -> failwith "acronym_patterns");
520   - [L; S "-"; L; S "-"; O "wsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wsko" "adv:pos" | _ -> failwith "acronym_patterns");
521   - [L; S "-"; L; S "-"; O "wsku"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adjp" | _ -> failwith "acronym_patterns");
522   - [L; S "-"; L; S "-"; O "wską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns");
523   - [L; S "-"; L; S "-"; O "wską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns");
524   - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");
525   - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns");
526   - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");
527   - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns");
528   - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");
529   - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");
530   - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns");
531   - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns");
532   - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:n2" | _ -> failwith "acronym_patterns");
533   - [CL; S "-"; CL; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:loc:f" | _ -> failwith "acronym_patterns");
534   - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");
535   - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns");
536   - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns");
537   - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:n2" | _ -> failwith "acronym_patterns");
538   - [CL; S "-"; CL; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:inst:f" | _ -> failwith "acronym_patterns");
539   - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "T" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");
540   - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "T" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");
541   - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "TA" "subst:sg:dat:f" | _ -> failwith "acronym_patterns");
542   - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "TA" "subst:sg:loc:f" | _ -> failwith "acronym_patterns");
543   - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");
544   - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");
545   - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");
546   - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");
547   - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");
548   - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");
549   - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");
550   - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");
551   - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");
552   - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");
553   - [CL; S "-"; CL; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns");
554   - [CL; S "-"; CL; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns");
555   - [CL; S "-"; CL; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns");
556   - [L; S "-"; L; S "-"; O "ecie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");
557   - [L; S "-"; L; S "-"; O "ecie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");
558   - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");
559   - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns");
560   - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");
561   - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:n2" | _ -> failwith "acronym_patterns");
562   - [L; S "-"; L; S "-"; O "etach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns");
563   - [L; S "-"; L; S "-"; O "etami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns");
564   - [L; S "-"; L; S "-"; O "etem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");
565   - [L; S "-"; L; S "-"; O "etom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns");
566   - [L; S "-"; L; S "-"; O "etowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns");
567   - [L; S "-"; L; S "-"; O "etu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");
568   - [L; S "-"; L; S "-"; O "ety"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");
569   - [L; S "-"; L; S "-"; O "ety"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");
570   - [L; S "-"; L; S "-"; O "ety"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");
571   - [L; S "-"; L; S "-"; O "etów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");
572   - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");
573   - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");
574   - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");
575   - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");
576   - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");
577   - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");
578   - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");
579   - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");
580   - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");
581   - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:gen:f" | _ -> failwith "acronym_patterns");
582   - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns");
583   - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns");
584   - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns");
585   - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");
586   - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns");
587   - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");
588   - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");
589   - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns");
590   - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");
591   - [CL; S "-"; CL; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns");
592   - [CL; S "-"; CL; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns");
593   - [L; S "-"; L; S "-"; O "iem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns");
594   - [L; S "-"; L; S "-"; O "iem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");
595   - [CL; S "-"; CL; S "-"; O "o"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:voc:f" | _ -> failwith "acronym_patterns");
596   - [L; S "-"; L; S "-"; O "ocie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");
597   - [L; S "-"; L; S "-"; O "ocie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");
598   - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");
599   - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns");
600   - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns");
601   - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:n2" | _ -> failwith "acronym_patterns");
602   - [CL; S "-"; CL; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:dat:f" | _ -> failwith "acronym_patterns");
603   - [L; S "-"; L; S "-"; O "otach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns");
604   - [L; S "-"; L; S "-"; O "otami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns");
605   - [L; S "-"; L; S "-"; O "otem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");
606   - [L; S "-"; L; S "-"; O "otom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns");
607   - [L; S "-"; L; S "-"; O "otowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns");
608   - [L; S "-"; L; S "-"; O "otu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");
609   - [L; S "-"; L; S "-"; O "oty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");
610   - [L; S "-"; L; S "-"; O "oty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");
611   - [L; S "-"; L; S "-"; O "oty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");
612   - [L; S "-"; L; S "-"; O "otów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");
613   - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");
614   - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns");
615   - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns");
616   - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:n2" | _ -> failwith "acronym_patterns");
617   - [L; S "-"; L; S "-"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");
618   - [L; S "-"; L; S "-"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");
619   - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns");
620   - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns");
621   - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");
622   - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:n2" | _ -> failwith "acronym_patterns");
623   - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");
624   - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns");
625   - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");
626   - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");
627   - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns");
628   - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");
629   - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");
630   - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");
631   - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");
632   - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");
633   - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:n2" | _ -> failwith "acronym_patterns");
634   - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");
635   - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");
636   - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:n2" | _ -> failwith "acronym_patterns");
637   - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");
638   - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");
639   - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:n2" | _ -> failwith "acronym_patterns");
640   - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns");
641   - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns");
642   - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns");
643   - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns");
644   - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns");
645   - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");
646   - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:n2" | _ -> failwith "acronym_patterns");
647   - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns");
648   - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");
649   - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:n2" | _ -> failwith "acronym_patterns");
650   - [L; S "-"; L; S "-"; O "zie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");
651   - [L; S "-"; L; S "-"; O "zie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");
652   - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");
653   - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");
654   - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns");
655   - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");
656   - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:n2" | _ -> failwith "acronym_patterns");
657   - [CL; S "-"; CL; S "-"; O "ą"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:inst:f" | _ -> failwith "acronym_patterns");
658   - [CL; S "-"; CL; S "-"; O "ę"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:acc:f" | _ -> failwith "acronym_patterns");
659   - [L; S "-"; L; S "-"; O "ista"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns");
660   - [L; S "-"; L; S "-"; O "istach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");
661   - [L; S "-"; L; S "-"; O "istami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");
662   - [L; S "-"; L; S "-"; O "isto"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");
663   - [L; S "-"; L; S "-"; O "istom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");
664   - [L; S "-"; L; S "-"; O "isty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");
665   - [L; S "-"; L; S "-"; O "isty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");
666   - [L; S "-"; L; S "-"; O "isty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");
667   - [L; S "-"; L; S "-"; O "istów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");
668   - [L; S "-"; L; S "-"; O "istów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");
669   - [L; S "-"; L; S "-"; O "istą"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");
670   - [L; S "-"; L; S "-"; O "istę"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");
671   - [L; S "-"; L; S "-"; O "iści"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");
672   - [L; S "-"; L; S "-"; O "iści"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");
673   - [L; S "-"; L; S "-"; O "iście"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");
674   - [L; S "-"; L; S "-"; O "iście"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");
675   - [L; S "-"; L; S "-"; O "owca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");
676   - [L; S "-"; L; S "-"; O "owca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");
677   - [L; S "-"; L; S "-"; O "owcach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");
678   - [L; S "-"; L; S "-"; O "owcami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");
679   - [L; S "-"; L; S "-"; O "owce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");
680   - [L; S "-"; L; S "-"; O "owce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");
681   - [L; S "-"; L; S "-"; O "owcem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");
682   - [L; S "-"; L; S "-"; O "owcom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");
683   - [L; S "-"; L; S "-"; O "owcowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");
684   - [L; S "-"; L; S "-"; O "owcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");
685   - [L; S "-"; L; S "-"; O "owcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");
686   - [L; S "-"; L; S "-"; O "owcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");
687   - [L; S "-"; L; S "-"; O "owcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");
688   - [L; S "-"; L; S "-"; O "owcze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");
689   - [L; S "-"; L; S "-"; O "owców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");
690   - [L; S "-"; L; S "-"; O "owców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");
691   - [L; S "-"; L; S "-"; O "owiec"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns");
692   - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:acc:f" | _ -> failwith "acronym_patterns");
693   - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:gen:f" | _ -> failwith "acronym_patterns");
694   - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:nom:f" | _ -> failwith "acronym_patterns");
695   - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:voc:f" | _ -> failwith "acronym_patterns");
696   - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:dat:f" | _ -> failwith "acronym_patterns");
697   - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:gen:f" | _ -> failwith "acronym_patterns");
698   - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:loc:f" | _ -> failwith "acronym_patterns");
699   - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:voc:f" | _ -> failwith "acronym_patterns");
700   - [L; S "-"; L; S "-"; O "owskościach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:loc:f" | _ -> failwith "acronym_patterns");
701   - [L; S "-"; L; S "-"; O "owskościami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:inst:f" | _ -> failwith "acronym_patterns");
702   - [L; S "-"; L; S "-"; O "owskościom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:dat:f" | _ -> failwith "acronym_patterns");
703   - [L; S "-"; L; S "-"; O "owskością"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:inst:f" | _ -> failwith "acronym_patterns");
704   - [L; S "-"; L; S "-"; O "owskość"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:acc:f" | _ -> failwith "acronym_patterns");
705   - [L; S "-"; L; S "-"; O "owskość"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:nom:f" | _ -> failwith "acronym_patterns");
706   - [L; S "-"; L; S "-"; O "wca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");
707   - [L; S "-"; L; S "-"; O "wca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");
708   - [L; S "-"; L; S "-"; O "wcach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");
709   - [L; S "-"; L; S "-"; O "wcami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");
710   - [L; S "-"; L; S "-"; O "wce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");
711   - [L; S "-"; L; S "-"; O "wce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");
712   - [L; S "-"; L; S "-"; O "wcem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");
713   - [L; S "-"; L; S "-"; O "wcom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");
714   - [L; S "-"; L; S "-"; O "wcowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");
715   - [L; S "-"; L; S "-"; O "wcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");
716   - [L; S "-"; L; S "-"; O "wcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");
717   - [L; S "-"; L; S "-"; O "wcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");
718   - [L; S "-"; L; S "-"; O "wcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");
719   - [L; S "-"; L; S "-"; O "wców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");
720   - [L; S "-"; L; S "-"; O "wców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");
721   - [L; S "-"; L; S "-"; O "wiec"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns");
722   - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");
723   - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns");
724   - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");
725   - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns");
726   - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");
727   - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");
728   - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns");
729   - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns");
730   - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:loc:p3" | _ -> failwith "acronym_patterns");
731   - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");
732   - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns");
733   - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns");
734   - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:inst:p3" | _ -> failwith "acronym_patterns");
735   - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");
736   - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");
737   - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");
738   - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");
739   - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");
740   - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");
741   - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");
742   - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");
743   - [L; S "-"; L; S "’"; O "ego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");
744   - [L; S "-"; L; S "’"; O "ego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");
745   - [L; S "-"; L; S "’"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");
746   - [L; S "-"; L; S "’"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns");
747   - [L; S "-"; L; S "’"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");
748   - [L; S "-"; L; S "’"; O "emu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");
749   - [L; S "-"; L; S "’"; O "go"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");
750   - [L; S "-"; L; S "’"; O "go"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");
751   - [L; S "-"; L; S "’"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns");
752   - [L; S "-"; L; S "’"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");
753   - [L; S "-"; L; S "’"; O "m"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");
754   - [L; S "-"; L; S "’"; O "m"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");
755   - [L; S "-"; L; S "’"; O "mu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");
756   - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");
757   - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns");
758   - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns");
759   - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:dat:p3" | _ -> failwith "acronym_patterns");
760   - [L; S "-"; L; S "’"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");
761   - [L; S "-"; L; S "’"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns");
762   - [L; S "-"; L; S "’"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns");
763   - [L; S "-"; L; S "’"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");
764   - [L; S "-"; L; S "’"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");
765   - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");
766   - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");
767   - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns");
768   - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");
769   - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");
770   - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns");
771   - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");
772   - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");
773   - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");
774   - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");
775   - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");
776   - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");
777   - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");
778   - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");
779   - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");
780   - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");
781   - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");
782   - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");
783   - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns");
784   - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");
785   - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:gen:p3" | _ -> failwith "acronym_patterns");
786   - ]
787   -
788   -let name_patterns = [
789   - [O "O"; S "’"; L], (function [x;y;z] -> compose_lemma3 x y z "" "subst:_:_:_" | _ -> failwith "name_patterns");
790   - [O "d"; S "’"; L], (function [x;y;z] -> compose_lemma3 x y z "" "subst:_:_:_" | _ -> failwith "name_patterns");
791   - [O "l"; S "’"; L], (function [x;y;z] -> compose_lemma3 x y z "" "subst:_:_:_" | _ -> failwith "name_patterns");
792   - [L; S "’"; O "s"], (function [x;y;z] -> compose_lemma3 x y z "" "subst:_:_:_" | _ -> failwith "name_patterns");
793   - [L; S "’"; O "sa"], (function [x;_;_] -> compose_lemma x "’s" "subst:sg:gen.acc:_" | _ -> failwith "name_patterns");
794   - ]
795   -
796   -let abr_patterns = [
797   - [O "b"; S "."; O "u"; S "."], (function [a;b;c;d] -> [ct [a;b] "bez" "prep:gen:nwok"; ct [c;d] "uwaga" "subst:pl:gen:f"] | _ -> failwith "abr_patterns");
798   - [O "b"; S "."; O "zm"; S "."], (function [a;b;c;d] -> [ct [a;b] "bez" "prep:gen:nwok"; ct [c;d] "zmiana" "subst:pl:gen:f"] | _ -> failwith "abr_patterns");
799   - [O "blm"], (function [a] -> st a [1,"bez","prep:gen:nwok";1,"liczba","subst:sg:gen:f";1,"mnogi","adj:sg:gen:f:pos"] | _ -> failwith "abr_patterns");
800   - [O "blp"], (function [a] -> st a [1,"bez","prep:gen:nwok";1,"liczba","subst:sg:gen:f";1,"pojedynczy","adj:sg:gen:f:pos"] | _ -> failwith "abr_patterns");
801   - [O "błp"; S "."], (function [a;b] -> std a b [2,"błogosławiony","adj:sg:gen:f:pos";1,"pamięć","subst:sg:gen:f"] | _ -> failwith "abr_patterns");
802   - [O "bm"], (function [a] -> st a [1,"bieżący","adj:sg:$C:m3:pos";1,"miesiąc","subst:sg:$C:m3"] | _ -> failwith "abr_patterns");
803   - [O "bm"; S "."], (function [a;b] -> std a b [1,"bieżący","adj:sg:$C:m3:pos";1,"miesiąc","subst:sg:$C:m3"] | _ -> failwith "abr_patterns");
804   - [O "bp"; S "."], (function [a;b] -> std a b [1,"błogosławiony","adj:sg:gen:f:pos";1,"pamięć","subst:sg:gen:f"] | _ -> failwith "abr_patterns");
805   - [O "br"], (function [a] -> st a [1,"bieżący","adj:sg:$C:m3:pos";1,"rok","subst:sg:$C:m3"] | _ -> failwith "abr_patterns");
806   - [O "br"; S "."], (function [a;b] -> std a b [1,"bieżący","adj:sg:$C:m3:pos";1,"rok","subst:sg:$C:m3"] | _ -> failwith "abr_patterns");
807   - [O "c"; S "."; O "d"; S "."; O "n"; S "."], (function [a;b;c;d;e;f] -> [ct [a;b] "ciąg" "subst:sg:nom:m3"; ct [c;d] "daleki" "adj:sg:nom:m3:com"; ct [e;f] "nastąpić" "fin:sg:ter:perf"] | _ -> failwith "abr_patterns");
808   - [O "ccm"], (function [a] -> st a [1,"sześcienny","adj:_:$C:m3:pos";2,"centymetr","subst:_:$C:m3"] | _ -> failwith "abr_patterns");
809   - [O "cd"; S "."], (function [a;b] -> std a b [1,"ciąg","subst:sg:nom:m3";1,"daleki","adj:sg:nom:m3:com"] | _ -> failwith "abr_patterns");
810   - [O "cdn"; S "."], (function [a;b] -> std a b [1,"ciąg","subst:sg:nom:m3";1,"daleki","adj:sg:nom:m3:com";1,"nastąpić","fin:sg:ter:perf"] | _ -> failwith "abr_patterns");
811   - [O "cm"; O "3"], (function [a;b] -> [ct [a] "centymetr" "subst:_:$C:m3"; ct [b] "sześcienny" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns");
812   - [O "dcn"; S "."], (function [a;b] -> std a b [1,"daleki","adj:sg:nom:m3:com";1,"ciąg","subst:sg:nom:m3";1,"nastąpić","fin:sg:ter:perf"] | _ -> failwith "abr_patterns");
813   - [O "dm"; O "3"], (function [a;b] -> [ct [a] "decymetr" "subst:_:$C:m3"; ct [b] "sześcienny" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns");
814   - [O "ds"; S "."], (function [a;b] -> std a b [1,"do","prep:gen";1,"sprawa","subst:pl:gen:f"] | _ -> failwith "abr_patterns");
815   - [O "d"; O "/"; O "s"], (function [a;b;c] -> [ct [a;b] "do" "prep:gen"; ct [c] "sprawa" "subst:pl:gen:f"] | _ -> failwith "abr_patterns");
816   - [O "itd"; S "."], (function [a;b] -> std a b [1,"i","conj";1,"tak","adv:pos";1,"daleko","adv:com"] | _ -> failwith "abr_patterns");
817   - [O "itede"; S "."], (function [a;b] -> std a b [1,"i","conj";2,"tak","adv:pos";2,"daleko","adv:com"] | _ -> failwith "abr_patterns");
818   - [O "itp"; S "."], (function [a;b] -> std a b [1,"i","conj";1,"tym","adv";1,"podobny","adj:pl:nom:_:pos"] | _ -> failwith "abr_patterns");
819   - [O "jw"; S "."], (function [a;b] -> std a b [1,"jak","adv:pos";1,"wysoko","adv:com"] | _ -> failwith "abr_patterns");
820   - [O "JWP"], (function [a] -> st a [1,"jaśnie","adv:pos";1,"wielmożny","adj:_:$C:m1:pos";1,"pan","subst:_:$C:m1"] | _ -> failwith "abr_patterns");
821   - [O "JWP"], (function [a] -> st a [1,"jaśnie","adv:pos";1,"wielmożny","adj:_:$C:f:pos";1,"pani","subst:_:$C:f"] | _ -> failwith "abr_patterns");
822   - [O "km"; S "."; O "2"], (function [a;b;c] -> [ct [a;b] "kilometr" "subst:_:$C:m3"; ct [c] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns");
823   - [O "km"; O "2"], (function [a;b] -> [ct [a] "kilometr" "subst:_:$C:m3"; ct [b] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns");
824   - [O "km"; O "²"], (function [a;b] -> [ct [a] "kilometr" "subst:_:$C:m3"; ct [b] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns");
825   - [O "lm"; S "."], (function [a;b] -> std a b [1,"liczba","subst:sg:$C:f";1,"mnogi","adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns");
826   - [O "lp"; S "."], (function [a;b] -> std a b [1,"liczba","subst:sg:$C:f";1,"pojedynczy","adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns");
827   - [O "m"; S "."; O "in"; S "."], (function [a;b;c;d] -> [ct [a;b] "między" "prep:inst"; ct [c;d] "inny" "adj:pl:inst:_:pos"] | _ -> failwith "abr_patterns");
828   - [O "m"; S "."; O "in"], (function [a;b;c] -> [ct [a;b] "między" "prep:inst"; ct [c] "inny" "adj:pl:inst:_:pos"] | _ -> failwith "abr_patterns");
829   - [O "m"; S "."; O "inn"; S "."], (function [a;b;c;d] -> [ct [a;b] "między" "prep:inst"; ct [c;d] "inny" "adj:pl:inst:_:pos"] | _ -> failwith "abr_patterns");
830   - [O "m"; S "."; O "st"; S "."], (function [a;b;c;d] -> [ct [a;b] "miasto" "subst:_:$C:n2"; ct [c;d] "stołeczny" "adj:_:$C:n2:pos"] | _ -> failwith "abr_patterns");
831   - [O "m"; O "^"; O "2"], (function [a;b;c] -> [ct [a] "metr" "subst:_:$C:m3"; ct [b;c] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns");
832   - [O "m"; O "2"], (function [a;b] -> [ct [a] "metr" "subst:_:$C:m3"; ct [b] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns");
833   - [O "m"; O "3"], (function [a;b] -> [ct [a] "metr" "subst:_:$C:m3"; ct [b] "sześcienny" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns");
834   - [O "min"; S "."], (function [a;b] -> std a b [1,"między","prep:inst";2,"inny","adj:pl:inst:_:pos"] | _ -> failwith "abr_patterns");
835   - [O "mkw"; S "."], (function [a;b] -> std a b [1,"metr","subst:_:$C:m3";2,"kwadratowy","adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns");
836   - [O "n"; S "."; O "e"; S "."], (function [a;b;c;d] -> [ct [a;b] "nasz" "adj:sg:gen:f:pos"; ct [c;d] "era" "subst:sg:gen:f"] | _ -> failwith "abr_patterns");
837   - [O "n"; S "."; O "p"; S "."; O "m"; S "."], (function [a;b;c;d;e;f] -> [ct [a;b] "nad" "prep:inst"; ct [c;d] "poziom" "subst:sg:inst:m3"; ct [e;f] "morze" "subst:sg:gen:n2"] | _ -> failwith "abr_patterns");
838   - [O "np"; S "."], (function [a;b] -> std a b [1,"na","prep:acc";1,"przykład","subst:sg:acc:m3"] | _ -> failwith "abr_patterns");
839   - [O "nt"; S "."], (function [a;b] -> std a b [1,"na","prep:acc";1,"temat","subst:sg:acc:m3"] | _ -> failwith "abr_patterns");
840   - [O "NTG"], (function [a] -> st a [1,"nie","qub";1,"ta","adj:sg:nom:f:pos";1,"grupa","subst:sg:nom:f"] | _ -> failwith "abr_patterns");
841   - [O "o"; S "."; O "o"; S "."], (function [a;b;c;d] -> [ct [a;b] "ograniczony" "adj:sg:$C:f:pos"; ct [c;d] "odpowiedzialność" "subst:sg:$C:f"] | _ -> failwith "abr_patterns");
842   - [O "p"; S "."; O "n"; S "."; O "e"; S "."], (function [a;b;c;d;e;f] -> [ct [a;b] "przed" "prep:inst"; ct [c;d] "nasz" "adj:sg:inst:f:pos"; ct [e;f] "era" "subst:sg:inst:f"] | _ -> failwith "abr_patterns");
843   - [O "p"; S "."; O "o"; S "."], (function [a;b;c;d] -> [ct [a;b] "pełniący" "pact:_:_:m1.m2.m3:imperf:aff"; ct [c;d] "obowiązek" "subst:pl:acc:m3"] | _ -> failwith "abr_patterns");
844   - [O "p"; S "."; O "p"; S "."; O "m"; S "."], (function [a;b;c;d;e;f] -> [ct [a;b] "pod" "prep:inst"; ct [c;d] "poziom" "subst:sg:inst:m3"; ct [e;f] "morze" "subst:sg:gen:n2"] | _ -> failwith "abr_patterns");
845   - [O "p"; S "."; O "t"; S "."], (function [a;b;c;d] -> [ct [a;b] "pod" "prep:inst:nwokc"; ct [c;d] "tytuł" "subst:sg:inst:m3"] | _ -> failwith "abr_patterns");
846   - [O "pn"; S "."], (function [a;b] -> std a b [1,"pod","prep:inst";1,"nazwa","subst:sg:inst:f"] | _ -> failwith "abr_patterns");
847   - [O "pne"; S "."], (function [a;b] -> std a b [1,"przed","prep:inst";1,"nasz","adj:sg:inst:f:pos";1,"era","subst:sg:inst:f"] | _ -> failwith "abr_patterns");
848   - [O "pt"; S "."], (function [a;b] -> std a b [1,"pod","prep:inst";1,"tytuł","subst:sg:inst:m3"] | _ -> failwith "abr_patterns");
849   - [O "PW"], (function [a] -> st a [1,"prywatny","adj:_:$C:f:pos";1,"wiadomość","subst:_:$C:f"] | _ -> failwith "abr_patterns");
850   - [O "pw"; S "."], (function [a;b] -> std a b [1,"pod","prep:inst";1,"wezwanie","subst:sg:inst:n2"] | _ -> failwith "abr_patterns");
851   -(* [O "S"; S "."; O "A"; S "."], (function [a;b;c;d] -> [ct [a;b] "spółka" "subst:sg:$C:f"; ct [c;d] "akcyjny" "adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns");
852   - [O "s"; S "."; O "c"; S "."], (function [a;b;c;d] -> [ct [a;b] "spółka" "subst:sg:$C:f"; ct [c;d] "cywilny" "adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns");*)
853   -(* [O "SA"], (function [a] -> st a [1,"spółka","subst:sg:$C:f";1,"akcyjny","adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns"); *)
854   - [O "ś"; S "."; O "p"; S "."], (function [a;b;c;d] -> [ct [a;b] "święty" "adj:sg:gen:f:pos"; ct [c;d] "pamięć" "subst:sg:gen:f"] | _ -> failwith "abr_patterns");
855   - [O "śp"; S "."], (function [a;b] -> std a b [1,"święty","adj:sg:gen:f:pos";1,"pamięć","subst:sg:gen:f"] | _ -> failwith "abr_patterns");
856   - [O "tgz"; S "."], (function [a;b] -> std a b [2,"tak","adv";1,"zwać","ppas:_:_:_:_:aff"] | _ -> failwith "abr_patterns");
857   - [O "tj"; S "."], (function [a;b] -> std a b [1,"to","subst:sg:nom:n2";1,"być","fin:sg:ter:imperf"] | _ -> failwith "abr_patterns");
858   - [O "tzn"; S "."], (function [a;b] -> std a b [1,"to","subst:sg:nom:n2";2,"znaczyć","fin:sg:ter:imperf"] | _ -> failwith "abr_patterns");
859   - [O "tzw"; S "."], (function [a;b] -> std a b [1,"tak","adv:pos";2,"zwać","ppas:_:_:_:imperf:aff"] | _ -> failwith "abr_patterns");
860   - [O "ub"; S "."; O "r"; S "."], (function [a;b;c;d] -> [ct [a;b] "ubiegły" "adj:sg:$C:m3:pos"; ct [c;d] "rok" "subst:sg:$C:m3"] | _ -> failwith "abr_patterns");
861   - [O "w"; S "."; O "w"; S "."], (function [a;b;c;d] -> [ct [a;b] "wysoko" "adv:com"; ct [c;d] "wymienić" "ppas:_:_:_:perf:aff"] | _ -> failwith "abr_patterns");
862   - [O "w"; O "/"; O "m"], (function [a;b;c] -> [ct [a;b] "w" "prep:loc"; ct [c] "miejsce" "subst:_:loc:m3"] | _ -> failwith "abr_patterns");
863   - [O "w"; O "/"; O "w"], (function [a;b;c] -> [ct [a;b] "wysoko" "adv:com"; ct [c] "wymienić" "ppas:_:_:_:perf:aff"] | _ -> failwith "abr_patterns");
864   - [O "ws"; S "."], (function [a;b] -> std a b [1,"w","prep:loc:nwok";1,"sprawa","subst:sg:loc:f"] | _ -> failwith "abr_patterns");
865   - [O "ww"; S "."], (function [a;b] -> std a b [1,"wysoko","adv:com";1,"wymieniony","ppas:_:_:_:perf:aff"] | _ -> failwith "abr_patterns");
866   - ]
tokenizer/eniam-tokenizer-1.0/ENIAMpatterns.ml deleted
1   -(*
2   - * ENIAMtokenizer, a tokenizer for Polish
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
5   - *
6   - * This library is free software: you can redistribute it and/or modify
7   - * it under the terms of the GNU Lesser General Public License as published by
8   - * the Free Software Foundation, either version 3 of the License, or
9   - * (at your option) any later version.
10   - *
11   - * This library is distributed in the hope that it will be useful,
12   - * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   - * GNU General Public License for more details.
15   - *
16   - * You should have received a copy of the GNU Lesser General Public License
17   - * along with this program. If not, see <http://www.gnu.org/licenses/>.
18   - *)
19   -
20   -open Xstd
21   -open Printf
22   -open ENIAMtokenizerTypes
23   -
24   -let rec flatten_tokens rev_variants = function
25   - | [] -> rev_variants
26   - | Token t :: l -> flatten_tokens (Xlist.map rev_variants (fun rev_variant -> Token t :: rev_variant)) l
27   - | Seq seq :: l -> flatten_tokens rev_variants (seq @ l)
28   - | Variant variants :: l -> flatten_tokens (List.flatten (Xlist.map variants (fun variant -> flatten_tokens rev_variants [variant]))) l
29   -
30   -let rec normalize_tokens rev = function
31   - [] -> List.rev rev
32   - | Token t :: l -> normalize_tokens (Token t :: rev) l
33   - | Seq seq :: l -> normalize_tokens rev (seq @ l)
34   - | Variant[t] :: l -> normalize_tokens rev (t :: l)
35   - | Variant variants :: l ->
36   - let variants = flatten_tokens [[]] [Variant variants] in
37   - let variants = Xlist.map variants (fun rev_seq ->
38   - match List.rev rev_seq with
39   - [] -> failwith "normalize_tokens"
40   - | [t] -> t
41   - | seq -> Seq seq) in
42   - let t = match variants with
43   - [] -> failwith "normalize_tokens"
44   - | [t] -> t
45   - | variants -> Variant variants in
46   - normalize_tokens (t :: rev) l
47   -
48   -let concat_orths l =
49   - String.concat "" (Xlist.map l (fun t -> t.orth))
50   -
51   -let concat_orths2 l =
52   - String.concat "" (Xlist.map l (fun t -> ENIAMtokens.get_orth t.token))
53   -
54   -let concat_intnum = function
55   - [{token=Dig(v4,_)};_;{token=Dig(v3,_)};_;{token=Dig(v2,_)};_;{token=Dig(v1,_)}] -> v4^v3^v2^v1
56   - | [{token=Dig(v3,_)};_;{token=Dig(v2,_)};_;{token=Dig(v1,_)}] -> v3^v2^v1
57   - | [{token=Dig(v2,_)};_;{token=Dig(v1,_)}] -> v2^v1
58   - | [{token=Dig(v1,_)}] -> v1
59   - | _ -> failwith "concat_intnum"
60   -
61   -let dig_value t =
62   - match t.token with
63   - Dig(v,_) -> v
64   - | _ -> failwith "dig_value"
65   -
66   -(* FIXME: problem z ordnum - wyklucza year co stanowi problem na końcu zdania *)
67   -let digit_patterns1 = [ (* FIXME: problem z nadmiarowymi interpretacjami - trzeba uwzględnić w preprocesingu brak spacji - albo w dezambiguacji *)
68   - [D "dig"; S "."; D "dig"; S "."; D "dig"; S "."; D "dig"; S "."; D "dig"], (fun tokens -> Proper(concat_orths tokens,"obj-id",[[]],["obj-id"]));
69   - [D "dig"; S "."; D "dig"; S "."; D "dig"; S "."; D "dig"], (fun tokens -> Proper(concat_orths tokens,"obj-id",[[]],["obj-id"]));
70   - [D "dig"; S "."; D "dig"; S "."; D "dig"], (fun tokens -> Proper(concat_orths tokens,"obj-id",[[]],["obj-id"]));
71   - [D "dig"; S "."; D "dig"], (fun tokens -> Proper(concat_orths tokens,"obj-id",[[]],["obj-id"]));
72   -(* [D "dig"], "obj-id"; *)
73   - [D "pref3dig"; S "."; D "3dig"; S "."; D "3dig"; S "."; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum"));
74   - [D "pref3dig"; S "."; D "3dig"; S "."; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum"));
75   - [D "pref3dig"; S "."; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum"));
76   - [D "pref3dig"; S " "; D "3dig"; S " "; D "3dig"; S " "; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum"));
77   - [D "pref3dig"; S " "; D "3dig"; S " "; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum"));
78   - [D "pref3dig"; S " "; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum"));
79   - [D "intnum"; S "."], (function [token;_] -> Dig(concat_intnum [token],"ordnum") | _ -> failwith "digit_patterns1"); (* FIXME: to nie powinno wykluczać innych interpretacji *)
80   - [D "day"; S "."; D "month"; S "."; D "year"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns2");
81   - [D "day"; S "."; RD "month"; S "."; D "year"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns3");
82   - [D "day"; S " "; RD "month"; S " "; D "year"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns3");
83   - [D "day"; S "."; D "month"; S "."; D "2dig"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns2");
84   - [D "day"; S "."; RD "month"; S "."; D "2dig"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns3");
85   - [D "day"; S "."; D "month"; S "."], (function [day;_;month;_] -> Compound("day-month",[day.token;month.token]) | _ -> failwith "digit_patterns4");
86   - [D "hour"; S "."; D "minute"], (function [hour;_;minute] -> Compound("hour-minute",[hour.token;minute.token]) | _ -> failwith "digit_patterns5");
87   - [D "hour"; S ":"; D "minute"], (function [hour;_;minute] -> Compound("hour-minute",[hour.token;minute.token]) | _ -> failwith "digit_patterns6");
88   - [D "intnum"; S ":"; D "intnum"], (function [x;_;y] -> Compound("match-result",[x.token;y.token]) | _ -> failwith "digit_patterns7");
89   - ] (* bez 1 i *2 *3 *4 mamy rec *) (* w morfeuszu zawsze num:pl?*)
90   -
91   -let digit_patterns2 = [
92   - [D "intnum"; S ","; D "dig"], (function [x;_;y] -> Dig(dig_value x ^ "," ^ dig_value y,"realnum") | _ -> failwith "digit_patterns8");
93   -(* [S "-"; D "intnum"; S ","; D "dig"], (function [_;x;_;y] -> Dig("-" ^ dig_value x ^ "," ^ dig_value y,"realnum") | _ -> failwith "digit_patterns9");
94   - [S "-"; D "intnum"], (function [_;x] -> Dig("-" ^ dig_value x,"realnum") | _ -> failwith "digit_patterns10");*)
95   - [S "’"; D "2dig"], (function [_;x] -> Dig("’" ^ dig_value x,"year") | _ -> failwith "digit_patterns12");
96   -(* [D "intnum"], "realnum"; *)
97   - ]
98   -
99   -let compose_latek_lemma t interp =
100   - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ "-latek", interp)
101   -
102   -let compose_latka_lemma t interp =
103   - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ "-latka", interp)
104   -
105   -let compose_latek_int_lemma t t2 interp =
106   - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ "-" ^ ENIAMtokens.get_orth t2.token ^ "-latek", interp)
107   -
108   -let compose_latka_int_lemma t t2 interp =
109   - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ "-" ^ ENIAMtokens.get_orth t2.token ^ "-latka", interp)
110   -
111   -let digit_patterns3 = [
112   - [S "-"; D "intnum"], (function [_;x] -> Dig("-" ^ dig_value x,"intnum") | _ -> failwith "digit_patterns10");
113   - [S "-"; D "realnum"], (function [_;x] -> Dig("-" ^ dig_value x,"realnum") | _ -> failwith "digit_patterns10");
114   - [D "intnum"; S "-"; D "intnum"], (function [x;_;y] -> Compound("intnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns11");
115   - [D "realnum"; S "-"; D "realnum"], (function [x;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *)
116   - [D "intnum"; S "-"; D "realnum"], (function [x;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *)
117   - [D "realnum"; S "-"; D "intnum"], (function [x;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *)
118   - [C "date"; S "-"; C "date"], (function [x;_;y] -> Compound("date-interval",[x.token;y.token]) | _ -> failwith "digit_patterns13");
119   - [C "day-month"; S "-"; C "day-month"], (function [x;_;y] -> Compound("day-month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns14");
120   - [D "day"; S "-"; D "day"], (function [x;_;y] -> Compound("day-interval",[x.token;y.token]) | _ -> failwith "digit_patterns15");
121   - [D "month"; S "-"; D "month"], (function [x;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16");
122   - [RD "month"; S "-"; RD "month"], (function [x;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns17");
123   - [D "year"; S "-"; D "year"], (function [x;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16");
124   - [D "year"; S "-"; D "2dig"], (function [x;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16");
125   - [C "hour-minute"; S "-"; C "hour-minute"], (function [x;_;y] -> Compound("hour-minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns18");
126   - [D "hour"; S "-"; D "hour"], (function [x;_;y] -> Compound("hour-interval",[x.token;y.token]) | _ -> failwith "digit_patterns19");
127   - [D "minute"; S "-"; D "minute"], (function [x;_;y] -> Compound("minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns20");
128   - [RD "roman"; S "-"; RD "roman"], (function [x;_;y] -> Compound("roman-interval",[x.token;y.token]) | _ -> failwith "digit_patterns21");
129   - [D "intnum"; S " "; S "-"; S " "; D "intnum"], (function [x;_;_;_;y] -> Compound("intnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns11");
130   - [D "realnum"; S " "; S "-"; S " "; D "realnum"], (function [x;_;_;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *)
131   - [D "intnum"; S " "; S "-"; S " "; D "realnum"], (function [x;_;_;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *)
132   - [D "realnum"; S " "; S "-"; S " "; D "intnum"], (function [x;_;_;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *)
133   - [C "date"; S " "; S "-"; S " "; C "date"], (function [x;_;_;_;y] -> Compound("date-interval",[x.token;y.token]) | _ -> failwith "digit_patterns13");
134   - [C "day-month"; S " "; S "-"; S " "; C "day-month"], (function [x;_;_;_;y] -> Compound("day-month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns14");
135   - [D "day"; S " "; S "-"; S " "; D "day"], (function [x;_;_;_;y] -> Compound("day-interval",[x.token;y.token]) | _ -> failwith "digit_patterns15");
136   - [D "month"; S " "; S "-"; S " "; D "month"], (function [x;_;_;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16");
137   - [RD "month"; S " "; S "-"; S " "; RD "month"], (function [x;_;_;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns17");
138   - [D "year"; S " "; S "-"; S " "; D "year"], (function [x;_;_;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16");
139   - [D "year"; S " "; S "-"; S " "; D "2dig"], (function [x;_;_;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16");
140   - [C "hour-minute"; S " "; S "-"; S " "; C "hour-minute"], (function [x;_;_;_;y] -> Compound("hour-minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns18");
141   - [D "hour"; S " "; S "-"; S " "; D "hour"], (function [x;_;_;_;y] -> Compound("hour-interval",[x.token;y.token]) | _ -> failwith "digit_patterns19");
142   - [D "minute"; S " "; S "-"; S " "; D "minute"], (function [x;_;_;_;y] -> Compound("minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns20");
143   - [RD "roman"; S " "; S "-"; S " "; RD "roman"], (function [x;_;_;_;y] -> Compound("roman-interval",[x.token;y.token]) | _ -> failwith "digit_patterns21");
144   - [D "intnum"; S "-"; O "latek"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:nom:m1" | _ -> failwith "digit_patterns22");
145   - [D "intnum"; S "-"; O "latka"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:gen.acc:m1" | _ -> failwith "digit_patterns22");
146   - [D "intnum"; S "-"; O "latkowi"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:dat:m1" | _ -> failwith "digit_patterns22");
147   - [D "intnum"; S "-"; O "latkiem"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:inst:m1" | _ -> failwith "digit_patterns22");
148   - [D "intnum"; S "-"; O "latku"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:loc.voc:m1" | _ -> failwith "digit_patterns22");
149   - [D "intnum"; S "-"; O "latkowie"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:nom.voc:m1" | _ -> failwith "digit_patterns22");
150   - [D "intnum"; S "-"; O "latków"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:gen.acc:m1" | _ -> failwith "digit_patterns22");
151   - [D "intnum"; S "-"; O "latkom"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:dat:m1" | _ -> failwith "digit_patterns22");
152   - [D "intnum"; S "-"; O "latkami"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:inst:m1" | _ -> failwith "digit_patterns22");
153   - [D "intnum"; S "-"; O "latkach"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:loc:m1" | _ -> failwith "digit_patterns22");
154   - [D "intnum"; S "-"; O "latka"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:nom:f" | _ -> failwith "digit_patterns22");
155   - [D "intnum"; S "-"; O "latki"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:gen:f" | _ -> failwith "digit_patterns22");
156   - [D "intnum"; S "-"; O "latce"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:dat.loc:f" | _ -> failwith "digit_patterns22");
157   - [D "intnum"; S "-"; O "latkę"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:acc:f" | _ -> failwith "digit_patterns22");
158   - [D "intnum"; S "-"; O "latką"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:inst:f" | _ -> failwith "digit_patterns22");
159   - [D "intnum"; S "-"; O "latko"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:voc:f" | _ -> failwith "digit_patterns22");
160   - [D "intnum"; S "-"; O "latki"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:nom.acc.voc:f" | _ -> failwith "digit_patterns22");
161   - [D "intnum"; S "-"; O "latek"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:gen:f" | _ -> failwith "digit_patterns22");
162   - [D "intnum"; S "-"; O "latkom"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:dat:f" | _ -> failwith "digit_patterns22");
163   - [D "intnum"; S "-"; O "latkami"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:inst:f" | _ -> failwith "digit_patterns22");
164   - [D "intnum"; S "-"; O "latkach"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:loc:f" | _ -> failwith "digit_patterns22");
165   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latek"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:nom:m1" | _ -> failwith "digit_patterns22");
166   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latka"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:gen.acc:m1" | _ -> failwith "digit_patterns22");
167   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkowi"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:dat:m1" | _ -> failwith "digit_patterns22");
168   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkiem"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:inst:m1" | _ -> failwith "digit_patterns22");
169   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latku"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:loc.voc:m1" | _ -> failwith "digit_patterns22");
170   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkowie"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:nom.voc:m1" | _ -> failwith "digit_patterns22");
171   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latków"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:gen.acc:m1" | _ -> failwith "digit_patterns22");
172   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkom"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:dat:m1" | _ -> failwith "digit_patterns22");
173   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkami"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:inst:m1" | _ -> failwith "digit_patterns22");
174   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkach"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:loc:m1" | _ -> failwith "digit_patterns22");
175   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latka"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:nom:f" | _ -> failwith "digit_patterns22");
176   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latki"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:gen:f" | _ -> failwith "digit_patterns22");
177   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latce"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:dat.loc:f" | _ -> failwith "digit_patterns22");
178   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkę"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:acc:f" | _ -> failwith "digit_patterns22");
179   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latką"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:inst:f" | _ -> failwith "digit_patterns22");
180   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latko"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:voc:f" | _ -> failwith "digit_patterns22");
181   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latki"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:nom.acc.voc:f" | _ -> failwith "digit_patterns22");
182   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latek"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:gen:f" | _ -> failwith "digit_patterns22");
183   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkom"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:dat:f" | _ -> failwith "digit_patterns22");
184   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkami"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:inst:f" | _ -> failwith "digit_patterns22");
185   - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkach"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:loc:f" | _ -> failwith "digit_patterns22");
186   - ]
187   -
188   -let url_patterns1 = [
189   - [L; S "."; O "pl"], (function l -> Dig(concat_orths2 l,"url"));
190   - [L; S "."; L; S "."; O "pl"], (function l -> Dig(concat_orths2 l,"url"));
191   - [L; S "."; L; S "."; L; S "."; O "pl"], (function l -> Dig(concat_orths2 l,"url"));
192   - [L; S "."; L; S "."; L; S "."; L; S "."; O "pl"], (function l -> Dig(concat_orths2 l,"url"));
193   - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "pl"], (function l -> Dig(concat_orths2 l,"url"));
194   - [L; S "."; L; S "-"; L; S "."; O "pl"], (function l -> Dig(concat_orths2 l,"url"));
195   - [L; S "."; O "uk"], (function l -> Dig(concat_orths2 l,"url"));
196   - [L; S "."; L; S "."; O "uk"], (function l -> Dig(concat_orths2 l,"url"));
197   - [L; S "."; L; S "."; L; S "."; O "uk"], (function l -> Dig(concat_orths2 l,"url"));
198   - [L; S "."; L; S "."; L; S "."; L; S "."; O "uk"], (function l -> Dig(concat_orths2 l,"url"));
199   - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "uk"], (function l -> Dig(concat_orths2 l,"url"));
200   - [L; S "."; L; S "-"; L; S "."; O "uk"], (function l -> Dig(concat_orths2 l,"url"));
201   - [L; S "."; O "cz"], (function l -> Dig(concat_orths2 l,"url"));
202   - [L; S "."; L; S "."; O "cz"], (function l -> Dig(concat_orths2 l,"url"));
203   - [L; S "."; L; S "."; L; S "."; O "cz"], (function l -> Dig(concat_orths2 l,"url"));
204   - [L; S "."; L; S "."; L; S "."; L; S "."; O "cz"], (function l -> Dig(concat_orths2 l,"url"));
205   - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "cz"], (function l -> Dig(concat_orths2 l,"url"));
206   - [L; S "."; L; S "-"; L; S "."; O "cz"], (function l -> Dig(concat_orths2 l,"url"));
207   - [L; S "."; O "eu"], (function l -> Dig(concat_orths2 l,"url"));
208   - [L; S "."; L; S "."; O "eu"], (function l -> Dig(concat_orths2 l,"url"));
209   - [L; S "."; L; S "."; L; S "."; O "eu"], (function l -> Dig(concat_orths2 l,"url"));
210   - [L; S "."; L; S "."; L; S "."; L; S "."; O "eu"], (function l -> Dig(concat_orths2 l,"url"));
211   - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "eu"], (function l -> Dig(concat_orths2 l,"url"));
212   - [L; S "."; L; S "-"; L; S "."; O "eu"], (function l -> Dig(concat_orths2 l,"url"));
213   - [L; S "."; O "org"], (function l -> Dig(concat_orths2 l,"url"));
214   - [L; S "."; L; S "."; O "org"], (function l -> Dig(concat_orths2 l,"url"));
215   - [L; S "."; L; S "."; L; S "."; O "org"], (function l -> Dig(concat_orths2 l,"url"));
216   - [L; S "."; L; S "."; L; S "."; L; S "."; O "org"], (function l -> Dig(concat_orths2 l,"url"));
217   - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "org"], (function l -> Dig(concat_orths2 l,"url"));
218   - [L; S "."; L; S "-"; L; S "."; O "org"], (function l -> Dig(concat_orths2 l,"url"));
219   - [L; S "."; O "com"], (function l -> Dig(concat_orths2 l,"url"));
220   - [L; S "."; L; S "."; O "com"], (function l -> Dig(concat_orths2 l,"url"));
221   - [L; S "."; L; S "."; L; S "."; O "com"], (function l -> Dig(concat_orths2 l,"url"));
222   - [L; S "."; L; S "."; L; S "."; L; S "."; O "com"], (function l -> Dig(concat_orths2 l,"url"));
223   - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "com"], (function l -> Dig(concat_orths2 l,"url"));
224   - [L; S "."; L; S "-"; L; S "."; O "com"], (function l -> Dig(concat_orths2 l,"url"));
225   - [L; S "."; O "net"], (function l -> Dig(concat_orths2 l,"url"));
226   - [L; S "."; L; S "."; O "net"], (function l -> Dig(concat_orths2 l,"url"));
227   - [L; S "."; L; S "."; L; S "."; O "net"], (function l -> Dig(concat_orths2 l,"url"));
228   - [L; S "."; L; S "."; L; S "."; L; S "."; O "net"], (function l -> Dig(concat_orths2 l,"url"));
229   - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "net"], (function l -> Dig(concat_orths2 l,"url"));
230   - [L; S "."; L; S "-"; L; S "."; O "net"], (function l -> Dig(concat_orths2 l,"url"));
231   - [L; S "."; O "gov"], (function l -> Dig(concat_orths2 l,"url"));
232   - [L; S "."; L; S "."; O "gov"], (function l -> Dig(concat_orths2 l,"url"));
233   - [L; S "."; L; S "."; L; S "."; O "gov"], (function l -> Dig(concat_orths2 l,"url"));
234   - [L; S "."; L; S "."; L; S "."; L; S "."; O "gov"], (function l -> Dig(concat_orths2 l,"url"));
235   - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "gov"], (function l -> Dig(concat_orths2 l,"url"));
236   - [L; S "."; L; S "-"; L; S "."; O "gov"], (function l -> Dig(concat_orths2 l,"url"));
237   - ]
238   -
239   -let url_patterns2 = [
240   - [L; S "@"; D "url"], (function l -> Dig(concat_orths2 l,"email"));
241   - [O "http"; S ":"; S "/"; S "/"; D "url"], (function l -> Dig(concat_orths2 l,"email"));
242   - ]
243   -
244   -let url_patterns3 = [
245   - [D "url"; S "/"], (function l -> Dig(concat_orths2 l,"url"));
246   - [D "url"; S "/"; L], (function l -> Dig(concat_orths2 l,"url"));
247   - [D "url"; S "/"; L; S "."; L], (function l -> Dig(concat_orths2 l,"url"));
248   - ]
249   -
250   -type matching = {
251   - prefix: tokens list;
252   - matched: token_record list;
253   - suffix: tokens list;
254   - pattern: pat list;
255   - command: token_record list -> token;
256   - command_abr: token_record list -> tokens list;
257   - }
258   -
259   -let execute_command matching =
260   - let l = List.rev matching.matched in
261   - let len = Xlist.fold l 0 (fun len t -> t.len + len) in
262   - Seq((List.rev matching.prefix) @ [Token{empty_token with
263   - orth=concat_orths l;
264   - beg=(List.hd l).beg;
265   - len=len;
266   - next=(List.hd l).beg+len;
267   - token=matching.command l;
268   - (*weight=0.;*) (* FIXME: dodać wagi do konkretnych reguł i uwzględnić wagi maczowanych tokenów *)
269   - attrs=ENIAMtokens.merge_attrs l}] @ matching.suffix)
270   -
271   -let execute_abr_command matching =
272   - let l = List.rev matching.matched in
273   - Seq((List.rev matching.prefix) @ (matching.command_abr l) @ matching.suffix)
274   -
275   -let match_token = function
276   - D cat, Dig(_,cat2) -> cat = cat2
277   - | C s, Compound(s2,_) -> s = s2
278   - | S s, Symbol s2 -> s = s2
279   - | RD cat, RomanDig(_,cat2) -> cat = cat2
280   - | O pat, Dig(s,"dig") -> pat = s
281   - | O pat, Symbol s -> pat = s
282   - | O pat, SmallLetter orth -> pat = orth
283   - | O pat, CapLetter(orth,lc) -> pat = orth
284   - | O pat, AllSmall orth -> pat = orth
285   - | O pat, AllCap(orth,lc,lc2) -> pat = orth
286   - | O pat, FirstCap(orth,lc,_,_) -> pat = orth
287   - | O pat, SomeCap orth -> pat = orth
288   - | L, SmallLetter _ -> true
289   - | L, CapLetter _ -> true
290   - | L, AllSmall _ -> true
291   - | L, AllCap _ -> true
292   - | L, FirstCap _ -> true
293   - | L, SomeCap _ -> true
294   - | CL, CapLetter _ -> true
295   - | CL, AllCap _ -> true
296   - | CL, SomeCap _ -> true
297   - | _ -> false
298   -
299   -let rec find_first_token matching pat = function
300   - Token t -> if match_token (pat,t.token) then [{matching with matched = t :: matching.matched}] else []
301   - | Seq l -> Xlist.map (find_first_token matching pat (List.hd (List.rev l))) (fun matching -> {matching with prefix = matching.prefix @ (List.tl (List.rev l))})
302   - | Variant l -> List.flatten (Xlist.map l (find_first_token matching pat))
303   -
304   -let rec find_middle_token matching pat = function
305   - Token t -> if match_token (pat,t.token) then [{matching with matched = t :: matching.matched}] else []
306   - | Seq _ -> []
307   - | Variant l -> List.flatten (Xlist.map l (find_middle_token matching pat))
308   -
309   -let rec find_last_token matching pat = function
310   - Token t -> if match_token (pat,t.token) then [{matching with matched = t :: matching.matched}] else []
311   - | Seq l -> Xlist.map (find_last_token matching pat (List.hd l)) (fun matching -> {matching with suffix = matching.suffix @ (List.tl l)})
312   - | Variant l -> List.flatten (Xlist.map l (find_last_token matching pat))
313   -
314   -let rec find_pattern_tail matchings = function
315   - [] -> raise Not_found
316   - | token :: l ->
317   - let found,finished = Xlist.fold matchings ([],[]) (fun (found,finished) matching ->
318   - match matching.pattern with
319   - [pat] -> found, (find_last_token {matching with pattern=[]} pat token) @ finished
320   - | pat :: pattern -> (find_middle_token {matching with pattern=pattern} pat token) @ found, finished
321   - | _ -> failwith "find_pattern: ni") in
322   - (try
323   - if found = [] then raise Not_found else
324   - find_pattern_tail found l
325   - with Not_found ->
326   - let finished = List.flatten (Xlist.map finished (fun matching -> try [execute_command matching] with Not_found -> [])) in
327   - if finished = [] then raise Not_found else Variant finished,l)
328   -
329   -(* wzorce nie mogą mieć długości 1 *)
330   -let rec find_pattern matchings rev = function
331   - token :: l ->
332   - let found = Xlist.fold matchings [] (fun found matching ->
333   - match matching.pattern with
334   - pat :: pattern -> (find_first_token {matching with pattern=pattern} pat token) @ found
335   - | [] -> failwith "find_pattern: empty pattern") in
336   - if found = [] then find_pattern matchings (token :: rev) l else
337   - (try
338   - let token,l = find_pattern_tail found l in
339   - find_pattern matchings (token :: rev) l
340   - with Not_found -> find_pattern matchings (token :: rev) l)
341   - | [] -> List.rev rev
342   -
343   -let find_patterns patterns tokens =
344   - find_pattern (Xlist.map patterns (fun (pattern,command) ->
345   - {prefix=[]; matched=[]; suffix=[]; pattern=pattern; command=command; command_abr=(fun _ -> [])})) [] tokens
346   -
347   -let rec find_abr_pattern_tail matchings = function
348   - [] -> raise Not_found
349   - | token :: l ->
350   - let found,finished = Xlist.fold matchings ([],[]) (fun (found,finished) matching ->
351   - match matching.pattern with
352   - [pat] -> found, (find_last_token {matching with pattern=[]} pat token) @ finished
353   - | pat :: pattern -> (find_middle_token {matching with pattern=pattern} pat token) @ found, finished
354   - | [] -> found, matching :: finished) in
355   - (try
356   - if found = [] then raise Not_found else
357   - find_abr_pattern_tail found l
358   - with Not_found ->
359   - let finished = List.flatten (Xlist.map finished (fun matching -> try [execute_abr_command matching] with Not_found -> [])) in
360   - if finished = [] then raise Not_found else Variant finished,l)
361   -
362   -let rec find_abr_pattern matchings rev = function
363   - token :: l ->
364   - let found = Xlist.fold matchings [] (fun found matching ->
365   - match matching.pattern with
366   - pat :: pattern -> (find_first_token {matching with pattern=pattern} pat token) @ found
367   - | [] -> failwith "find_abr_pattern: empty pattern") in
368   - if found = [] then find_abr_pattern matchings (token :: rev) l else
369   - (try
370   - let token,l = find_abr_pattern_tail found l in
371   - find_abr_pattern matchings (token :: rev) l
372   - with Not_found -> find_abr_pattern matchings (token :: rev) l)
373   - | [] -> List.rev rev
374   -
375   -let find_abr_patterns patterns tokens =
376   - find_abr_pattern (Xlist.map patterns (fun (pattern,command) ->
377   - {prefix=[]; matched=[]; suffix=[]; pattern=pattern; command=(fun _ -> Symbol ""); command_abr=command})) [] tokens
378   -
379   -let find_replacement_patterns tokens =
380   - let tokens = find_patterns digit_patterns1 tokens in
381   - let tokens = normalize_tokens [] tokens in
382   - let tokens = find_patterns digit_patterns2 tokens in
383   - let tokens = normalize_tokens [] tokens in
384   - let tokens = find_patterns digit_patterns3 tokens in
385   - let tokens = normalize_tokens [] tokens in
386   - let tokens = find_patterns ENIAMacronyms.acronym_patterns tokens in
387   - let tokens = normalize_tokens [] tokens in
388   - let tokens = find_patterns ENIAMacronyms.mte_patterns tokens in
389   - let tokens = normalize_tokens [] tokens in
390   -(* Xlist.iter tokens (fun t -> print_endline (ENIAMtokens.string_of_tokens 0 t)); *)
391   - let tokens = find_patterns ENIAMacronyms.name_patterns tokens in
392   -(* Xlist.iter tokens (fun t -> print_endline (ENIAMtokens.string_of_tokens 0 t)); *)
393   - let tokens = normalize_tokens [] tokens in
394   - let tokens = find_patterns url_patterns1 tokens in
395   - let tokens = normalize_tokens [] tokens in
396   - let tokens = find_patterns url_patterns2 tokens in
397   - let tokens = normalize_tokens [] tokens in
398   - let tokens = find_patterns url_patterns3 tokens in
399   - let tokens = normalize_tokens [] tokens in
400   -(* Xlist.iter tokens (fun t -> print_endline (ENIAMtokens.string_of_tokens 0 t)); *)
401   - tokens
402   -
403   -let rec set_next_id n = function
404   - Token t -> Token{t with next=n}
405   - | Seq l ->
406   - (match List.rev l with
407   - t :: l -> Seq(List.rev ((set_next_id n t) :: l))
408   - | [] -> failwith "set_next_id n")
409   - | Variant l -> Variant(Xlist.map l (set_next_id n))
410   -
411   -let rec remove_spaces rev = function
412   - [] -> List.rev rev
413   - | x :: Token{token=Symbol " "; next=n} :: l -> remove_spaces ((set_next_id n x) :: rev) l
414   - | Token{token=Symbol " "} :: l -> remove_spaces rev l
415   - | x :: l -> remove_spaces (x :: rev) l
tokenizer/eniam-tokenizer-1.0/ENIAMtokenizer.ml deleted
1   -(*
2   - * ENIAMtokenizer, a tokenizer for Polish
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
5   - *
6   - * This library is free software: you can redistribute it and/or modify
7   - * it under the terms of the GNU Lesser General Public License as published by
8   - * the Free Software Foundation, either version 3 of the License, or
9   - * (at your option) any later version.
10   - *
11   - * This library is distributed in the hope that it will be useful,
12   - * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   - * GNU General Public License for more details.
15   - *
16   - * You should have received a copy of the GNU Lesser General Public License
17   - * along with this program. If not, see <http://www.gnu.org/licenses/>.
18   - *)
19   -
20   -open Xstd
21   -open ENIAMtokenizerTypes
22   -
23   -let string_of =
24   - ENIAMtokens.string_of_tokens
25   -
26   -let parse query =
27   - let l = Xunicode.classified_chars_of_utf8_string query in
28   - let l = ENIAMtokens.tokenize l in
29   - let l = ENIAMpatterns.normalize_tokens [] l in
30   - let l = ENIAMpatterns.find_replacement_patterns l in
31   - let l = ENIAMpatterns.remove_spaces [] l in
32   - let l = ENIAMpatterns.find_abr_patterns ENIAMacronyms.abr_patterns l in
33   - let l = ENIAMpatterns.normalize_tokens [] l in
34   - l
tokenizer/eniam-tokenizer-1.0/ENIAMtokenizerTypes.ml deleted
1   -(*
2   - * ENIAMtokenizer, a tokenizer for Polish
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
5   - *
6   - * This library is free software: you can redistribute it and/or modify
7   - * it under the terms of the GNU Lesser General Public License as published by
8   - * the Free Software Foundation, either version 3 of the License, or
9   - * (at your option) any later version.
10   - *
11   - * This library is distributed in the hope that it will be useful,
12   - * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   - * GNU General Public License for more details.
15   - *
16   - * You should have received a copy of the GNU Lesser General Public License
17   - * along with this program. If not, see <http://www.gnu.org/licenses/>.
18   - *)
19   -
20   -open Xstd
21   -
22   -(* Długość pojedynczego znaku w tekście *)
23   -let factor = 100
24   -
25   -type token =
26   - SmallLetter of string (* orth *)
27   - | CapLetter of string * string (* orth * lowercase *)
28   - | AllSmall of string (* orth *)
29   - | AllCap of string * string * string (* orth * lowercase * all lowercase *)
30   - | FirstCap of string * string * string * string (* orth * all lowercase * first letter uppercase * first letter lowercase *)
31   - | SomeCap of string (* orth *)
32   - | RomanDig of string * string (* value * cat *)
33   - | Interp of string (* orth *)
34   - | Symbol of string (* orth *)
35   - | Dig of string * string (* value * cat *)
36   - | Other of string (* orth *)
37   - | Lemma of string * string * string list list list (* lemma * cat * interp *)
38   - | Proper of string * string * string list list list * string list (* lemma * cat * interp * senses *)
39   -(* | Sense of string * string * string list list list * (string * string * string list) list (* lemma * cat * interp * senses *) *)
40   - | Compound of string * token list (* sense * components *)
41   - (* | Tokens of string * int list (*cat * token id list *) *)
42   -
43   -(* Tekst reprezentuję jako zbiór obiektów typu token_record zawierających
44   - informacje o poszczególnych tokenach *)
45   -and token_record = {
46   - orth: string; (* sekwencja znaków pierwotnego tekstu składająca się na token *)
47   - corr_orth: string; (* sekwencja znaków pierwotnego tekstu składająca się na token z poprawionymi błędami *)
48   - beg: int; (* pozycja początkowa tokenu względem początku akapitu *)
49   - len: int; (* długość tokenu *)
50   - next: int; (* pozycja początkowa następnego tokenu względem początku akapitu *)
51   - token: token; (* treść tokenu *)
52   - attrs: string list; (* dodatkowe atrybuty *)
53   - }
54   -
55   -(* Tokeny umieszczone są w strukturze danych umożliwiającej efektywne wyszukiwanie ich sekwencji,
56   - struktura danych sama z siebie nie wnosi informacji *)
57   -type tokens =
58   - | Token of token_record
59   - | Variant of tokens list
60   - | Seq of tokens list
61   -
62   -type pat = L | CL | D of string | C of string | S of string | RD of string | O of string
63   -
64   -let empty_token = {
65   - orth="";corr_orth="";beg=0;len=0;next=0; token=Symbol ""; attrs=[]}
66   -
67   -let config =
68   - try File.load_attr_val_pairs "config-tokenizer"
69   - with _ -> (print_endline "ENIAMtokenizer config file not found"; [])
70   -
71   -let mte_filename =
72   - try Xlist.assoc config "MTE_FILENAME"
73   - with Not_found -> (print_endline "ENIAMtokenizer MTE_FILENAME config variable undefined"; "")
tokenizer/eniam-tokenizer-1.0/ENIAMtokens.ml deleted
1   -(*
2   - * ENIAMtokenizer, a tokenizer for Polish
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
5   - *
6   - * This library is free software: you can redistribute it and/or modify
7   - * it under the terms of the GNU Lesser General Public License as published by
8   - * the Free Software Foundation, either version 3 of the License, or
9   - * (at your option) any later version.
10   - *
11   - * This library is distributed in the hope that it will be useful,
12   - * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   - * GNU General Public License for more details.
15   - *
16   - * You should have received a copy of the GNU Lesser General Public License
17   - * along with this program. If not, see <http://www.gnu.org/licenses/>.
18   - *)
19   -
20   -open Printf
21   -open ENIAMtokenizerTypes
22   -open Xstd
23   -open Xunicode
24   -
25   -let string_of_interps interps =
26   - String.concat "|" (Xlist.map interps (fun interp ->
27   - (String.concat ":" (Xlist.map interp (fun interp2 ->
28   - (String.concat "." interp2))))))
29   -
30   -let rec string_of_token = function
31   - SmallLetter orth -> sprintf "SmallLetter(%s)" orth
32   - | CapLetter(orth,lc) -> sprintf "CapLetter(%s,%s)" orth lc
33   - | AllSmall orth -> sprintf "AllSmall(%s)" orth
34   - | AllCap(orth,lc,lc2) -> sprintf "AllCap(%s,%s,%s)" orth lc lc2
35   - | FirstCap(orth,lc,cl,ll) -> sprintf "FirstCap(%s,%s,%s,%s)" orth lc cl ll
36   - | SomeCap orth -> sprintf "SomeCap(%s)" orth
37   - | RomanDig(v,t) -> sprintf "RomanDig(%s,%s)" v t
38   - | Interp orth -> sprintf "Interp(%s)" orth
39   - | Symbol orth -> sprintf "Symbol(%s)" orth
40   - | Dig(v,t) -> sprintf "Dig(%s,%s)" v t
41   - | Other orth -> sprintf "Other(%s)" orth
42   - | Lemma(lemma,cat,interps) -> sprintf "Lemma(%s,%s,%s)" lemma cat (string_of_interps interps)
43   - | Proper(lemma,cat,interps,senses) -> sprintf "Proper(%s,%s,%s,%s)" lemma cat (string_of_interps interps) (String.concat "|" senses)
44   -(* | Sense(lemma,cat,interps,senses) -> sprintf "Sense(%s,%s,%s,%s)" lemma cat (string_of_interps interps)
45   - (String.concat "|" (Xlist.map senses (fun (_,v,_) -> v)))*)
46   - | Compound(sense,l) -> sprintf "Compound(%s,[%s])" sense (String.concat ";" (Xlist.map l string_of_token))
47   - (* | Tokens(cat,l) -> sprintf "Tokens(%s,%s)" cat (String.concat ";" (Xlist.map l string_of_int)) *)
48   -
49   -let rec spaces i =
50   - if i = 0 then "" else " " ^ spaces (i-1)
51   -
52   -let rec string_of_tokens i = function
53   - Token t -> sprintf "%s{orth=%s;beg=%d;len=%d;next=%d;token=%s;attrs=[%s]}" (spaces i) t.orth t.beg t.len t.next (string_of_token t.token)
54   - (String.concat ";" t.attrs)
55   - | Variant l -> sprintf "%sVariant[\n%s]" (spaces i) (String.concat ";\n" (Xlist.map l (string_of_tokens (i+1))))
56   - | Seq l -> sprintf "%sSeq[\n%s]" (spaces i) (String.concat ";\n" (Xlist.map l (string_of_tokens (i+1))))
57   -
58   -let rec string_of_token_simple = function
59   - SmallLetter orth -> "SmallLetter"
60   - | CapLetter(orth,lc) -> "CapLetter"
61   - | AllSmall orth -> "AllSmall"
62   - | AllCap(orth,lc,lc2) -> "AllCap"
63   - | FirstCap(orth,lc,_,_) -> "FirstCap"
64   - | SomeCap orth -> "SomeCap"
65   - | RomanDig(v,t) -> "RomanDig"
66   - | Interp orth -> sprintf "Interp(%s)" orth
67   - | Symbol orth -> sprintf "Symbol(%s)" orth
68   - | Dig(v,t) -> "Dig"
69   - | Other orth -> sprintf "Other(%s)" orth
70   - | Lemma(lemma,cat,interp) -> "Lemma"
71   - | Proper(lemma,cat,interp,sense) -> "Proper"
72   -(* | Sense(lemma,cat,interp,sense) -> "Sense" *)
73   - | Compound(sense,l) -> sprintf "Compound"
74   - (* | Tokens _ -> sprintf "Tokens" *)
75   -
76   -let rec string_of_tokens_simple = function
77   - Token t -> string_of_token_simple t.token
78   - | Variant l -> sprintf "Variant[%s]" (String.concat ";" (Xlist.map l string_of_tokens_simple))
79   - | Seq l -> sprintf "Seq[%s]" (String.concat ";" (Xlist.map l string_of_tokens_simple))
80   -
81   -let get_orth = function
82   - SmallLetter orth -> orth
83   - | CapLetter(orth,lc) -> orth
84   - | AllSmall orth -> orth
85   - | AllCap(orth,lc,lc2) -> orth
86   - | FirstCap(orth,lc,_,_) -> orth
87   - | SomeCap orth -> orth
88   - | Symbol orth -> orth
89   - | Dig(v,_) -> v
90   - | Other orth -> orth
91   - | _ -> ""(*failwith "get_orth"*)
92   -
93   -
94   -let months = StringSet.of_list ["1"; "2"; "3"; "4"; "5"; "6"; "7"; "8"; "9"; "01"; "02"; "03"; "04"; "05"; "06"; "07"; "08"; "09"; "10"; "11"; "12"]
95   -let hours = StringSet.of_list ["0"; "1"; "2"; "3"; "4"; "5"; "6"; "7"; "8"; "9"; "00"; "01"; "02"; "03"; "04"; "05"; "06"; "07"; "08"; "09";
96   - "10"; "11"; "12"; "13"; "14"; "15"; "16"; "17"; "18"; "19"; "20"; "21"; "22"; "23"; "24"]
97   -let days = StringSet.of_list ["1"; "2"; "3"; "4"; "5"; "6"; "7"; "8"; "9"; "01"; "02"; "03"; "04"; "05"; "06"; "07"; "08"; "09";
98   - "10"; "11"; "12"; "13"; "14"; "15"; "16"; "17"; "18"; "19"; "20"; "21"; "22"; "23"; "24"; "25"; "26"; "27"; "28"; "29"; "30"; "31"]
99   -let romanmonths = StringSet.of_list ["I"; "II"; "III"; "IV"; "V"; "VI"; "VII"; "VIII"; "IX"; "X"; "XI"; "XII"]
100   -
101   -
102   -let s_beg i = Token{empty_token with beg=i;len=1;next=i+1; token=Interp "<sentence>"}
103   -let c_beg i = Token{empty_token with beg=i;len=1;next=i+1; token=Interp "<clause>"}
104   -
105   -let dig_token orth i digs token =
106   - Token{empty_token with orth=orth;beg=i;len=Xlist.size digs * factor;next=i+Xlist.size digs * factor; token=token; attrs=["maybe cs"]}
107   -
108   -let sc_dig_token orth i digs token =
109   - Seq[s_beg i;c_beg (i+1);Token{empty_token with orth=orth;beg=i+2;len=Xlist.size digs * factor - 2;next=i+Xlist.size digs * factor; token=token; attrs=["maybe cs"]}]
110   -
111   -let dig_tokens orth poss_s_beg i digs v cat =
112   - if poss_s_beg then
113   - [dig_token orth i digs (Dig(v,cat));
114   - sc_dig_token orth i digs (Dig(v,cat))]
115   - else
116   - [dig_token orth i digs (Dig(v,cat))]
117   -
118   -let merge_digits poss_s_beg i digs =
119   - let orth = String.concat "" digs in
120   - let t = dig_tokens orth poss_s_beg i digs in
121   - let v = try string_of_int (int_of_string orth) with _ -> failwith "merge_digits" in
122   - let variants =
123   - (t orth "dig") @
124   - [Token{empty_token with orth=orth;beg=i;len=Xlist.size digs * factor;next=i+Xlist.size digs * factor; token=Proper(orth,"obj-id",[[]],["obj-id"]); attrs=["maybe cs"]}] @
125   - (if digs = ["0"] || List.hd digs <> "0" then (t orth "intnum")(* @ (t orth "realnum")*) else []) @
126   - (if List.hd digs <> "0" then (t v "year") else []) @
127   - (if StringSet.mem months orth then (t v "month") else []) @
128   - (if StringSet.mem hours orth then (t v "hour") else []) @
129   - (if StringSet.mem days orth then (t v "day") else []) @
130   - (if Xlist.size digs = 2 && List.hd digs < "6" then (t v "minute") else []) @
131   - (if Xlist.size digs = 3 then (t orth "3dig") else []) @
132   - (if Xlist.size digs = 2 then (t orth "2dig") else []) @
133   - (if Xlist.size digs <= 3 && List.hd digs <> "0" then (t orth "pref3dig") else []) in
134   -(* let t = dig_token orth i digs in
135   - let sc_t = sc_dig_token orth i digs in
136   - let v = try int_of_string orth with _ -> failwith "merge_digits" in
137   - let variants =
138   - [t (Dig(v,"dig"));sc_t (Dig(v,"dig"))] @
139   - (if digs = ["0"] || List.hd digs <> "0" then [t (Dig(v,"intnum"));sc_t (Dig(v,"intnum"))] else []) @
140   - (if List.hd digs <> "0" then [t (Dig(v,"year"));sc_t (Dig(v,"year"))] else []) @
141   - (if StringSet.mem months orth then [t (Dig(v,"month"));sc_t (Dig(v,"month"))] else []) @
142   - (if StringSet.mem hours orth then [t (Dig(v,"hour"));sc_t (Dig(v,"hour"))] else []) @
143   - (if StringSet.mem days orth then [t (Dig(v,"day"));sc_t (Dig(v,"day"))] else []) @
144   - (if Xlist.size digs = 2 && List.hd digs < "6" then [t (Dig(v,"minute"));sc_t (Dig(v,"minute"))] else []) @
145   - (if Xlist.size digs = 3 then [t (Dig(v,"3dig"));sc_t (Dig(v,"3dig"))] else []) @
146   - (if Xlist.size digs <= 3 && List.hd digs <> "0" then [t (Dig(v,"pref3dig"));sc_t (Dig(v,"pref3dig"))] else []) in*)
147   - Variant variants
148   -
149   -let recognize_roman_I v = function
150   - Capital("I",_) :: Capital("I",_) :: Capital("I",_) :: [] -> v+3,false
151   - | Capital("I",_) :: Capital("I",_) :: [] -> v+2,false
152   - | Capital("I",_) :: [] -> v+1,false
153   - | [] -> v,false
154   - | Capital("I",_) :: Capital("I",_) :: Capital("I",_) :: Small("w") :: [] -> v+3,true
155   - | Capital("I",_) :: Capital("I",_) :: Small("w") :: [] -> v+2,true
156   - | Capital("I",_) :: Small("w") :: [] -> v+1,true
157   - | Small("w") :: [] -> v,true
158   - | _ -> 0,false
159   -
160   -let recognize_roman_V v = function
161   - Capital("I",_) :: ForeignCapital("V",_) :: [] -> v+4,false
162   - | ForeignCapital("V",_) :: l -> recognize_roman_I (v+5) l
163   - | Capital("I",_) :: ForeignCapital("X",_) :: [] -> v+9,false
164   - | Capital("I",_) :: ForeignCapital("V",_) :: Small("w") :: [] -> v+4,true
165   - | Capital("I",_) :: ForeignCapital("X",_) :: Small("w") :: [] -> v+9,true
166   - | l -> recognize_roman_I v l
167   -
168   -let recognize_roman_X v = function
169   - | ForeignCapital("X",_) :: ForeignCapital("X",_) :: ForeignCapital("X",_) :: l -> recognize_roman_V (v+30) l
170   - | ForeignCapital("X",_) :: ForeignCapital("X",_) :: l -> recognize_roman_V (v+20) l
171   - | ForeignCapital("X",_) :: l -> recognize_roman_V (v+10) l
172   - | l -> recognize_roman_V v l
173   -
174   -let recognize_roman_L v = function
175   - ForeignCapital("X",_) :: Capital("L",_) :: l -> recognize_roman_V (v+40) l
176   - | Capital("L",_) :: l -> recognize_roman_X (v+50) l
177   - | ForeignCapital("X",_) :: Capital("C",_) :: l -> recognize_roman_V (v+90) l
178   - | l -> recognize_roman_X v l
179   -
180   -let recognize_roman_C v = function
181   - | Capital("C",_) :: Capital("C",_) :: Capital("C",_) :: l -> recognize_roman_L (v+300) l
182   - | Capital("C",_) :: Capital("C",_) :: l -> recognize_roman_L (v+200) l
183   - | Capital("C",_) :: l -> recognize_roman_L (v+100) l
184   - | l -> recognize_roman_L v l
185   -
186   -let recognize_roman_D v = function
187   - Capital("C",_) :: Capital("D",_) :: l -> recognize_roman_L (v+400) l
188   - | Capital("D",_) :: l -> recognize_roman_C (v+500) l
189   - | Capital("C",_) :: Capital("M",_) :: l -> recognize_roman_L (v+900) l
190   - | l -> recognize_roman_C v l
191   -
192   -let recognize_roman_M v = function
193   - | Capital("M",_) :: Capital("M",_) :: Capital("M",_) :: l -> recognize_roman_D (v+3000) l
194   - | Capital("M",_) :: Capital("M",_) :: l -> recognize_roman_D (v+2000) l
195   - | Capital("M",_) :: l -> recognize_roman_D (v+1000) l
196   - | l -> recognize_roman_D v l
197   -
198   -let rec merge l =
199   - String.concat "" (Xlist.map l (function
200   - Capital(s,t) -> s
201   - | ForeignCapital(s,t) -> s
202   - | Small s -> s
203   - | ForeignSmall s -> s
204   - | _ -> failwith "merge"))
205   -
206   -let lowercase_first = function
207   - [] -> []
208   - | Capital(s,t) :: l -> Small t :: l
209   - | ForeignCapital(s,t) :: l -> ForeignSmall t :: l
210   - | Small s :: l -> Small s :: l
211   - | ForeignSmall s :: l -> ForeignSmall s :: l
212   - | _ -> failwith "lowercase_first"
213   -
214   -let rec lowercase_all = function
215   - [] -> []
216   - | Capital(s,t) :: l -> Small t :: lowercase_all l
217   - | ForeignCapital(s,t) :: l -> ForeignSmall t :: lowercase_all l
218   - | Small s :: l -> Small s :: lowercase_all l
219   - | ForeignSmall s :: l -> ForeignSmall s :: lowercase_all l
220   - | _ -> failwith "lowercase_all"
221   -
222   -let lowercase_rest = function
223   - [] -> []
224   - | x :: l -> x :: lowercase_all l
225   -
226   -let first_capital = function
227   - Capital _ :: _ -> true
228   - | ForeignCapital _ :: _ -> true
229   - | Small _ :: _ -> false
230   - | ForeignSmall _ :: _ -> false
231   - | _ -> failwith "first_capital"
232   -
233   -let rec all_capital = function
234   - Capital _ :: l -> all_capital l
235   - | ForeignCapital _ :: l -> all_capital l
236   - | Small _ :: l -> false
237   - | ForeignSmall _ :: l -> false
238   - | [] -> true
239   - | _ -> failwith "first_capital"
240   -
241   -let rec all_small = function
242   - Capital _ :: l -> false
243   - | ForeignCapital _ :: l -> false
244   - | Small _ :: l -> all_small l
245   - | ForeignSmall _ :: l -> all_small l
246   - | [] -> true
247   - | _ -> failwith "first_capital"
248   -
249   -let rest_capital = function
250   - [] -> failwith "rest_capital"
251   - | _ :: l -> all_capital l
252   -
253   -let rest_small = function
254   - [] -> failwith "rest_small"
255   - | _ :: l -> all_small l
256   -
257   -let get_first_cap = function
258   - | Capital(s,t) :: l -> s
259   - | ForeignCapital(s,t) :: l -> s
260   - | _ -> failwith "get_first_cap"
261   -
262   -let get_first_lower = function
263   - | Capital(s,t) :: l -> t
264   - | ForeignCapital(s,t) :: l -> t
265   - | _ -> failwith "get_first_lower"
266   -
267   -(*let cs_weight = -1.
268   -let sc_cap_weight = -0.3*)
269   -
270   -let is_add_attr_token = function
271   - SmallLetter _ -> true
272   - | CapLetter _ -> true
273   - | AllSmall _ -> true
274   - | AllCap _ -> true
275   - | FirstCap _ -> true
276   - | SomeCap _ -> true
277   - | _ -> false
278   -
279   -let rec add_attr s = function
280   - Token t -> if is_add_attr_token t.token then Token{t with attrs=s :: t.attrs} else Token t
281   - | Variant l -> Variant(Xlist.map l (add_attr s))
282   - | Seq l -> Seq(Xlist.map l (add_attr s))
283   -
284   -let recognize_stem poss_s_beg has_sufix i letters =
285   - let orth = merge letters in
286   - let t = {empty_token with orth=orth;beg=i;len=Xlist.size letters * factor;next=i+Xlist.size letters * factor} in
287   - let t = if poss_s_beg then
288   - if Xlist.size letters = 1 then
289   - if first_capital letters then Variant[
290   - Token{t with token=SmallLetter(merge (lowercase_first letters)); (*weight=cs_weight;*) attrs="cs" :: t.attrs};
291   - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=SmallLetter(merge (lowercase_first letters)); attrs="maybe cs" :: t.attrs}];
292   - Token{t with token=CapLetter(orth,merge (lowercase_first letters)); attrs="maybe cs" :: t.attrs};
293   - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=CapLetter(orth,merge (lowercase_first letters)); (*weight=sc_cap_weight;*) attrs="maybe cs" :: t.attrs}]]
294   - else Token{t with token=SmallLetter orth}
295   - else
296   - if first_capital letters then
297   - if rest_small letters then Variant[
298   - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=AllSmall(merge (lowercase_first letters))}];
299   - Token{t with token=FirstCap(orth,merge (lowercase_first letters),get_first_cap letters,get_first_lower letters)};
300   - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=FirstCap(orth,merge (lowercase_first letters),get_first_cap letters,get_first_lower letters); (*weight=sc_cap_weight*)}]]
301   - else if rest_capital letters then Variant([
302   - Token{t with token=AllSmall(merge (lowercase_all letters)); (*weight=cs_weight;*) attrs="cs" :: t.attrs};
303   - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=AllSmall(merge (lowercase_all letters)); (*weight=cs_weight;*) attrs="cs" :: t.attrs}];
304   - Token{t with token=FirstCap(merge (lowercase_rest letters),merge (lowercase_all letters),get_first_cap letters,get_first_lower letters); (*weight=cs_weight;*) attrs="cs" :: t.attrs};
305   - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=FirstCap(merge (lowercase_rest letters),merge (lowercase_all letters),get_first_cap letters,get_first_lower letters); (*weight=cs_weight+.sc_cap_weight;*) attrs="cs" :: t.attrs}]] @
306   - (if has_sufix then [] else [
307   - Token{t with token=AllCap(orth,merge (lowercase_rest letters),merge (lowercase_all letters)); attrs="maybe cs" :: t.attrs};
308   - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=AllCap(orth,merge (lowercase_rest letters),merge (lowercase_all letters)); attrs="maybe cs" :: t.attrs}]]))
309   - else Token{t with token=SomeCap orth}
310   - else
311   - if rest_small letters then Token{t with token=AllSmall orth}
312   - else Token{t with token=SomeCap orth}
313   - else
314   - if Xlist.size letters = 1 then
315   - if first_capital letters then Variant[
316   - Token{t with token=SmallLetter orth; (*weight=cs_weight;*) attrs="cs" :: t.attrs};
317   - Token{t with token=CapLetter(orth,merge (lowercase_first letters)); attrs="maybe cs" :: t.attrs}]
318   - else Token{t with token=SmallLetter orth}
319   - else
320   - if first_capital letters then
321   - if rest_small letters then
322   - Token{t with token=FirstCap(orth,merge (lowercase_first letters),get_first_cap letters,get_first_lower letters)}
323   - else if rest_capital letters then Variant([
324   - Token{t with token=AllSmall(merge (lowercase_all letters)); (*weight=cs_weight;*) attrs="cs" :: t.attrs};
325   - Token{t with token=FirstCap(merge (lowercase_rest letters),merge (lowercase_all letters),get_first_cap letters,get_first_lower letters); (*weight=cs_weight;*) attrs="cs" :: t.attrs}] @
326   - (if has_sufix then [] else [
327   - Token{t with token=AllCap(orth,merge (lowercase_rest letters),merge (lowercase_all letters)); attrs="maybe cs" :: t.attrs}]))
328   - else Token{t with token=SomeCap orth}
329   - else
330   - if rest_small letters then Token{t with token=AllSmall orth}
331   - else Token{t with token=SomeCap orth} in
332   - if has_sufix then add_attr "required validated lemmatization" t else t
333   -
334   -let parse_postags s =
335   - List.map (fun s ->
336   - match List.map (fun t -> Str.split (Str.regexp "\\.") t) (Str.split (Str.regexp ":") s) with
337   - [pos] :: tags -> pos, tags
338   - | _ -> failwith ("parse_postags: " ^ s)) (Str.split (Str.regexp "|") s)
339   -
340   -let make_lemma (lemma,interp) =
341   - match parse_postags interp with
342   - [pos,tags] -> Lemma(lemma,pos,[tags])
343   - | _ -> failwith "make_lemma"
344   -
345   -let merge_attrs l =
346   -(* print_endline (String.concat " " (Xlist.map l (fun token -> "[" ^ token.orth ^ " " ^ String.concat ";" token.attrs ^ "]"))); *)
347   - let len = Xlist.size l in
348   - let attrs = Xlist.fold l StringQMap.empty (fun attrs token ->
349   - Xlist.fold token.attrs attrs StringQMap.add) in
350   - let n_cs = try StringQMap.find attrs "cs" with Not_found -> 0 in
351   - let n_maybe_cs = try StringQMap.find attrs "maybe cs" with Not_found -> 0 in
352   - let new_attrs =
353   - (if n_cs > 0 then
354   - if n_cs + n_maybe_cs = len then ["cs"] else raise Not_found
355   - else
356   - if n_maybe_cs = len then ["maybe cs"] else []) @
357   - (StringQMap.fold attrs [] (fun attrs attr _ -> if attr = "cs" || attr = "maybe cs" then attrs else attr :: attrs)) in
358   -(* print_endline (String.concat " " new_attrs); *)
359   - new_attrs
360   -
361   -let suffix_lemmata = Xlist.fold [
362   - "em",make_lemma ("być","aglt:sg:pri:imperf:wok");
363   - "eś",make_lemma ("być","aglt:sg:sec:imperf:wok");
364   - "eście",make_lemma ("być","aglt:pl:sec:imperf:wok");
365   - "eśmy",make_lemma ("być","aglt:pl:pri:imperf:wok");
366   - "m",make_lemma ("być","aglt:sg:pri:imperf:nwok");
367   - "ś",make_lemma ("być","aglt:sg:sec:imperf:nwok");
368   - "ście",make_lemma ("być","aglt:pl:sec:imperf:nwok");
369   - "śmy",make_lemma ("być","aglt:pl:pri:imperf:nwok");
370   - "by",make_lemma ("by","qub");
371   - ] StringMap.empty (fun map (suf,lemma) -> StringMap.add map suf lemma)
372   -
373   -let recognize_suffix i letters =
374   - let orth = merge letters in
375   - let t = {empty_token with orth=orth;beg=i;len=Xlist.size letters * factor;next=i+Xlist.size letters * factor} in
376   - if all_capital letters then Token{t with token=StringMap.find suffix_lemmata (merge (lowercase_all letters)); (*weight=cs_weight;*) attrs="cs" :: t.attrs}
377   - else if all_small letters then Token{t with token=StringMap.find suffix_lemmata orth}
378   - else raise Not_found
379   -
380   -let recognize_romandig poss_s_beg i letters =
381   - let roman,w = recognize_roman_M 0 letters in
382   - if roman > 0 then
383   - let letters,w = if w then let l = List.rev letters in List.rev (List.tl l), [List.hd l] else letters,[] in
384   - let orth = merge letters in
385   - let roman = string_of_int roman in
386   - let t = {empty_token with orth=orth;beg=i;len=Xlist.size letters * factor;next=i+Xlist.size letters * factor} in
387   - let w = if w = [] then [] else
388   - let beg = i + Xlist.size letters * factor in
389   - [Variant[Token{empty_token with orth=merge w; beg=beg; len=factor; next=beg+factor; token=SmallLetter(merge w)};
390   - Token{empty_token with orth=merge w; beg=beg; len=factor; next=beg+factor; token=make_lemma ("wiek","subst:sg:_:m3")}]] in
391   - if StringSet.mem romanmonths orth then [
392   - Seq(Token{t with token=RomanDig(roman,"roman"); attrs="maybe cs" :: t.attrs}::w);
393   - Seq(Token{t with token=RomanDig(roman,"month"); attrs="maybe cs" :: t.attrs}::w)] @
394   - (if poss_s_beg then [
395   - Seq([s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=RomanDig(roman,"roman"); attrs="maybe cs" :: t.attrs}]@w);
396   - Seq([s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=RomanDig(roman,"month"); attrs="maybe cs" :: t.attrs}]@w);
397   - ] else [])
398   - else [
399   - Seq(Token{t with token=RomanDig(roman,"roman"); attrs="maybe cs" :: t.attrs}::w)] @
400   - (if poss_s_beg then [
401   - Seq([s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=RomanDig(roman,"roman"); attrs="maybe cs" :: t.attrs}]@w);
402   - ] else [])
403   - else []
404   -
405   -let sufixes1 = Xlist.map [
406   - ["m"];
407   - ["e"; "m"];
408   - ["ś"];
409   - ["e"; "ś"];
410   - ["ś"; "m"; "y"];
411   - ["e"; "ś"; "m"; "y"];
412   - ["ś"; "c"; "i"; "e"];
413   - ["e"; "ś"; "c"; "i"; "e"];
414   - ] List.rev
415   -
416   -let sufixes2 = Xlist.map [
417   - ["b"; "y"];
418   - ] List.rev
419   -
420   -let rec find_suffix rev = function
421   - _, [] -> raise Not_found
422   - | [], l -> rev, l
423   - | s :: pat, Capital(c,t) :: l -> if s = t then find_suffix (Capital(c,t) :: rev) (pat,l) else raise Not_found
424   - | s :: pat, Small t :: l -> if s = t then find_suffix (Small t :: rev) (pat,l) else raise Not_found
425   - | _,_ -> raise Not_found
426   -
427   -let find_suffixes2 sufixes letters sufs =
428   - Xlist.fold sufixes [] (fun l suf ->
429   - try
430   - let suf,rev_stem = find_suffix [] (suf,letters) in
431   - (rev_stem,suf :: sufs) :: l
432   - with Not_found -> l)
433   -
434   -let find_suffixes i letters =
435   - let letters = List.rev letters in
436   - let l = (letters,[]) :: find_suffixes2 sufixes1 letters [] in
437   - let l = Xlist.fold l l (fun l (letters,sufs) ->
438   - (find_suffixes2 sufixes2 letters sufs) @ l) in
439   - Xlist.map l (fun (rev_stem, sufs) ->
440   - List.rev (fst (Xlist.fold (List.rev rev_stem :: sufs) ([],i) (fun (seq,i) letters ->
441   - (letters,i) :: seq, i + factor * Xlist.size letters))))
442   -
443   -let merge_letters poss_s_beg i letters =
444   - let l = find_suffixes i letters in
445   - let roman = recognize_romandig poss_s_beg i letters in
446   - let variants = Xlist.fold l roman (fun variants -> function
447   - [] -> failwith "merge_letters"
448   - | [stem,i] -> (recognize_stem poss_s_beg false i stem) :: variants
449   - | (stem,i) :: suffixes ->
450   - (try (Seq((recognize_stem poss_s_beg true i stem) :: Xlist.map suffixes (fun (suf,i) -> recognize_suffix i suf))) :: variants
451   - with Not_found -> variants)) in
452   - Variant variants
453   -
454   -let rec group_digits rev = function
455   - [] -> List.rev rev, []
456   - | Digit s :: l -> group_digits (s :: rev) l
457   - | x :: l -> List.rev rev, x :: l
458   -
459   -let rec group_letters rev = function
460   - [] -> List.rev rev, []
461   - | Capital(s,t) :: l -> group_letters ((Capital(s,t)) :: rev) l
462   - | ForeignCapital(s,t) :: l -> group_letters ((ForeignCapital(s,t)) :: rev) l
463   - | Small s :: l -> group_letters ((Small s) :: rev) l
464   - | ForeignSmall s :: l -> group_letters ((ForeignSmall s) :: rev) l
465   - | x :: l -> List.rev rev, x :: l
466   -
467   -let rec group_others rev = function
468   - [] -> List.rev rev, []
469   - | Other(s,_) :: l -> group_others (s :: rev) l
470   - | x :: l -> List.rev rev, x :: l
471   -
472   -let create_sign_token poss_s_beg i signs l token =
473   - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in
474   - let len = Xlist.size signs * factor in
475   - Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=token; attrs=["maybe cs"]},i+len,l,poss_s_beg
476   -
477   -let create_empty_sign_token i signs =
478   - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in
479   - let len = Xlist.size signs * factor in
480   - {empty_token with orth=orth;beg=i;len=len;next=i+len; attrs=["maybe cs"]},i+len
481   -
482   -let create_sentence_seq i signs l lemma =
483   - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in
484   - let len = Xlist.size signs * factor in
485   - Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"};
486   - Token{empty_token with orth=orth;beg=i+20;len=len-30;next=i+len-10;token=make_lemma (lemma,"sinterj")};
487   - Token{empty_token with beg=i+len-10;len=10;next=i+len;token=Interp "</sentence>"}]
488   -
489   -let create_sentence_seq_hapl i signs l lemma =
490   - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in
491   - let len = Xlist.size signs * factor in
492   - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Symbol "."; attrs=["maybe cs"]};
493   - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</clause>"};
494   - Token{empty_token with orth=orth;beg=i+20;len=len-30;next=i+len-10;token=make_lemma (lemma,"sinterj")};
495   - Token{empty_token with beg=i+len-10;len=10;next=i+len;token=Interp "</sentence>"}]
496   -
497   -let create_sentence_seq_q i signs l lemma =
498   - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in
499   - let len = Xlist.size signs * factor in
500   - Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "?"};
501   - Token{empty_token with beg=i+20;len=10;next=i+30;token=Interp "</clause>"};
502   - Token{empty_token with orth=orth;beg=i+30;len=len-40;next=i+len-10;token=make_lemma (lemma,"sinterj")};
503   - Token{empty_token with beg=i+len-10;len=10;next=i+len;token=Interp "</sentence>"}]
504   -
505   -let create_sentence_seq_hapl_q i signs l lemma =
506   - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in
507   - let len = Xlist.size signs * factor in
508   - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Symbol "."; attrs=["maybe cs"]};
509   - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "?"};
510   - Token{empty_token with beg=i+20;len=10;next=i+30;token=Interp "</clause>"};
511   - Token{empty_token with orth=orth;beg=i+30;len=len-40;next=i+len-10;token=make_lemma (lemma,"sinterj")};
512   - Token{empty_token with beg=i+len-10;len=10;next=i+len;token=Interp "</sentence>"}]
513   -
514   -let create_or_beg i signs l poss_s_beg =
515   - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in
516   - let len = Xlist.size signs * factor in
517   - Variant[
518   - Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=Symbol "-"; attrs=["maybe cs"]};
519   - Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=Interp "-"; attrs=["maybe cs"]}; (* hyphen *)
520   - Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=Interp "<or>"};
521   - (* Seq[Token{empty_token with beg=i; len=20; next=i+20; token=Interp "<sentence>"};
522   - Token{empty_token with orth=orth;beg=i+20; len=len-20;next=i+len; token=Interp "<or>"}]; *)
523   - Seq[Token{empty_token with beg=i; len=21; next=i+21; token=Interp "</clause>"};
524   - Token{empty_token with beg=i+21; len=20; next=i+41; token=Interp "</sentence>"};
525   - Token{empty_token with orth=orth;beg=i+41; len=len-59;next=i+len-20;token=Interp "</or>"};
526   - Token{empty_token with beg=i+len-20;len=20; next=i+len; token=Interp "<clause>"}];
527   - Seq[Token{empty_token with orth=orth;beg=i; len=len-22;next=i+len-22;token=Interp "</or>"};
528   - Token{empty_token with beg=i+len-22;len=22; next=i+len; token=Interp "<clause>"}];
529   - ],i+len,l,poss_s_beg
530   -
531   -let create_or_beg2 i signs l poss_s_beg =
532   - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in
533   - let len = Xlist.size signs * factor in
534   - Variant[
535   - Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=Interp "<or>"};
536   - (* Seq[Token{empty_token with beg=i; len=20; next=i+20; token=Interp "<sentence>"};
537   - Token{empty_token with orth=orth;beg=i+20; len=len-20;next=i+len; token=Interp "<or>"}]; *)
538   - Seq[Token{empty_token with beg=i; len=21; next=i+21; token=Interp "</clause>"};
539   - Token{empty_token with beg=i+21; len=20; next=i+41; token=Interp "</sentence>"};
540   - Token{empty_token with orth=orth;beg=i+41; len=len-59;next=i+len-20;token=Interp "</or>"};
541   - Token{empty_token with beg=i+len-20;len=20; next=i+len; token=Interp "<clause>"}];
542   - Seq[Token{empty_token with orth=orth;beg=i; len=len-22;next=i+len-22;token=Interp "</or>"};
543   - Token{empty_token with beg=i+len-22;len=22; next=i+len; token=Interp "<clause>"}];
544   - ],i+len,l,poss_s_beg
545   -
546   -let is_dot_sentence_end_marker = function
547   - [] -> true
548   - | [Sign " "] -> true
549   - | [Sign ""] -> true
550   - | [Sign " "] -> true
551   - | [Sign "\""] -> true
552   - | [Sign "»"] -> true
553   - | [Sign "”"] -> true
554   - | _ -> false
555   -
556   -let not_dot_sentence_end_marker = function
557   - Sign " " :: Small _ :: _ -> true
558   - | Sign "" :: Small _ :: _ -> true
559   - | Sign " " :: Small _ :: _ -> true
560   - | Sign "," :: _ -> true
561   - | Sign ":" :: _ -> true
562   - | Sign "?" :: _ -> true
563   - | Sign "!" :: _ -> true
564   - | Small _ :: _ -> true
565   - | ForeignSmall _ :: _ -> true
566   - | Capital _ :: _ -> true
567   - | ForeignCapital _ :: _ -> true
568   - | Digit _ :: _ -> true
569   - | _ -> false
570   -
571   -let is_comma_digit_marker = function
572   - Digit _ :: l -> true
573   - | _ -> false
574   -
575   -let is_colon_sentence_end_marker = function
576   - [] -> true
577   - | [Sign " "] -> true
578   - | [Sign ""] -> true
579   - | [Sign " "] -> true
580   - | _ -> false
581   -
582   -let is_colon_symbol = function
583   - Digit _ :: _ -> true
584   - | Sign "/" :: _ -> true
585   - | _ -> false
586   -
587   -let is_multidot_sentence_end_marker = function
588   - [] -> true
589   - | [Sign " "] -> true
590   - | [Sign ""] -> true
591   - | [Sign " "] -> true
592   - | [Sign "\""] -> true
593   - | [Sign "»"] -> true
594   - | [Sign "”"] -> true
595   -(* | "\"" :: l -> true
596   - | "»" :: l -> true
597   - | "”" :: l -> true
598   - | "“" :: l -> true
599   - | " " :: "-" :: l -> true
600   - | " " :: "–" :: l -> true
601   - | " " :: "—" :: l -> true
602   - | ")" :: l -> true
603   - | "]" :: l -> true*)
604   - | _ -> false
605   -
606   -let create_quot_digit_token i signs l =
607   - let t,i2 = create_empty_sign_token i signs in
608   - Variant[
609   - Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"};
610   - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"};
611   - Token{t with beg=t.beg+factor; next=t.next+factor;token=Interp "”s"}];
612   - Seq[Token{t with token=Interp "”"};
613   - Token{empty_token with beg=i2;len=20;next=i2+20;token=Interp "</clause>"};
614   - Token{empty_token with orth=".";beg=i2+20;len=factor-20;next=i2+factor;token=Interp "</sentence>"}];
615   - ],i2+factor,l,true
616   -
617   -let rec recognize_sign_group poss_s_beg i = function
618   - | (Sign " ") :: l -> create_sign_token poss_s_beg i [Sign " "] l (Symbol " ")
619   - | (Sign "") :: l -> create_sign_token poss_s_beg i [Sign ""] l (Symbol " ")
620   - | (Sign " ") :: l -> create_sign_token poss_s_beg i [Sign " "] l (Symbol " ")
621   - | (Sign "\"") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "\""] l
622   - | (Sign "\"") :: l ->
623   - let t,i = create_empty_sign_token i [Sign "\""] in
624   - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"};Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg
625   - | (Sign "˝") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "˝"] l
626   - | (Sign "˝") :: l ->
627   - let t,i = create_empty_sign_token i [Sign "˝"] in
628   - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"};Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg
629   - | (Sign "„") :: l ->
630   - let t,i = create_empty_sign_token i [Sign "„"] in
631   - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"}],i,l,poss_s_beg
632   - | (Sign "”") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "”"] l
633   - | (Sign "”") :: l ->
634   - let t,i = create_empty_sign_token i [Sign "”"] in
635   - Variant[Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg
636   - | (Sign "“") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "“"] l
637   - | (Sign "“") :: l ->
638   - let t,i = create_empty_sign_token i [Sign "“"] in
639   - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"};Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg
640   - | (Sign ",") :: (Sign ",") :: l ->
641   - let t,i = create_empty_sign_token i [Sign ",";Sign ","] in
642   - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"}],i,l,poss_s_beg
643   - | (Sign ",") :: l ->
644   - let t,i2 = create_empty_sign_token i [Sign ","] in
645   - if is_comma_digit_marker l then
646   - Token{t with token=Symbol ","},i2,l,false
647   - else
648   - Variant[Token{t with token=Interp ","};
649   - Seq[Token{empty_token with orth=",";beg=i;len=factor/2;next=i+factor/2;token=Interp "</clause>"};
650   - Token{empty_token with beg=i+factor/2;len=factor-(factor/2);next=i+factor;token=Interp "<clause>"}]],i2,l,false
651   - | (Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: []) l (make_lemma ("(…)","sinterj"))
652   - | (Sign "(") :: (Sign "?") :: (Sign "!") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "?") :: (Sign "!") :: (Sign ")") :: []) l (make_lemma ("(?!)","sinterj"))
653   - | (Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: []) l (make_lemma ("(…)","sinterj"))
654   - | (Sign "(") :: (Sign "+") :: (Sign "+") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "+") :: (Sign "+") :: (Sign ")") :: []) l (make_lemma ("(++)","sinterj"))
655   - | (Sign "(") :: (Sign "-") :: (Sign "-") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "-") :: (Sign "-") :: (Sign ")") :: []) l (make_lemma ("(--)","symbol"))
656   - | (Sign "(") :: (Sign "…") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "…") :: (Sign ")") :: []) l (make_lemma ("(…)","sinterj"))
657   - | (Sign "(") :: (Sign "?") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "?") :: (Sign ")") :: []) l (make_lemma ("(?)","sinterj"))
658   - | (Sign "(") :: (Sign "+") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "+") :: (Sign ")") :: []) l (make_lemma ("(+)","symbol"))
659   - | (Sign "(") :: (Sign "!") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "!") :: (Sign ")") :: []) l (make_lemma ("(!)","sinterj"))
660   - | (Sign "(") :: (Sign "-") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "-") :: (Sign ")") :: []) l (make_lemma ("(-)","symbol"))
661   - | (Sign "(") :: (Sign "*") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "*") :: (Sign ")") :: []) l (make_lemma ("(*)","symbol"))
662   - | (Sign "(") :: l -> create_sign_token poss_s_beg i [Sign "("] l (Interp "(")
663   - | (Sign ":") :: (Sign "(") :: (Sign "(") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "(") :: (Sign "(") :: (Sign "(") :: []) l (make_lemma (":(((","sinterj"))
664   - | (Sign ":") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "(") :: []) l (make_lemma (":(","sinterj"))
665   - | (Sign ":") :: (Sign "-") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign "(") :: []) l (make_lemma (":-(","sinterj"))
666   - | (Sign ";") :: (Sign "(") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "(") :: (Sign "(") :: []) l (make_lemma (";((","sinterj"))
667   - | (Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";-))))","sinterj"))
668   - | (Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":-))))","sinterj"))
669   - | (Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":-)))","sinterj"))
670   - | (Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";-)))","sinterj"))
671   - | (Sign ";") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";)))","sinterj"))
672   - | (Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":-))","sinterj"))
673   - | (Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";-))","sinterj"))
674   - | (Sign ":") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":)))","sinterj"))
675   - | (Sign ":") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":))","sinterj"))
676   - | (Sign ";") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";))","sinterj"))
677   - | (Sign ";") :: (Sign "-") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: []) l (make_lemma (";-)","sinterj"))
678   - | (Sign ":") :: (Sign "|") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "|") :: []) l (make_lemma (":|","sinterj"))
679   - | (Sign ":") :: (Sign "\\") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "\\") :: []) l (make_lemma (":\\","sinterj"))
680   - | (Sign ":") :: (Sign "-") :: (Sign "/") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign "/") :: []) l (make_lemma (":-/","sinterj"))
681   - | (Sign ":") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign ")") :: []) l (make_lemma (":)","sinterj"))
682   - | (Sign ";") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign ")") :: []) l (make_lemma (";)","sinterj"))
683   - | (Sign ")") :: l -> create_sign_token poss_s_beg i [Sign ")"] l (Interp ")")
684   - | (Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: []) l (make_lemma ("(…)","sinterj"))
685   - | (Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: []) l (make_lemma ("(…)","sinterj"))
686   - | (Sign "[") :: (Sign "+") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign "+") :: (Sign "]") :: []) l (make_lemma ("[+]","symbol"))
687   - | (Sign "[") :: (Sign "-") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign "-") :: (Sign "]") :: []) l (make_lemma ("[-]","symbol"))
688   - | (Sign "[") :: (Sign "?") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign "?") :: (Sign "]") :: []) l (make_lemma ("[?]","sinterj"))
689   - | (Sign ":") :: (Sign "]") :: l ->
690   - let t,i2 = create_empty_sign_token i [Sign ":";Sign "]"] in
691   - Variant[Token{t with token=make_lemma (":]","sinterj")};
692   - Seq[Token{empty_token with orth=":";beg=i;len=factor;next=i+factor;token=Interp ":"; attrs=["maybe cs"]};
693   - Token{empty_token with orth="]";beg=i+factor;len=factor;next=i+2*factor;token=Interp "]"; attrs=["maybe cs"]}]],i2,l,false
694   - | (Sign ";") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "]") :: []) l (make_lemma (";]","sinterj"))
695   - | (Sign "]") :: l -> create_sign_token poss_s_beg i [Sign "]"] l (Interp "]")
696   - | (Sign "[") :: l -> create_sign_token poss_s_beg i [Sign "["] l (Interp "[")
697   - | (Sign ":") :: l ->
698   - if is_colon_symbol l then
699   - Token{empty_token with orth=":";beg=i;len=factor;next=i+factor;token=Symbol ":"; attrs=["maybe cs"]},i+factor,l,false
700   - else
701   - Variant[
702   - Seq[Token{empty_token with beg=i;len=11;next=i+11;token=Interp "</clause>"}; (* wyliczenie*)
703   - Token{empty_token with orth=":";beg=i+11;len=factor-11;next=i+factor;token=Interp "<clause>"}];
704   - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"};
705   - Token{empty_token with orth=":";beg=i+10;len=factor-30;next=i+factor-20;token=Interp ":"}; (* mowa zależna, koniec zdania *)
706   - Token{empty_token with beg=i+factor-20;len=20;next=i+factor;token=Interp "</sentence>"}];
707   - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"};
708   - Token{empty_token with orth=":";beg=i+10;len=factor-40;next=i+factor-30;token=Interp ":"}; (* po ':' zdanie z małej litery *)
709   - Token{empty_token with beg=i+factor-30;len=10;next=i+factor-20;token=Interp "</sentence>"};
710   - Token{empty_token with beg=i+factor-20;len=10;next=i+factor-10;token=Interp "<sentence>"};
711   - Token{empty_token with beg=i+factor-10;len=10;next=i+factor;token=Interp "<clause>"}];
712   - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"};
713   - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</sentence>"};
714   - Token{empty_token with orth=":";beg=i+20;len=factor-20;next=i+factor;token=Interp ":s"}]; (* speaker *)
715   - ],i+factor,l,true
716   -(* if is_colon_sentence_end_marker l then
717   - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"};
718   - Token{empty_token with orth=":";beg=i+10;len=10;next=i+20;token=Interp ":"};
719   - Token{empty_token with beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}],i+factor,l,true
720   - else
721   - else
722   - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"};
723   - Token{empty_token with orth=":";beg=i+10;len=10;next=i+20;token=Interp ""};
724   - Token{empty_token with beg=i+20;len=factor-20;next=i+factor;token=Interp "<clause>"}],i+factor,l,false*)
725   - | (Sign "'") :: (Sign "'") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "'";Sign "'"] l
726   - | (Sign "'") :: (Sign "'") :: l ->
727   - let t,i = create_empty_sign_token i [Sign "”"] in
728   - Variant[Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg
729   - | (Sign "'") :: l -> create_sign_token poss_s_beg i [Sign "'"] l (Symbol "’")
730   - | (Sign "’") :: (Sign "’") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "’";Sign "’"] l
731   - | (Sign "’") :: (Sign "’") :: l ->
732   - let t,i = create_empty_sign_token i [Sign "”"] in
733   - Variant[Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg
734   - | (Sign "’") :: l -> create_sign_token poss_s_beg i [Sign "’"] l (Symbol "’")
735   - | (Sign ";") :: (Sign "*") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "*") :: []) l (make_lemma (";*","sinterj"))
736   - | (Sign ";") :: l ->
737   - Variant[Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"};
738   - Token{empty_token with orth=";";beg=i+20;len=20;next=i+40;token=Interp "</sentence>"};
739   - Token{empty_token with beg=i+40;len=20;next=i+60;token=Interp "<sentence>"};
740   - Token{empty_token with beg=i+60;len=factor-60;next=i+factor;token=Interp "<clause>"}];
741   - Token{empty_token with orth=";";beg=i;len=factor;next=i+factor;token=Interp ";"; attrs=["maybe cs"]}],i+factor,l,false
742   - | (Sign "?") :: (Sign "!") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l ->
743   - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "?!...",i+5*factor,l,true
744   - | (Sign "?") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l ->
745   - create_sentence_seq_q i ((Sign "?") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "?...",i+4*factor,l,true
746   - | (Sign "?") :: (Sign "?") :: (Sign "?") :: (Sign "?") :: l ->
747   - create_sentence_seq_q i ((Sign "?") :: (Sign "?") :: (Sign "?") :: (Sign "?") :: []) l "????",i+4*factor,l,true
748   - | (Sign "?") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: l ->
749   - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: []) l "?!!!",i+4*factor,l,true
750   - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "?") :: l ->
751   - Variant[create_sentence_seq_hapl_q i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "?") :: []) l "…?";
752   - create_sentence_seq_q i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "?") :: []) l "…?"],i+4*factor,l,true
753   - | (Sign "?") :: (Sign "!") :: (Sign "?") :: l ->
754   - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: (Sign "?") :: []) l "?!?",i+3*factor,l,true
755   - | (Sign "?") :: (Sign "?") :: (Sign "?") :: l ->
756   - create_sentence_seq_q i ((Sign "?") :: (Sign "?") :: (Sign "?") :: []) l "???",i+3*factor,l,true
757   - | (Sign "?") :: (Sign "!") :: l ->
758   - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: []) l "?!",i+2*factor,l,true
759   - | (Sign "?") :: (Sign "?") :: l ->
760   - create_sentence_seq_q i ((Sign "?") :: (Sign "?") :: []) l "??",i+2*factor,l,true
761   -(* | (Sign "?") :: (Sign ".") :: l -> *)
762   - | (Sign "!") :: (Sign "?") :: l ->
763   - create_sentence_seq_q i ((Sign "!") :: (Sign "?") :: []) l "!?",i+2*factor,l,true
764   - | (Sign "?") :: (Sign "…") :: l ->
765   - create_sentence_seq_q i ((Sign "?") :: (Sign "…") :: []) l "?…",i+2*factor,l,true
766   - | (Sign "…") :: (Sign "?") :: l ->
767   - Variant[create_sentence_seq_hapl_q i ((Sign "…") :: (Sign "?") :: []) l "…?";
768   - create_sentence_seq_q i ((Sign "…") :: (Sign "?") :: []) l "…?"],i+2*factor,l,true
769   - | (Sign "?") :: l ->
770   - create_sentence_seq_q i ((Sign "?") :: []) l "?",i+factor,l,true
771   - | (Sign "!") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: l ->
772   - create_sentence_seq i ((Sign "!") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: []) l "!!!!",i+4*factor,l,true
773   - | (Sign "!") :: (Sign "!") :: (Sign "!") :: l ->
774   - create_sentence_seq i ((Sign "!") :: (Sign "!") :: (Sign "!") :: []) l "!!!",i+3*factor,l,true
775   - | (Sign "!") :: (Sign "!") :: l ->
776   - create_sentence_seq i ((Sign "!") :: (Sign "!") :: []) l "!!",i+2*factor,l,true
777   - | (Sign "!") :: l ->
778   - create_sentence_seq i ((Sign "!") :: []) l "!",i+factor,l,true
779   - | (Sign "…") :: l ->
780   - if is_multidot_sentence_end_marker l then
781   - Variant[create_sentence_seq_hapl i ((Sign "…") :: []) l "…";
782   - create_sentence_seq i ((Sign "…") :: []) l "…"],i+factor,l,true
783   - else
784   - Variant[create_sentence_seq_hapl i ((Sign "…") :: []) l "…";
785   - create_sentence_seq i ((Sign "…") :: []) l "…";
786   - Token{empty_token with orth="…";beg=i;len=factor;next=i+factor;token=make_lemma ("…","sinterj"); attrs=["maybe cs"]}],i+factor,l,true
787   - | (Sign "/") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "/") :: l -> create_sign_token poss_s_beg i ((Sign "/") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "/") :: []) l (make_lemma ("(…)","sinterj"))
788   - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> (* Różne natęrzenia wielokropka i wypunktowania *)
789   - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……";
790   - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+8*factor,l,true
791   - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l ->
792   - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……";
793   - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+7*factor,l,true
794   - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l ->
795   - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……";
796   - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+6*factor,l,true
797   - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l ->
798   - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……";
799   - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+5*factor,l,true
800   - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l ->
801   - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……";
802   - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+4*factor,l,true
803   - | (Sign ".") :: (Sign ".") :: (Sign ".") :: l ->
804   - if is_multidot_sentence_end_marker l then
805   - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…";
806   - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"],i+3*factor,l,true
807   - else
808   - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…";
809   - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…";
810   - Token{empty_token with orth="...";beg=i;len=3*factor;next=i+3*factor;token=make_lemma ("…","sinterj"); attrs=["maybe cs"]}],i+3*factor,l,true
811   - | (Sign ".") :: (Sign ".") :: l ->
812   - if is_multidot_sentence_end_marker l then
813   - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: []) l "…";
814   - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"],i+2*factor,l,true
815   - else
816   - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: []) l "…";
817   - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…";
818   - Token{empty_token with orth="..";beg=i;len=2*factor;next=i+2*factor;token=make_lemma ("…","sinterj"); attrs=["maybe cs"]}],i+2*factor,l,true
819   - | (Sign ".") :: l ->
820   - if is_dot_sentence_end_marker l then
821   - Variant[Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Symbol "."; attrs=["maybe cs"]};
822   - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</clause>"};
823   - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}];
824   - Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"};
825   - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}]],i+factor,l,true
826   - else if not_dot_sentence_end_marker l then
827   - Token{empty_token with orth=".";beg=i;len=factor;next=i+factor;token=Symbol "."; attrs=["maybe cs"]},i+factor,l,false
828   - else
829   - Variant[Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Symbol "."; attrs=["maybe cs"]};
830   - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</clause>"};
831   - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}];
832   - Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"};
833   - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}];
834   - Token{empty_token with orth=".";beg=i;len=factor;next=i+factor;token=Symbol "."; attrs=["maybe cs"]}],i+factor,l,true
835   - | (Sign "*") :: (Sign "*") :: (Sign "*") :: (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*";Sign "*";Sign "*";Sign "*"] l (Interp "*****") (* zastępniki liter *)
836   - | (Sign "*") :: (Sign "*") :: (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*";Sign "*";Sign "*"] l (Interp "****")
837   - | (Sign "*") :: (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*";Sign "*"] l (Interp "***")
838   - | (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*"] l (Interp "**")
839   - | (Sign "*") :: l -> (* Interp zastępnik liter i cudzysłów, symbol listy *)
840   - let t,i = create_empty_sign_token i [Sign "*"] in
841   - Variant[Token{t with token=Interp "*"};Token{t with token=Symbol "*"}],i,l,poss_s_beg
842   - | (Sign "+") :: l -> create_sign_token poss_s_beg i [Sign "+"] l (Symbol "+")
843   - | (Sign "«") :: l ->
844   - let t,i = create_empty_sign_token i [Sign "«"] in
845   - Variant[Token{t with token=Interp "«"};Token{t with token=Interp "«s"}],i,l,poss_s_beg
846   - | (Sign "»") :: l ->
847   - let t,i = create_empty_sign_token i [Sign "»"] in
848   - Variant[Token{t with token=Interp "»"};Token{t with token=Interp "»s"}],i,l,poss_s_beg
849   - | (Sign "<") :: (Sign "<") :: l -> create_sign_token poss_s_beg i [Sign "<";Sign "<"] l (Interp "«") (* prawy cudzysłów *)
850   - | (Sign "<") :: l -> (* prawy cudzysłów i element wzoru matematycznego *)
851   - let t,i = create_empty_sign_token i [Sign "<"] in
852   - Variant[Token{t with token=Interp "«"};Token{t with token=Symbol "<"}],i,l,poss_s_beg
853   - | (Sign ">") :: (Sign ">") :: l -> create_sign_token poss_s_beg i [Sign ">";Sign ">"] l (Interp "»") (* lewy cudzysłów *)
854   - | (Sign ">") :: l -> create_sign_token poss_s_beg i [Sign ">"] l (Symbol ">")
855   - | (Sign "-") :: (Sign "-") :: (Sign "-") :: l -> create_or_beg2 i [Sign "-";Sign "-";Sign "-"] l poss_s_beg
856   - | (Sign "-") :: (Sign "-") :: l -> create_or_beg2 i [Sign "-";Sign "-"] l poss_s_beg
857   - | (Sign "-") :: l -> create_or_beg i [Sign "-"] l poss_s_beg
858   - | (Sign "‐") :: l -> create_or_beg i [Sign "‐"] l poss_s_beg
859   - | (Sign "‑") :: l -> create_or_beg i [Sign "‑"] l poss_s_beg
860   - | (Sign "‒") :: l -> create_or_beg i [Sign "‒"] l poss_s_beg
861   - | (Sign "−") :: l -> create_or_beg i [Sign "−"] l poss_s_beg
862   - | (Sign "–") :: l -> create_or_beg i [Sign "–"] l poss_s_beg
863   - | (Sign "—") :: l -> create_or_beg i [Sign "—"] l poss_s_beg
864   - | (Sign "‘") :: l -> create_sign_token poss_s_beg i [Sign "‘"] l (Interp "‘")
865   - | (Sign "´") :: l -> create_sign_token poss_s_beg i [Sign "´"] l (Symbol "’")
866   - | (Sign "`") :: (Sign "`") :: l ->
867   - let t,i = create_empty_sign_token i [Sign "`";Sign "`"] in
868   - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"}],i,l,poss_s_beg
869   - | (Sign "`") :: l -> create_sign_token poss_s_beg i [Sign "`"] l (Symbol "’")
870   - | (Sign "·") :: l -> create_sign_token poss_s_beg i [Sign "·"] l (Interp "·")
871   - | (Sign "•") :: l -> create_sign_token poss_s_beg i [Sign "•"] l (Interp "•")
872   - | (Sign "¨") :: l -> create_sign_token poss_s_beg i [Sign "¨"] l (Interp "¨")
873   - | (Sign "~") :: l ->
874   - let t,i = create_empty_sign_token i [Sign "~"] in
875   - Variant[Token{t with token=Symbol "~"};Token{t with token=make_lemma ("około","prep:gen")}],i,l,false
876   - | (Sign "{") :: l ->
877   - let t,i = create_empty_sign_token i [Sign "{"] in
878   - Variant[Token{t with token=Symbol "{"};Token{t with token=Interp "{"}],i,l,poss_s_beg
879   - | (Sign "}") :: l ->
880   - let t,i = create_empty_sign_token i [Sign "}"] in
881   - Variant[Token{t with token=Symbol "}"};Token{t with token=Interp "}"}],i,l,poss_s_beg
882   - | (Sign "#") :: l -> create_sign_token poss_s_beg i [Sign ""] l (Symbol "")
883   - | (Sign "^") :: (Sign "^") :: l -> create_sign_token poss_s_beg i [Sign "^";Sign "^"] l (make_lemma ("^^","sinterj"))
884   - | (Sign "^") :: l -> create_sign_token poss_s_beg i [Sign "^"] l (Symbol "^")
885   - | (Sign "|") :: l -> create_sign_token poss_s_beg i [Sign "|"] l (Symbol "|")
886   - | (Sign "&") :: l -> create_sign_token poss_s_beg i [Sign "&"] l (Symbol "&")
887   - | (Sign "=") :: l -> create_sign_token poss_s_beg i [Sign "="] l (Symbol "=")
888   - | (Sign "/") :: l ->
889   - let t,i = create_empty_sign_token i [Sign "/"] in
890   - Variant[Token{t with token=Symbol "/"};Token{t with token=make_lemma ("na","prep:acc")}],i,l,false
891   - | (Sign "_") :: l -> create_sign_token poss_s_beg i [Sign "_"] l (Symbol "_")
892   - | (Sign "@") :: l -> create_sign_token poss_s_beg i [Sign "@"] l (Symbol "@")
893   - | (Sign "×") :: l -> create_sign_token poss_s_beg i [Sign "×"] l (Symbol "×")
894   - | (Sign "%") :: l ->
895   - let t,i = create_empty_sign_token i [Sign "%"] in
896   - Variant[Token{t with token=Symbol "%"};Token{t with token=make_lemma ("procent","subst:_:_:m3")}],i,l,false
897   - | (Sign "$") :: l ->
898   - let t,i = create_empty_sign_token i [Sign "$"] in
899   - Variant[Token{t with token=Symbol "$"};Token{t with token=make_lemma ("dolar","subst:_:_:m2")}],i,l,false
900   - | (Sign "€") :: l -> create_sign_token poss_s_beg i [Sign "€"] l (make_lemma ("euro","subst:_:_:n2"))
901   - | (Sign "²") :: l -> create_sign_token poss_s_beg i [Sign "²"] l (Symbol "²")
902   - | (Sign "°") :: l -> create_sign_token poss_s_beg i [Sign "°"] l (make_lemma ("stopień","subst:_:_:m3"))
903   - | (Sign "§") :: l -> create_sign_token false i [Sign "§"] l (make_lemma ("paragraf","subst:_:_:m3"))
904   - | (Sign s) :: l -> print_endline ("recognize_sign_group: " ^ s); create_sign_token poss_s_beg i [Sign s] l (Symbol s)
905   - | l -> failwith "recognize_sign_group"
906   -
907   -(* FIXME: "„Szpak” frunie." trzeba przenie przenieść <sentence> przed „, ale zostawić po „s. *)
908   -
909   -let rec group_chars poss_s_beg i rev = function
910   - [] -> List.rev ((Token{empty_token with beg=i;len=factor;next=i+factor;token=Interp "</query>"}) :: rev)
911   - | (Digit s) :: l -> let x,l = group_digits [] ((Digit s) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_digits poss_s_beg i x) :: rev) l
912   - | (Sign s) :: l -> let x,i,l,poss_s_beg = recognize_sign_group poss_s_beg i ((Sign s) :: l) in group_chars poss_s_beg i (x :: rev) l
913   - | (Capital(s,t)) :: l -> let x,l = group_letters [] ((Capital(s,t)) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_letters poss_s_beg i x) :: rev) l
914   - | (ForeignCapital(s,t)) :: l -> let x,l = group_letters [] ((ForeignCapital(s,t)) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_letters poss_s_beg i x) :: rev) l
915   - | (Small s) :: l -> let x,l = group_letters [] ((Small s) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_letters poss_s_beg i x) :: rev) l
916   - | (ForeignSmall s) :: l -> let x,l = group_letters [] ((ForeignSmall s) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_letters poss_s_beg i x) :: rev) l
917   - | (Other(s,x)) :: l ->
918   - let x,l = group_others [] ((Other(s,x)) :: l) in
919   - group_chars false (i + Xlist.size x * factor)
920   - ((Token{empty_token with orth=String.concat "" x;beg=i;len=Xlist.size x * factor;next=i+factor;token=Other(String.concat "" x)}) :: rev) l
921   -
922   -let tokenize l =
923   - (Token{empty_token with beg=0;len=factor;next=factor;token=Interp "<query>"}) :: (group_chars true factor [] l)
tokenizer/eniam-tokenizer-1.0/README deleted
1   -ENIAMtokenizer Version 1.0 :
2   ------------------------
3   -
4   -ENIAMtokenizer is a library that provides a tokenizer for Polish.
5   -
6   -Install
7   --------
8   -
9   -ENIAMtokenizer requires OCaml version 4.02.3 compiler
10   -together with Xlib library version 3.1 or later.
11   -
12   -In order to install type:
13   -
14   -make install
15   -
16   -by default, ENIAMtokenizer is installed in the 'ocamlc -where'/eniam directory.
17   -you can change it by editing the Makefile.
18   -
19   -In order to test library type:
20   -make test
21   -./test
22   -
23   -Credits
24   --------
25   -Copyright © 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
26   -Copyright © 2016 Institute of Computer Science Polish Academy of Sciences
27   -
28   -The parser uses the following licensed resources:
29   -
30   -SGJP: Grammatical Dictionary of Polish, version 20151020
31   -Copyright © 2007–2015 Zygmunt Saloni, Włodzimierz Gruszczyński, Marcin
32   -Woliński, Robert Wołosz, Danuta Skowrońska
33   -http://sgjp.pl
34   -
35   -Licence
36   --------
37   -
38   -This library is free software: you can redistribute it and/or modify
39   -it under the terms of the GNU Lesser General Public License as published by
40   -the Free Software Foundation, either version 3 of the License, or
41   -(at your option) any later version.
42   -
43   -This library is distributed in the hope that it will be useful,
44   -but WITHOUT ANY WARRANTY; without even the implied warranty of
45   -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
46   -GNU General Public License for more details.
47   -
48   -You should have received a copy of the GNU Lesser General Public License
49   -along with this program. If not, see <http://www.gnu.org/licenses/>.
50   -
tokenizer/eniam-tokenizer-1.0/config-tokenizer deleted
1   -# Localization of definitions of multi-token-expressions
2   -MTE_FILENAME=/usr/share/eniam/resources/SGJP/mte_20151215.tab
tokenizer/eniam-tokenizer-1.0/lgpl-3.0.txt deleted
1   - GNU LESSER GENERAL PUBLIC LICENSE
2   - Version 3, 29 June 2007
3   -
4   - Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
5   - Everyone is permitted to copy and distribute verbatim copies
6   - of this license document, but changing it is not allowed.
7   -
8   -
9   - This version of the GNU Lesser General Public License incorporates
10   -the terms and conditions of version 3 of the GNU General Public
11   -License, supplemented by the additional permissions listed below.
12   -
13   - 0. Additional Definitions.
14   -
15   - As used herein, "this License" refers to version 3 of the GNU Lesser
16   -General Public License, and the "GNU GPL" refers to version 3 of the GNU
17   -General Public License.
18   -
19   - "The Library" refers to a covered work governed by this License,
20   -other than an Application or a Combined Work as defined below.
21   -
22   - An "Application" is any work that makes use of an interface provided
23   -by the Library, but which is not otherwise based on the Library.
24   -Defining a subclass of a class defined by the Library is deemed a mode
25   -of using an interface provided by the Library.
26   -
27   - A "Combined Work" is a work produced by combining or linking an
28   -Application with the Library. The particular version of the Library
29   -with which the Combined Work was made is also called the "Linked
30   -Version".
31   -
32   - The "Minimal Corresponding Source" for a Combined Work means the
33   -Corresponding Source for the Combined Work, excluding any source code
34   -for portions of the Combined Work that, considered in isolation, are
35   -based on the Application, and not on the Linked Version.
36   -
37   - The "Corresponding Application Code" for a Combined Work means the
38   -object code and/or source code for the Application, including any data
39   -and utility programs needed for reproducing the Combined Work from the
40   -Application, but excluding the System Libraries of the Combined Work.
41   -
42   - 1. Exception to Section 3 of the GNU GPL.
43   -
44   - You may convey a covered work under sections 3 and 4 of this License
45   -without being bound by section 3 of the GNU GPL.
46   -
47   - 2. Conveying Modified Versions.
48   -
49   - If you modify a copy of the Library, and, in your modifications, a
50   -facility refers to a function or data to be supplied by an Application
51   -that uses the facility (other than as an argument passed when the
52   -facility is invoked), then you may convey a copy of the modified
53   -version:
54   -
55   - a) under this License, provided that you make a good faith effort to
56   - ensure that, in the event an Application does not supply the
57   - function or data, the facility still operates, and performs
58   - whatever part of its purpose remains meaningful, or
59   -
60   - b) under the GNU GPL, with none of the additional permissions of
61   - this License applicable to that copy.
62   -
63   - 3. Object Code Incorporating Material from Library Header Files.
64   -
65   - The object code form of an Application may incorporate material from
66   -a header file that is part of the Library. You may convey such object
67   -code under terms of your choice, provided that, if the incorporated
68   -material is not limited to numerical parameters, data structure
69   -layouts and accessors, or small macros, inline functions and templates
70   -(ten or fewer lines in length), you do both of the following:
71   -
72   - a) Give prominent notice with each copy of the object code that the
73   - Library is used in it and that the Library and its use are
74   - covered by this License.
75   -
76   - b) Accompany the object code with a copy of the GNU GPL and this license
77   - document.
78   -
79   - 4. Combined Works.
80   -
81   - You may convey a Combined Work under terms of your choice that,
82   -taken together, effectively do not restrict modification of the
83   -portions of the Library contained in the Combined Work and reverse
84   -engineering for debugging such modifications, if you also do each of
85   -the following:
86   -
87   - a) Give prominent notice with each copy of the Combined Work that
88   - the Library is used in it and that the Library and its use are
89   - covered by this License.
90   -
91   - b) Accompany the Combined Work with a copy of the GNU GPL and this license
92   - document.
93   -
94   - c) For a Combined Work that displays copyright notices during
95   - execution, include the copyright notice for the Library among
96   - these notices, as well as a reference directing the user to the
97   - copies of the GNU GPL and this license document.
98   -
99   - d) Do one of the following:
100   -
101   - 0) Convey the Minimal Corresponding Source under the terms of this
102   - License, and the Corresponding Application Code in a form
103   - suitable for, and under terms that permit, the user to
104   - recombine or relink the Application with a modified version of
105   - the Linked Version to produce a modified Combined Work, in the
106   - manner specified by section 6 of the GNU GPL for conveying
107   - Corresponding Source.
108   -
109   - 1) Use a suitable shared library mechanism for linking with the
110   - Library. A suitable mechanism is one that (a) uses at run time
111   - a copy of the Library already present on the user's computer
112   - system, and (b) will operate properly with a modified version
113   - of the Library that is interface-compatible with the Linked
114   - Version.
115   -
116   - e) Provide Installation Information, but only if you would otherwise
117   - be required to provide such information under section 6 of the
118   - GNU GPL, and only to the extent that such information is
119   - necessary to install and execute a modified version of the
120   - Combined Work produced by recombining or relinking the
121   - Application with a modified version of the Linked Version. (If
122   - you use option 4d0, the Installation Information must accompany
123   - the Minimal Corresponding Source and Corresponding Application
124   - Code. If you use option 4d1, you must provide the Installation
125   - Information in the manner specified by section 6 of the GNU GPL
126   - for conveying Corresponding Source.)
127   -
128   - 5. Combined Libraries.
129   -
130   - You may place library facilities that are a work based on the
131   -Library side by side in a single library together with other library
132   -facilities that are not Applications and are not covered by this
133   -License, and convey such a combined library under terms of your
134   -choice, if you do both of the following:
135   -
136   - a) Accompany the combined library with a copy of the same work based
137   - on the Library, uncombined with any other library facilities,
138   - conveyed under the terms of this License.
139   -
140   - b) Give prominent notice with the combined library that part of it
141   - is a work based on the Library, and explaining where to find the
142   - accompanying uncombined form of the same work.
143   -
144   - 6. Revised Versions of the GNU Lesser General Public License.
145   -
146   - The Free Software Foundation may publish revised and/or new versions
147   -of the GNU Lesser General Public License from time to time. Such new
148   -versions will be similar in spirit to the present version, but may
149   -differ in detail to address new problems or concerns.
150   -
151   - Each version is given a distinguishing version number. If the
152   -Library as you received it specifies that a certain numbered version
153   -of the GNU Lesser General Public License "or any later version"
154   -applies to it, you have the option of following the terms and
155   -conditions either of that published version or of any later version
156   -published by the Free Software Foundation. If the Library as you
157   -received it does not specify a version number of the GNU Lesser
158   -General Public License, you may choose any version of the GNU Lesser
159   -General Public License ever published by the Free Software Foundation.
160   -
161   - If the Library as you received it specifies that a proxy can decide
162   -whether future versions of the GNU Lesser General Public License shall
163   -apply, that proxy's public statement of acceptance of any version is
164   -permanent authorization for you to choose that version for the
165   -Library.
tokenizer/eniam-tokenizer-1.0/makefile deleted
1   -OCAMLC=ocamlc
2   -OCAMLOPT=ocamlopt
3   -OCAMLDEP=ocamldep
4   -INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam
5   -OCAMLFLAGS=$(INCLUDES) -g
6   -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa
7   -INSTALLDIR=`ocamlc -where`/eniam
8   -
9   -SOURCES= ENIAMtokenizerTypes.ml ENIAMtokens.ml ENIAMacronyms.ml ENIAMpatterns.ml ENIAMtokenizer.ml
10   -
11   -all: eniam-tokenizer.cma eniam-tokenizer.cmxa
12   -
13   -install: all
14   - mkdir -p $(INSTALLDIR)
15   - cp eniam-tokenizer.cmxa eniam-tokenizer.a eniam-tokenizer.cma config-tokenizer $(INSTALLDIR)
16   - cp ENIAMtokenizerTypes.cmi ENIAMtokens.cmi ENIAMacronyms.cmi ENIAMpatterns.cmi ENIAMtokenizer.cmi $(INSTALLDIR)
17   - cp ENIAMtokenizerTypes.cmx ENIAMtokens.cmx ENIAMacronyms.cmx ENIAMpatterns.cmx ENIAMtokenizer.cmx $(INSTALLDIR)
18   - mkdir -p /usr/share/eniam/resources/SGJP
19   - cp resources/SGJP/* /usr/share/eniam/resources/SGJP
20   -
21   -eniam-tokenizer.cma: $(SOURCES)
22   - ocamlc -linkall -a -o eniam-tokenizer.cma $(OCAMLFLAGS) $^
23   -
24   -eniam-tokenizer.cmxa: $(SOURCES)
25   - ocamlopt -linkall -a -o eniam-tokenizer.cmxa $(INCLUDES) $^
26   -
27   -test: test.ml
28   - $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml
29   -
30   -.SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx
31   -
32   -.mll.ml:
33   - ocamllex $<
34   -
35   -.mly.mli:
36   - ocamlyacc $<
37   -
38   -.mly.ml:
39   - ocamlyacc $<
40   -
41   -.ml.cmo:
42   - $(OCAMLC) $(OCAMLFLAGS) -c $<
43   -
44   -.mli.cmi:
45   - $(OCAMLC) $(OCAMLFALGS) -c $<
46   -
47   -.ml.cmx:
48   - $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $<
49   -
50   -clean:
51   - rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a test
tokenizer/eniam-tokenizer-1.0/test.ml deleted
1   -(*
2   - * ENIAMtokenizer, a tokenizer for Polish
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
5   - *
6   - * This library is free software: you can redistribute it and/or modify
7   - * it under the terms of the GNU Lesser General Public License as published by
8   - * the Free Software Foundation, either version 3 of the License, or
9   - * (at your option) any later version.
10   - *
11   - * This library is distributed in the hope that it will be useful,
12   - * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   - * GNU General Public License for more details.
15   - *
16   - * You should have received a copy of the GNU Lesser General Public License
17   - * along with this program. If not, see <http://www.gnu.org/licenses/>.
18   - *)
19   -
20   -
21   -let test_strings = [
22   -(* "a gdybym miałem";
23   - "A Gdy Miałem";
24   - "GDY MIAŁEM";
25   - "I II III IV V VI VII VIII IX X MCXIV MXC";
26   - "Kiedy Piotr Prabucki, przewodniczący Komisji Budżetu PeKaO";
27   - "25 idzie 20.";
28   - "Kot. Kot. kot.";
29   - "25.";
30   - "25.888.231";
31   - "Ala 25.888.231.111 ma.";
32   - "Ala 25.888.031,011.";
33   - "Ala -25.888.031,011.";
34   - "Ala -25 .";
35   - "Ala -1° C 3° ciepła 20—30°C od 180° do 260°C około 6° poniżej horyzontu.";
36   - "Ala 22-25 .";
37   - "Ala 22.5.2000-25.5.2001 .";
38   - "Szpak frunie.";*)
39   - "Kot miauczy.";
40   -(* "Np. Ala.";*)
41   - "w. dom.";
42   - "tzn.";
43   - "c.d.n.";
44   -(* "Arabia Saudyjska biegnie.";
45   - "Cauchy'ego ONZ-owska biegnie.";*)
46   - "TE-cie E-e.";
47   - "MS-DOS-owska CI-cie KRRi-cie UJ-ocie UJ-OCIE.";
48   - "rock'n'rollowy d’Alembertowi staro-cerkiewno-słowiańskimi";
49   -(* "Tom idzie.";*)
50   - "Miałem miał.";
51   -(* "Szpak śpiewa.";
52   - "Ala ma kota.";
53   - "Ale mają kota:"*)
54   - ]
55   -
56   -let _ =
57   - print_endline "Testy wbudowane";
58   - Xlist.iter test_strings (fun s ->
59   - print_endline ("\nTEST: " ^ s);
60   - let tokens = ENIAMtokenizer.parse s in
61   - (* print_endline (ENIAMtokenizer.xml_of tokens); *)
62   - Xlist.iter tokens (fun token -> print_endline (ENIAMtokenizer.string_of 0 token)));
63   - print_endline "Testy użytkownika.";
64   - print_endline "Wpisz tekst i naciśnij ENTER, pusty tekst kończy.";
65   - let s = ref (read_line ()) in
66   - while !s <> "" do
67   - let tokens = ENIAMtokenizer.parse !s in
68   - (* print_endline (ENIAMtokenizer.xml_of tokens); *)
69   - Xlist.iter tokens (fun token -> print_endline (ENIAMtokenizer.string_of 0 token));
70   - print_endline "Wpisz tekst i naciśnij ENTER, pusty tekst kończy.";
71   - s := read_line ()
72   - done;
73   - ()
tokenizer/makefile
... ... @@ -12,11 +12,13 @@ all: eniam-tokenizer.cma eniam-tokenizer.cmxa
12 12  
13 13 install: all
14 14 mkdir -p $(INSTALLDIR)
15   - cp eniam-tokenizer.cmxa eniam-tokenizer.a eniam-tokenizer.cma config-tokenizer $(INSTALLDIR)
  15 + cp eniam-tokenizer.cmxa eniam-tokenizer.a eniam-tokenizer.cma $(INSTALLDIR)
16 16 cp ENIAMtokenizerTypes.cmi ENIAMtokens.cmi ENIAMacronyms.cmi ENIAMpatterns.cmi ENIAMtokenizer.cmi $(INSTALLDIR)
17 17 cp ENIAMtokenizerTypes.cmx ENIAMtokens.cmx ENIAMacronyms.cmx ENIAMpatterns.cmx ENIAMtokenizer.cmx $(INSTALLDIR)
18   - mkdir -p /usr/share/eniam/resources/SGJP
19   - cp resources/SGJP/* /usr/share/eniam/resources/SGJP
  18 + mkdir -p /usr/share/eniam/tokenizer
  19 + cp resources/mte_20151215.tab /usr/share/eniam/tokenizer/mte_20151215.tab
  20 + cp resources/README /usr/share/eniam/tokenizer/README
  21 + ln -s /usr/share/eniam/tokenizer/mte_20151215.tab /usr/share/eniam/tokenizer/mte.tab
20 22  
21 23 eniam-tokenizer.cma: $(SOURCES)
22 24 ocamlc -linkall -a -o eniam-tokenizer.cma $(OCAMLFLAGS) $^
... ...
tokenizer/resources/SGJP/README renamed to tokenizer/resources/README
tokenizer/resources/SGJP/mte_20151215.tab renamed to tokenizer/resources/mte_20151215.tab