Commit e0700d7b6ee4a00d085da7acc3256bc12e80eb29

Authored by Wojciech Jaworski
1 parent 9f38bc0d

biblioteka eniam-tokenizer-1.0 z poprawioną konfiguracją

tokenizer/ENIAMtokenizerTypes.ml
@@ -64,10 +64,8 @@ type pat = L | CL | D of string | C of string | S of string | RD of string | O o @@ -64,10 +64,8 @@ type pat = L | CL | D of string | C of string | S of string | RD of string | O o
64 let empty_token = { 64 let empty_token = {
65 orth="";corr_orth="";beg=0;len=0;next=0; token=Symbol ""; attrs=[]} 65 orth="";corr_orth="";beg=0;len=0;next=0; token=Symbol ""; attrs=[]}
66 66
67 -let config =  
68 - try File.load_attr_val_pairs "config-tokenizer"  
69 - with _ -> (print_endline "ENIAMtokenizer config file not found"; []) 67 +let resource_path =
  68 + try Sys.getenv "ENIAM_RESOURCE_PATH"
  69 + with Not_found -> "/usr/share/eniam"
70 70
71 -let mte_filename =  
72 - try Xlist.assoc config "MTE_FILENAME"  
73 - with Not_found -> (print_endline "ENIAMtokenizer MTE_FILENAME config variable undefined"; "") 71 +let mte_filename = resource_path ^ "/tokenizer/mte.tab"
tokenizer/README
@@ -6,7 +6,7 @@ ENIAMtokenizer is a library that provides a tokenizer for Polish. @@ -6,7 +6,7 @@ ENIAMtokenizer is a library that provides a tokenizer for Polish.
6 Install 6 Install
7 ------- 7 -------
8 8
9 -ENIAMtokenizer requires OCaml version 4.02.3 compiler 9 +ENIAMtokenizer requires OCaml version 4.02.3 compiler
10 together with Xlib library version 3.1 or later. 10 together with Xlib library version 3.1 or later.
11 11
12 In order to install type: 12 In order to install type:
@@ -20,6 +20,10 @@ In order to test library type: @@ -20,6 +20,10 @@ In order to test library type:
20 make test 20 make test
21 ./test 21 ./test
22 22
  23 +By default, ENIAMtokenizer looks for resources in the /usr/share/eniam directory.
  24 +However, this behaviour may be changed by setting and exporting the ENIAM_RESOURCE_PATH
  25 +environment variable.
  26 +
23 Credits 27 Credits
24 ------- 28 -------
25 Copyright © 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> 29 Copyright © 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
@@ -47,4 +51,3 @@ GNU General Public License for more details. @@ -47,4 +51,3 @@ GNU General Public License for more details.
47 51
48 You should have received a copy of the GNU Lesser General Public License 52 You should have received a copy of the GNU Lesser General Public License
49 along with this program. If not, see <http://www.gnu.org/licenses/>. 53 along with this program. If not, see <http://www.gnu.org/licenses/>.
50 -  
tokenizer/config-tokenizer deleted
1 -# Localization of definitions of multi-token-expressions  
2 -MTE_FILENAME=/usr/share/eniam/resources/SGJP/mte_20151215.tab  
tokenizer/eniam-tokenizer-1.0.tar.bz2
No preview for this file type
tokenizer/eniam-tokenizer-1.0/ENIAMacronyms.ml deleted
1 -(*  
2 - * ENIAMtokenizer, a tokenizer for Polish  
3 - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>  
4 - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences  
5 - *  
6 - * This library is free software: you can redistribute it and/or modify  
7 - * it under the terms of the GNU Lesser General Public License as published by  
8 - * the Free Software Foundation, either version 3 of the License, or  
9 - * (at your option) any later version.  
10 - *  
11 - * This library is distributed in the hope that it will be useful,  
12 - * but WITHOUT ANY WARRANTY; without even the implied warranty of  
13 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  
14 - * GNU General Public License for more details.  
15 - *  
16 - * You should have received a copy of the GNU Lesser General Public License  
17 - * along with this program. If not, see <http://www.gnu.org/licenses/>.  
18 - *)  
19 -  
20 -open ENIAMtokenizerTypes  
21 -  
22 -let mte_patterns =  
23 - let lines = try File.load_lines mte_filename  
24 - with _ -> (print_endline ("ENIAMtokenizer mte file " ^ mte_filename ^ " not found"); []) in  
25 - let l = List.rev (Xlist.rev_map lines (fun line ->  
26 - match Str.split (Str.regexp "\t") line with  
27 - [orths; lemma; interp] -> Str.split (Str.regexp " ") orths, lemma, interp  
28 - | _ -> failwith ("mte_patterns: " ^ line))) in  
29 - List.rev (Xlist.rev_map l (fun (orths,lemma,interp) ->  
30 - Xlist.map orths (fun orth -> O orth), (fun (_:token_record list) -> ENIAMtokens.make_lemma (lemma,interp))))  
31 -  
32 -  
33 -let compose_lemma t lemma_suf interp =  
34 - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ lemma_suf, interp)  
35 -  
36 -let compose_lemma3 t1 t2 t3 lemma_suf interp =  
37 - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t1.token ^ ENIAMtokens.get_orth t2.token ^ ENIAMtokens.get_orth t3.token ^ lemma_suf, interp)  
38 -  
39 -let concat_orths l =  
40 - String.concat "" (Xlist.map l (fun t -> t.orth))  
41 -  
42 -let ct l lemma interp =  
43 - let beg = (List.hd l).beg in  
44 - let t = List.hd (List.rev l) in  
45 - let len = t.beg + t.len - beg in  
46 - Token{empty_token with  
47 - orth=concat_orths l;  
48 - beg=beg;  
49 - len=len;  
50 - next=t.next;  
51 - token=ENIAMtokens.make_lemma (lemma,interp);  
52 - attrs=ENIAMtokens.merge_attrs l}  
53 -  
54 -let rec get_orth_prefix i l =  
55 - if i = 0 then "",l else  
56 - match l with  
57 - c :: l -> let s,l = get_orth_prefix (i-1) l in c ^ s, l  
58 - | [] -> failwith "get_orth_prefix"  
59 -  
60 -let make_sub_tokens t l =  
61 - let n = Xlist.fold l 0 (fun n (i,_,_) -> n + i) in  
62 - let orth = Xunicode.utf8_chars_of_utf8_string t.orth in  
63 - if Xlist.size orth <> n then failwith "make_sub_tokens: invalid orth length" else  
64 - let l,_,_,_ = Xlist.fold l ([],t.beg,t.len,orth) (fun (l,beg,remaining_len,orth) (i,lemma,interp) ->  
65 - let orth,remaining_orth = get_orth_prefix i orth in  
66 - let len = if beg mod factor = 0 then i * factor else ((i-1) * factor) + (beg mod factor) in  
67 - if remaining_len = 0 then failwith "make_sub_tokens: invalid remaining_len" else  
68 - let len = if len > remaining_len then remaining_len else len in  
69 - Token{empty_token with  
70 - orth=orth;  
71 - beg=beg;  
72 - len=len;  
73 - next=beg+len;  
74 - token=ENIAMtokens.make_lemma (lemma,interp);  
75 - attrs=t.attrs} :: l,  
76 - beg+len, remaining_len-len, remaining_orth) in  
77 - l  
78 -  
79 -let st t l =  
80 - let l = make_sub_tokens t l in  
81 - match l with  
82 - Token s :: l -> List.rev (Token{s with next=t.next} :: l)  
83 - | _ -> failwith "st"  
84 -  
85 -let std t d l =  
86 - let l = make_sub_tokens t l in  
87 - match l with  
88 - Token s :: l -> List.rev (Token{s with orth=s.orth^d.orth; len=d.beg+d.len-s.beg; next=d.next} :: l)  
89 - | _ -> failwith "std"  
90 -  
91 -let acronym_patterns = [  
92 - [L; S "-"; O "owscy"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns");  
93 - [L; S "-"; O "owska"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns");  
94 - [L; S "-"; O "owski"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns");  
95 - [L; S "-"; O "owski"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns");  
96 - [L; S "-"; O "owskich"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns");  
97 - [L; S "-"; O "owskich"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
98 - [L; S "-"; O "owskich"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
99 - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");  
100 - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");  
101 - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns");  
102 - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns");  
103 - [L; S "-"; O "owskiego"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns");  
104 - [L; S "-"; O "owskiego"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
105 - [L; S "-"; O "owskiej"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns");  
106 - [L; S "-"; O "owskiej"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns");  
107 - [L; S "-"; O "owskiej"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns");  
108 - [L; S "-"; O "owskiemu"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
109 - [L; S "-"; O "owskim"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
110 - [L; S "-"; O "owskim"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
111 - [L; S "-"; O "owskim"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
112 - [L; S "-"; O "owskimi"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
113 - [L; S "-"; O "owsko"], (function [x;_;_] -> compose_lemma x "-owski" "adja" | _ -> failwith "acronym_patterns");  
114 - [L; S "-"; O "owsko"], (function [x;_;_] -> compose_lemma x "-owsko" "adv:pos" | _ -> failwith "acronym_patterns");  
115 - [L; S "-"; O "owsku"], (function [x;_;_] -> compose_lemma x "-owski" "adjp" | _ -> failwith "acronym_patterns");  
116 - [L; S "-"; O "owską"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns");  
117 - [L; S "-"; O "owską"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns");  
118 - [L; S "-"; O "wscy"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns");  
119 - [L; S "-"; O "wska"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns");  
120 - [L; S "-"; O "wski"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns");  
121 - [L; S "-"; O "wski"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns");  
122 - [L; S "-"; O "wskich"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns");  
123 - [L; S "-"; O "wskich"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
124 - [L; S "-"; O "wskich"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
125 - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");  
126 - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");  
127 - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns");  
128 - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns");  
129 - [L; S "-"; O "wskiego"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns");  
130 - [L; S "-"; O "wskiego"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
131 - [L; S "-"; O "wskiej"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns");  
132 - [L; S "-"; O "wskiej"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns");  
133 - [L; S "-"; O "wskiej"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns");  
134 - [L; S "-"; O "wskiemu"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
135 - [L; S "-"; O "wskim"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
136 - [L; S "-"; O "wskim"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
137 - [L; S "-"; O "wskim"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
138 - [L; S "-"; O "wskimi"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
139 - [L; S "-"; O "wsko"], (function [x;_;_] -> compose_lemma x "-wski" "adja" | _ -> failwith "acronym_patterns");  
140 - [L; S "-"; O "wsko"], (function [x;_;_] -> compose_lemma x "-wsko" "adv:pos" | _ -> failwith "acronym_patterns");  
141 - [L; S "-"; O "wsku"], (function [x;_;_] -> compose_lemma x "-wski" "adjp" | _ -> failwith "acronym_patterns");  
142 - [L; S "-"; O "wską"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns");  
143 - [L; S "-"; O "wską"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns");  
144 - [L; S "’"; O "owa"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns");  
145 - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");  
146 - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");  
147 - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns");  
148 - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns");  
149 - [L; S "’"; O "owego"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns");  
150 - [L; S "’"; O "owego"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
151 - [L; S "’"; O "owej"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns");  
152 - [L; S "’"; O "owej"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns");  
153 - [L; S "’"; O "owej"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns");  
154 - [L; S "’"; O "owemu"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
155 - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns");  
156 - [L; S "’"; O "owo"], (function [x;_;_] -> compose_lemma x "’owo" "adv:pos" | _ -> failwith "acronym_patterns");  
157 - [L; S "’"; O "owo"], (function [x;_;_] -> compose_lemma x "’owy" "adja" | _ -> failwith "acronym_patterns");  
158 - [L; S "’"; O "owy"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns");  
159 - [L; S "’"; O "owy"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns");  
160 - [L; S "’"; O "owych"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns");  
161 - [L; S "’"; O "owych"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
162 - [L; S "’"; O "owych"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
163 - [L; S "’"; O "owym"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
164 - [L; S "’"; O "owym"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
165 - [L; S "’"; O "owym"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
166 - [L; S "’"; O "owymi"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
167 - [L; S "’"; O "ową"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns");  
168 - [L; S "’"; O "ową"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns");  
169 - [L; S "’"; O "owscy"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns");  
170 - [L; S "’"; O "owska"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns");  
171 - [L; S "’"; O "owski"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns");  
172 - [L; S "’"; O "owski"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns");  
173 - [L; S "’"; O "owskich"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns");  
174 - [L; S "’"; O "owskich"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
175 - [L; S "’"; O "owskich"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
176 - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");  
177 - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");  
178 - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns");  
179 - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns");  
180 - [L; S "’"; O "owskiego"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns");  
181 - [L; S "’"; O "owskiego"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
182 - [L; S "’"; O "owskiej"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns");  
183 - [L; S "’"; O "owskiej"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns");  
184 - [L; S "’"; O "owskiej"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns");  
185 - [L; S "’"; O "owskiemu"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
186 - [L; S "’"; O "owskim"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
187 - [L; S "’"; O "owskim"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
188 - [L; S "’"; O "owskim"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
189 - [L; S "’"; O "owskimi"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
190 - [L; S "’"; O "owsko"], (function [x;_;_] -> compose_lemma x "’owski" "adja" | _ -> failwith "acronym_patterns");  
191 - [L; S "’"; O "owsko"], (function [x;_;_] -> compose_lemma x "’owsko" "adv:pos" | _ -> failwith "acronym_patterns");  
192 - [L; S "’"; O "owsku"], (function [x;_;_] -> compose_lemma x "’owski" "adjp" | _ -> failwith "acronym_patterns");  
193 - [L; S "’"; O "owską"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns");  
194 - [L; S "’"; O "owską"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns");  
195 - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");  
196 - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns");  
197 - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");  
198 - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns");  
199 - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");  
200 - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");  
201 - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns");  
202 - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns");  
203 - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:n2" | _ -> failwith "acronym_patterns");  
204 - [CL; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:loc:f" | _ -> failwith "acronym_patterns");  
205 - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");  
206 - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns");  
207 - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns");  
208 - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:n2" | _ -> failwith "acronym_patterns");  
209 - [CL; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:inst:f" | _ -> failwith "acronym_patterns");  
210 - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "T" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");  
211 - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "T" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");  
212 - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "TA" "subst:sg:dat:f" | _ -> failwith "acronym_patterns");  
213 - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "TA" "subst:sg:loc:f" | _ -> failwith "acronym_patterns");  
214 - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");  
215 - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");  
216 - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");  
217 - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");  
218 - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");  
219 - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");  
220 - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");  
221 - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");  
222 - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");  
223 - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");  
224 - [CL; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns");  
225 - [CL; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns");  
226 - [CL; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns");  
227 - [L; S "-"; O "ecie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");  
228 - [L; S "-"; O "ecie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");  
229 - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");  
230 - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns");  
231 - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");  
232 - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:n2" | _ -> failwith "acronym_patterns");  
233 - [L; S "-"; O "etach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns");  
234 - [L; S "-"; O "etami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns");  
235 - [L; S "-"; O "etem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");  
236 - [L; S "-"; O "etom"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns");  
237 - [L; S "-"; O "etowi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns");  
238 - [L; S "-"; O "etu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");  
239 - [L; S "-"; O "ety"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");  
240 - [L; S "-"; O "ety"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");  
241 - [L; S "-"; O "ety"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");  
242 - [L; S "-"; O "etów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");  
243 - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");  
244 - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");  
245 - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");  
246 - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");  
247 - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");  
248 - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");  
249 - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");  
250 - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");  
251 - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");  
252 - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:gen:f" | _ -> failwith "acronym_patterns");  
253 - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns");  
254 - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns");  
255 - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns");  
256 - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");  
257 - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns");  
258 - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");  
259 - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");  
260 - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns");  
261 - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");  
262 - [CL; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns");  
263 - [CL; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns");  
264 - [L; S "-"; O "iem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns");  
265 - [L; S "-"; O "iem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");  
266 - [CL; S "-"; O "o"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:voc:f" | _ -> failwith "acronym_patterns");  
267 - [L; S "-"; O "ocie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");  
268 - [L; S "-"; O "ocie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");  
269 - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");  
270 - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns");  
271 - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns");  
272 - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:n2" | _ -> failwith "acronym_patterns");  
273 - [CL; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:dat:f" | _ -> failwith "acronym_patterns");  
274 - [L; S "-"; O "otach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns");  
275 - [L; S "-"; O "otami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns");  
276 - [L; S "-"; O "otem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");  
277 - [L; S "-"; O "otom"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns");  
278 - [L; S "-"; O "otowi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns");  
279 - [L; S "-"; O "otu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");  
280 - [L; S "-"; O "oty"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");  
281 - [L; S "-"; O "oty"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");  
282 - [L; S "-"; O "oty"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");  
283 - [L; S "-"; O "otów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");  
284 - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");  
285 - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns");  
286 - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns");  
287 - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:n2" | _ -> failwith "acronym_patterns");  
288 - [L; S "-"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");  
289 - [L; S "-"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");  
290 - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns");  
291 - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns");  
292 - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");  
293 - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:n2" | _ -> failwith "acronym_patterns");  
294 - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");  
295 - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns");  
296 - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");  
297 - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");  
298 - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns");  
299 - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");  
300 - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");  
301 - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");  
302 - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");  
303 - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");  
304 - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:n2" | _ -> failwith "acronym_patterns");  
305 - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");  
306 - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");  
307 - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:n2" | _ -> failwith "acronym_patterns");  
308 - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");  
309 - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");  
310 - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:n2" | _ -> failwith "acronym_patterns");  
311 - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns");  
312 - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns");  
313 - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns");  
314 - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns");  
315 - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns");  
316 - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");  
317 - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:n2" | _ -> failwith "acronym_patterns");  
318 - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns");  
319 - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");  
320 - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:n2" | _ -> failwith "acronym_patterns");  
321 - [L; S "-"; O "zie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");  
322 - [L; S "-"; O "zie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");  
323 - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");  
324 - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");  
325 - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns");  
326 - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");  
327 - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:n2" | _ -> failwith "acronym_patterns");  
328 - [CL; S "-"; O "ą"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:inst:f" | _ -> failwith "acronym_patterns");  
329 - [CL; S "-"; O "ę"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:acc:f" | _ -> failwith "acronym_patterns");  
330 - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");  
331 - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns");  
332 - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");  
333 - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns");  
334 - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");  
335 - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");  
336 - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns");  
337 - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns");  
338 - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:loc:p3" | _ -> failwith "acronym_patterns");  
339 - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");  
340 - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns");  
341 - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns");  
342 - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:inst:p3" | _ -> failwith "acronym_patterns");  
343 - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");  
344 - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");  
345 - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");  
346 - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");  
347 - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");  
348 - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");  
349 - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");  
350 - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");  
351 - [L; S "’"; O "ego"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");  
352 - [L; S "’"; O "ego"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");  
353 - [L; S "’"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");  
354 - [L; S "’"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns");  
355 - [L; S "’"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");  
356 - [L; S "’"; O "emu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");  
357 - [L; S "’"; O "go"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");  
358 - [L; S "’"; O "go"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");  
359 - [L; S "’"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns");  
360 - [L; S "’"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");  
361 - [L; S "’"; O "m"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");  
362 - [L; S "’"; O "m"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");  
363 - [L; S "’"; O "mu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");  
364 - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");  
365 - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns");  
366 - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns");  
367 - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:dat:p3" | _ -> failwith "acronym_patterns");  
368 - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");  
369 - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns");  
370 - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns");  
371 - [L; S "’"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");  
372 - [L; S "’"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");  
373 - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");  
374 - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");  
375 - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns");  
376 - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");  
377 - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");  
378 - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns");  
379 - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");  
380 - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");  
381 - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");  
382 - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");  
383 - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");  
384 - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");  
385 - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");  
386 - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");  
387 - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");  
388 - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");  
389 - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");  
390 - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");  
391 - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns");  
392 - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");  
393 - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:gen:p3" | _ -> failwith "acronym_patterns");  
394 - [L; S "-"; O "ista"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns");  
395 - [L; S "-"; O "istach"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");  
396 - [L; S "-"; O "istami"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");  
397 - [L; S "-"; O "isto"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");  
398 - [L; S "-"; O "istom"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");  
399 - [L; S "-"; O "isty"], (function [x;_;_] -> compose_lemma x "-ista" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");  
400 - [L; S "-"; O "isty"], (function [x;_;_] -> compose_lemma x "-ista" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");  
401 - [L; S "-"; O "isty"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");  
402 - [L; S "-"; O "istów"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");  
403 - [L; S "-"; O "istów"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");  
404 - [L; S "-"; O "istą"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");  
405 - [L; S "-"; O "istę"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");  
406 - [L; S "-"; O "iści"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");  
407 - [L; S "-"; O "iści"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");  
408 - [L; S "-"; O "iście"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");  
409 - [L; S "-"; O "iście"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");  
410 - [L; S "-"; O "owca"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");  
411 - [L; S "-"; O "owca"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");  
412 - [L; S "-"; O "owcach"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");  
413 - [L; S "-"; O "owcami"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");  
414 - [L; S "-"; O "owce"], (function [x;_;_] -> compose_lemma x "-owiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");  
415 - [L; S "-"; O "owce"], (function [x;_;_] -> compose_lemma x "-owiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");  
416 - [L; S "-"; O "owcem"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");  
417 - [L; S "-"; O "owcom"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");  
418 - [L; S "-"; O "owcowi"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");  
419 - [L; S "-"; O "owcu"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");  
420 - [L; S "-"; O "owcu"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");  
421 - [L; S "-"; O "owcy"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");  
422 - [L; S "-"; O "owcy"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");  
423 - [L; S "-"; O "owcze"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");  
424 - [L; S "-"; O "owców"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");  
425 - [L; S "-"; O "owców"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");  
426 - [L; S "-"; O "owiec"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns");  
427 - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:acc:f" | _ -> failwith "acronym_patterns");  
428 - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:gen:f" | _ -> failwith "acronym_patterns");  
429 - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:nom:f" | _ -> failwith "acronym_patterns");  
430 - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:voc:f" | _ -> failwith "acronym_patterns");  
431 - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:dat:f" | _ -> failwith "acronym_patterns");  
432 - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:gen:f" | _ -> failwith "acronym_patterns");  
433 - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:loc:f" | _ -> failwith "acronym_patterns");  
434 - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:voc:f" | _ -> failwith "acronym_patterns");  
435 - [L; S "-"; O "owskościach"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:loc:f" | _ -> failwith "acronym_patterns");  
436 - [L; S "-"; O "owskościami"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:inst:f" | _ -> failwith "acronym_patterns");  
437 - [L; S "-"; O "owskościom"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:dat:f" | _ -> failwith "acronym_patterns");  
438 - [L; S "-"; O "owskością"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:inst:f" | _ -> failwith "acronym_patterns");  
439 - [L; S "-"; O "owskość"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:acc:f" | _ -> failwith "acronym_patterns");  
440 - [L; S "-"; O "owskość"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:nom:f" | _ -> failwith "acronym_patterns");  
441 - [L; S "-"; O "wca"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");  
442 - [L; S "-"; O "wca"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");  
443 - [L; S "-"; O "wcach"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");  
444 - [L; S "-"; O "wcami"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");  
445 - [L; S "-"; O "wce"], (function [x;_;_] -> compose_lemma x "-wiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");  
446 - [L; S "-"; O "wce"], (function [x;_;_] -> compose_lemma x "-wiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");  
447 - [L; S "-"; O "wcem"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");  
448 - [L; S "-"; O "wcom"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");  
449 - [L; S "-"; O "wcowi"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");  
450 - [L; S "-"; O "wcu"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");  
451 - [L; S "-"; O "wcu"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");  
452 - [L; S "-"; O "wcy"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");  
453 - [L; S "-"; O "wcy"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");  
454 - [L; S "-"; O "wców"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");  
455 - [L; S "-"; O "wców"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");  
456 - [L; S "-"; O "wiec"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns");  
457 - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:acc:f" | _ -> failwith "acronym_patterns");  
458 - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:gen:f" | _ -> failwith "acronym_patterns");  
459 - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:nom:f" | _ -> failwith "acronym_patterns");  
460 - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:voc:f" | _ -> failwith "acronym_patterns");  
461 - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:dat:f" | _ -> failwith "acronym_patterns");  
462 - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:gen:f" | _ -> failwith "acronym_patterns");  
463 - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:loc:f" | _ -> failwith "acronym_patterns");  
464 - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:voc:f" | _ -> failwith "acronym_patterns");  
465 - [L; S "’"; O "owościach"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:loc:f" | _ -> failwith "acronym_patterns");  
466 - [L; S "’"; O "owościami"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:inst:f" | _ -> failwith "acronym_patterns");  
467 - [L; S "’"; O "owościom"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:dat:f" | _ -> failwith "acronym_patterns");  
468 - [L; S "’"; O "owością"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:inst:f" | _ -> failwith "acronym_patterns");  
469 - [L; S "’"; O "owość"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:acc:f" | _ -> failwith "acronym_patterns");  
470 - [L; S "’"; O "owość"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:nom:f" | _ -> failwith "acronym_patterns");  
471 -  
472 - [L; S "-"; L; S "-"; O "owscy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns");  
473 - [L; S "-"; L; S "-"; O "owska"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns");  
474 - [L; S "-"; L; S "-"; O "owski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns");  
475 - [L; S "-"; L; S "-"; O "owski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns");  
476 - [L; S "-"; L; S "-"; O "owskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns");  
477 - [L; S "-"; L; S "-"; O "owskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
478 - [L; S "-"; L; S "-"; O "owskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
479 - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");  
480 - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");  
481 - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns");  
482 - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns");  
483 - [L; S "-"; L; S "-"; O "owskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns");  
484 - [L; S "-"; L; S "-"; O "owskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
485 - [L; S "-"; L; S "-"; O "owskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns");  
486 - [L; S "-"; L; S "-"; O "owskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns");  
487 - [L; S "-"; L; S "-"; O "owskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns");  
488 - [L; S "-"; L; S "-"; O "owskiemu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
489 - [L; S "-"; L; S "-"; O "owskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
490 - [L; S "-"; L; S "-"; O "owskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
491 - [L; S "-"; L; S "-"; O "owskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
492 - [L; S "-"; L; S "-"; O "owskimi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
493 - [L; S "-"; L; S "-"; O "owsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adja" | _ -> failwith "acronym_patterns");  
494 - [L; S "-"; L; S "-"; O "owsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owsko" "adv:pos" | _ -> failwith "acronym_patterns");  
495 - [L; S "-"; L; S "-"; O "owsku"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adjp" | _ -> failwith "acronym_patterns");  
496 - [L; S "-"; L; S "-"; O "owską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns");  
497 - [L; S "-"; L; S "-"; O "owską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns");  
498 - [L; S "-"; L; S "-"; O "wscy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns");  
499 - [L; S "-"; L; S "-"; O "wska"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns");  
500 - [L; S "-"; L; S "-"; O "wski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns");  
501 - [L; S "-"; L; S "-"; O "wski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns");  
502 - [L; S "-"; L; S "-"; O "wskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns");  
503 - [L; S "-"; L; S "-"; O "wskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
504 - [L; S "-"; L; S "-"; O "wskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
505 - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");  
506 - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns");  
507 - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns");  
508 - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns");  
509 - [L; S "-"; L; S "-"; O "wskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns");  
510 - [L; S "-"; L; S "-"; O "wskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
511 - [L; S "-"; L; S "-"; O "wskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns");  
512 - [L; S "-"; L; S "-"; O "wskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns");  
513 - [L; S "-"; L; S "-"; O "wskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns");  
514 - [L; S "-"; L; S "-"; O "wskiemu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
515 - [L; S "-"; L; S "-"; O "wskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
516 - [L; S "-"; L; S "-"; O "wskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
517 - [L; S "-"; L; S "-"; O "wskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns");  
518 - [L; S "-"; L; S "-"; O "wskimi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns");  
519 - [L; S "-"; L; S "-"; O "wsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adja" | _ -> failwith "acronym_patterns");  
520 - [L; S "-"; L; S "-"; O "wsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wsko" "adv:pos" | _ -> failwith "acronym_patterns");  
521 - [L; S "-"; L; S "-"; O "wsku"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adjp" | _ -> failwith "acronym_patterns");  
522 - [L; S "-"; L; S "-"; O "wską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns");  
523 - [L; S "-"; L; S "-"; O "wską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns");  
524 - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");  
525 - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns");  
526 - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");  
527 - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns");  
528 - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");  
529 - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");  
530 - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns");  
531 - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns");  
532 - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:n2" | _ -> failwith "acronym_patterns");  
533 - [CL; S "-"; CL; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:loc:f" | _ -> failwith "acronym_patterns");  
534 - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");  
535 - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns");  
536 - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns");  
537 - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:n2" | _ -> failwith "acronym_patterns");  
538 - [CL; S "-"; CL; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:inst:f" | _ -> failwith "acronym_patterns");  
539 - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "T" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");  
540 - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "T" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");  
541 - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "TA" "subst:sg:dat:f" | _ -> failwith "acronym_patterns");  
542 - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "TA" "subst:sg:loc:f" | _ -> failwith "acronym_patterns");  
543 - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");  
544 - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");  
545 - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");  
546 - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");  
547 - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");  
548 - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");  
549 - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");  
550 - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");  
551 - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");  
552 - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");  
553 - [CL; S "-"; CL; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns");  
554 - [CL; S "-"; CL; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns");  
555 - [CL; S "-"; CL; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns");  
556 - [L; S "-"; L; S "-"; O "ecie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");  
557 - [L; S "-"; L; S "-"; O "ecie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");  
558 - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");  
559 - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns");  
560 - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");  
561 - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:n2" | _ -> failwith "acronym_patterns");  
562 - [L; S "-"; L; S "-"; O "etach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns");  
563 - [L; S "-"; L; S "-"; O "etami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns");  
564 - [L; S "-"; L; S "-"; O "etem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");  
565 - [L; S "-"; L; S "-"; O "etom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns");  
566 - [L; S "-"; L; S "-"; O "etowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns");  
567 - [L; S "-"; L; S "-"; O "etu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");  
568 - [L; S "-"; L; S "-"; O "ety"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");  
569 - [L; S "-"; L; S "-"; O "ety"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");  
570 - [L; S "-"; L; S "-"; O "ety"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");  
571 - [L; S "-"; L; S "-"; O "etów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");  
572 - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");  
573 - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");  
574 - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");  
575 - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");  
576 - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");  
577 - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");  
578 - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");  
579 - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");  
580 - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");  
581 - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:gen:f" | _ -> failwith "acronym_patterns");  
582 - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns");  
583 - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns");  
584 - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns");  
585 - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");  
586 - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns");  
587 - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");  
588 - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");  
589 - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns");  
590 - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");  
591 - [CL; S "-"; CL; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns");  
592 - [CL; S "-"; CL; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns");  
593 - [L; S "-"; L; S "-"; O "iem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns");  
594 - [L; S "-"; L; S "-"; O "iem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");  
595 - [CL; S "-"; CL; S "-"; O "o"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:voc:f" | _ -> failwith "acronym_patterns");  
596 - [L; S "-"; L; S "-"; O "ocie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");  
597 - [L; S "-"; L; S "-"; O "ocie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");  
598 - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");  
599 - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns");  
600 - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns");  
601 - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:n2" | _ -> failwith "acronym_patterns");  
602 - [CL; S "-"; CL; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:dat:f" | _ -> failwith "acronym_patterns");  
603 - [L; S "-"; L; S "-"; O "otach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns");  
604 - [L; S "-"; L; S "-"; O "otami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns");  
605 - [L; S "-"; L; S "-"; O "otem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");  
606 - [L; S "-"; L; S "-"; O "otom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns");  
607 - [L; S "-"; L; S "-"; O "otowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns");  
608 - [L; S "-"; L; S "-"; O "otu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");  
609 - [L; S "-"; L; S "-"; O "oty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");  
610 - [L; S "-"; L; S "-"; O "oty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");  
611 - [L; S "-"; L; S "-"; O "oty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");  
612 - [L; S "-"; L; S "-"; O "otów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");  
613 - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");  
614 - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns");  
615 - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns");  
616 - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:n2" | _ -> failwith "acronym_patterns");  
617 - [L; S "-"; L; S "-"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");  
618 - [L; S "-"; L; S "-"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");  
619 - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns");  
620 - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns");  
621 - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");  
622 - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:n2" | _ -> failwith "acronym_patterns");  
623 - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");  
624 - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns");  
625 - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");  
626 - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");  
627 - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns");  
628 - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");  
629 - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");  
630 - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");  
631 - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");  
632 - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");  
633 - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:n2" | _ -> failwith "acronym_patterns");  
634 - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");  
635 - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");  
636 - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:n2" | _ -> failwith "acronym_patterns");  
637 - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");  
638 - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");  
639 - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:n2" | _ -> failwith "acronym_patterns");  
640 - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns");  
641 - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns");  
642 - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns");  
643 - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns");  
644 - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns");  
645 - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");  
646 - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:n2" | _ -> failwith "acronym_patterns");  
647 - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns");  
648 - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");  
649 - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:n2" | _ -> failwith "acronym_patterns");  
650 - [L; S "-"; L; S "-"; O "zie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");  
651 - [L; S "-"; L; S "-"; O "zie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");  
652 - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");  
653 - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");  
654 - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns");  
655 - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");  
656 - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:n2" | _ -> failwith "acronym_patterns");  
657 - [CL; S "-"; CL; S "-"; O "ą"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:inst:f" | _ -> failwith "acronym_patterns");  
658 - [CL; S "-"; CL; S "-"; O "ę"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:acc:f" | _ -> failwith "acronym_patterns");  
659 - [L; S "-"; L; S "-"; O "ista"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns");  
660 - [L; S "-"; L; S "-"; O "istach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");  
661 - [L; S "-"; L; S "-"; O "istami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");  
662 - [L; S "-"; L; S "-"; O "isto"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");  
663 - [L; S "-"; L; S "-"; O "istom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");  
664 - [L; S "-"; L; S "-"; O "isty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");  
665 - [L; S "-"; L; S "-"; O "isty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");  
666 - [L; S "-"; L; S "-"; O "isty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");  
667 - [L; S "-"; L; S "-"; O "istów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");  
668 - [L; S "-"; L; S "-"; O "istów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");  
669 - [L; S "-"; L; S "-"; O "istą"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");  
670 - [L; S "-"; L; S "-"; O "istę"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");  
671 - [L; S "-"; L; S "-"; O "iści"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");  
672 - [L; S "-"; L; S "-"; O "iści"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");  
673 - [L; S "-"; L; S "-"; O "iście"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");  
674 - [L; S "-"; L; S "-"; O "iście"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");  
675 - [L; S "-"; L; S "-"; O "owca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");  
676 - [L; S "-"; L; S "-"; O "owca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");  
677 - [L; S "-"; L; S "-"; O "owcach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");  
678 - [L; S "-"; L; S "-"; O "owcami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");  
679 - [L; S "-"; L; S "-"; O "owce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");  
680 - [L; S "-"; L; S "-"; O "owce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");  
681 - [L; S "-"; L; S "-"; O "owcem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");  
682 - [L; S "-"; L; S "-"; O "owcom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");  
683 - [L; S "-"; L; S "-"; O "owcowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");  
684 - [L; S "-"; L; S "-"; O "owcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");  
685 - [L; S "-"; L; S "-"; O "owcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");  
686 - [L; S "-"; L; S "-"; O "owcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");  
687 - [L; S "-"; L; S "-"; O "owcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");  
688 - [L; S "-"; L; S "-"; O "owcze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");  
689 - [L; S "-"; L; S "-"; O "owców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");  
690 - [L; S "-"; L; S "-"; O "owców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");  
691 - [L; S "-"; L; S "-"; O "owiec"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns");  
692 - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:acc:f" | _ -> failwith "acronym_patterns");  
693 - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:gen:f" | _ -> failwith "acronym_patterns");  
694 - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:nom:f" | _ -> failwith "acronym_patterns");  
695 - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:voc:f" | _ -> failwith "acronym_patterns");  
696 - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:dat:f" | _ -> failwith "acronym_patterns");  
697 - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:gen:f" | _ -> failwith "acronym_patterns");  
698 - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:loc:f" | _ -> failwith "acronym_patterns");  
699 - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:voc:f" | _ -> failwith "acronym_patterns");  
700 - [L; S "-"; L; S "-"; O "owskościach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:loc:f" | _ -> failwith "acronym_patterns");  
701 - [L; S "-"; L; S "-"; O "owskościami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:inst:f" | _ -> failwith "acronym_patterns");  
702 - [L; S "-"; L; S "-"; O "owskościom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:dat:f" | _ -> failwith "acronym_patterns");  
703 - [L; S "-"; L; S "-"; O "owskością"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:inst:f" | _ -> failwith "acronym_patterns");  
704 - [L; S "-"; L; S "-"; O "owskość"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:acc:f" | _ -> failwith "acronym_patterns");  
705 - [L; S "-"; L; S "-"; O "owskość"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:nom:f" | _ -> failwith "acronym_patterns");  
706 - [L; S "-"; L; S "-"; O "wca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");  
707 - [L; S "-"; L; S "-"; O "wca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");  
708 - [L; S "-"; L; S "-"; O "wcach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");  
709 - [L; S "-"; L; S "-"; O "wcami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");  
710 - [L; S "-"; L; S "-"; O "wce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");  
711 - [L; S "-"; L; S "-"; O "wce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");  
712 - [L; S "-"; L; S "-"; O "wcem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");  
713 - [L; S "-"; L; S "-"; O "wcom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");  
714 - [L; S "-"; L; S "-"; O "wcowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");  
715 - [L; S "-"; L; S "-"; O "wcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");  
716 - [L; S "-"; L; S "-"; O "wcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");  
717 - [L; S "-"; L; S "-"; O "wcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");  
718 - [L; S "-"; L; S "-"; O "wcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");  
719 - [L; S "-"; L; S "-"; O "wców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");  
720 - [L; S "-"; L; S "-"; O "wców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");  
721 - [L; S "-"; L; S "-"; O "wiec"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns");  
722 - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");  
723 - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns");  
724 - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");  
725 - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns");  
726 - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");  
727 - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns");  
728 - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns");  
729 - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns");  
730 - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:loc:p3" | _ -> failwith "acronym_patterns");  
731 - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns");  
732 - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns");  
733 - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns");  
734 - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:inst:p3" | _ -> failwith "acronym_patterns");  
735 - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");  
736 - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");  
737 - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");  
738 - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");  
739 - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");  
740 - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");  
741 - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");  
742 - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");  
743 - [L; S "-"; L; S "’"; O "ego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");  
744 - [L; S "-"; L; S "’"; O "ego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");  
745 - [L; S "-"; L; S "’"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");  
746 - [L; S "-"; L; S "’"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns");  
747 - [L; S "-"; L; S "’"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns");  
748 - [L; S "-"; L; S "’"; O "emu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");  
749 - [L; S "-"; L; S "’"; O "go"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns");  
750 - [L; S "-"; L; S "’"; O "go"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns");  
751 - [L; S "-"; L; S "’"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns");  
752 - [L; S "-"; L; S "’"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");  
753 - [L; S "-"; L; S "’"; O "m"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns");  
754 - [L; S "-"; L; S "’"; O "m"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");  
755 - [L; S "-"; L; S "’"; O "mu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");  
756 - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns");  
757 - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns");  
758 - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns");  
759 - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:dat:p3" | _ -> failwith "acronym_patterns");  
760 - [L; S "-"; L; S "’"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns");  
761 - [L; S "-"; L; S "’"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns");  
762 - [L; S "-"; L; S "’"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns");  
763 - [L; S "-"; L; S "’"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns");  
764 - [L; S "-"; L; S "’"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns");  
765 - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns");  
766 - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns");  
767 - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns");  
768 - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns");  
769 - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns");  
770 - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns");  
771 - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns");  
772 - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns");  
773 - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns");  
774 - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns");  
775 - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns");  
776 - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");  
777 - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns");  
778 - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns");  
779 - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns");  
780 - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns");  
781 - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns");  
782 - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns");  
783 - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns");  
784 - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns");  
785 - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:gen:p3" | _ -> failwith "acronym_patterns");  
786 - ]  
787 -  
788 -let name_patterns = [  
789 - [O "O"; S "’"; L], (function [x;y;z] -> compose_lemma3 x y z "" "subst:_:_:_" | _ -> failwith "name_patterns");  
790 - [O "d"; S "’"; L], (function [x;y;z] -> compose_lemma3 x y z "" "subst:_:_:_" | _ -> failwith "name_patterns");  
791 - [O "l"; S "’"; L], (function [x;y;z] -> compose_lemma3 x y z "" "subst:_:_:_" | _ -> failwith "name_patterns");  
792 - [L; S "’"; O "s"], (function [x;y;z] -> compose_lemma3 x y z "" "subst:_:_:_" | _ -> failwith "name_patterns");  
793 - [L; S "’"; O "sa"], (function [x;_;_] -> compose_lemma x "’s" "subst:sg:gen.acc:_" | _ -> failwith "name_patterns");  
794 - ]  
795 -  
796 -let abr_patterns = [  
797 - [O "b"; S "."; O "u"; S "."], (function [a;b;c;d] -> [ct [a;b] "bez" "prep:gen:nwok"; ct [c;d] "uwaga" "subst:pl:gen:f"] | _ -> failwith "abr_patterns");  
798 - [O "b"; S "."; O "zm"; S "."], (function [a;b;c;d] -> [ct [a;b] "bez" "prep:gen:nwok"; ct [c;d] "zmiana" "subst:pl:gen:f"] | _ -> failwith "abr_patterns");  
799 - [O "blm"], (function [a] -> st a [1,"bez","prep:gen:nwok";1,"liczba","subst:sg:gen:f";1,"mnogi","adj:sg:gen:f:pos"] | _ -> failwith "abr_patterns");  
800 - [O "blp"], (function [a] -> st a [1,"bez","prep:gen:nwok";1,"liczba","subst:sg:gen:f";1,"pojedynczy","adj:sg:gen:f:pos"] | _ -> failwith "abr_patterns");  
801 - [O "błp"; S "."], (function [a;b] -> std a b [2,"błogosławiony","adj:sg:gen:f:pos";1,"pamięć","subst:sg:gen:f"] | _ -> failwith "abr_patterns");  
802 - [O "bm"], (function [a] -> st a [1,"bieżący","adj:sg:$C:m3:pos";1,"miesiąc","subst:sg:$C:m3"] | _ -> failwith "abr_patterns");  
803 - [O "bm"; S "."], (function [a;b] -> std a b [1,"bieżący","adj:sg:$C:m3:pos";1,"miesiąc","subst:sg:$C:m3"] | _ -> failwith "abr_patterns");  
804 - [O "bp"; S "."], (function [a;b] -> std a b [1,"błogosławiony","adj:sg:gen:f:pos";1,"pamięć","subst:sg:gen:f"] | _ -> failwith "abr_patterns");  
805 - [O "br"], (function [a] -> st a [1,"bieżący","adj:sg:$C:m3:pos";1,"rok","subst:sg:$C:m3"] | _ -> failwith "abr_patterns");  
806 - [O "br"; S "."], (function [a;b] -> std a b [1,"bieżący","adj:sg:$C:m3:pos";1,"rok","subst:sg:$C:m3"] | _ -> failwith "abr_patterns");  
807 - [O "c"; S "."; O "d"; S "."; O "n"; S "."], (function [a;b;c;d;e;f] -> [ct [a;b] "ciąg" "subst:sg:nom:m3"; ct [c;d] "daleki" "adj:sg:nom:m3:com"; ct [e;f] "nastąpić" "fin:sg:ter:perf"] | _ -> failwith "abr_patterns");  
808 - [O "ccm"], (function [a] -> st a [1,"sześcienny","adj:_:$C:m3:pos";2,"centymetr","subst:_:$C:m3"] | _ -> failwith "abr_patterns");  
809 - [O "cd"; S "."], (function [a;b] -> std a b [1,"ciąg","subst:sg:nom:m3";1,"daleki","adj:sg:nom:m3:com"] | _ -> failwith "abr_patterns");  
810 - [O "cdn"; S "."], (function [a;b] -> std a b [1,"ciąg","subst:sg:nom:m3";1,"daleki","adj:sg:nom:m3:com";1,"nastąpić","fin:sg:ter:perf"] | _ -> failwith "abr_patterns");  
811 - [O "cm"; O "3"], (function [a;b] -> [ct [a] "centymetr" "subst:_:$C:m3"; ct [b] "sześcienny" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns");  
812 - [O "dcn"; S "."], (function [a;b] -> std a b [1,"daleki","adj:sg:nom:m3:com";1,"ciąg","subst:sg:nom:m3";1,"nastąpić","fin:sg:ter:perf"] | _ -> failwith "abr_patterns");  
813 - [O "dm"; O "3"], (function [a;b] -> [ct [a] "decymetr" "subst:_:$C:m3"; ct [b] "sześcienny" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns");  
814 - [O "ds"; S "."], (function [a;b] -> std a b [1,"do","prep:gen";1,"sprawa","subst:pl:gen:f"] | _ -> failwith "abr_patterns");  
815 - [O "d"; O "/"; O "s"], (function [a;b;c] -> [ct [a;b] "do" "prep:gen"; ct [c] "sprawa" "subst:pl:gen:f"] | _ -> failwith "abr_patterns");  
816 - [O "itd"; S "."], (function [a;b] -> std a b [1,"i","conj";1,"tak","adv:pos";1,"daleko","adv:com"] | _ -> failwith "abr_patterns");  
817 - [O "itede"; S "."], (function [a;b] -> std a b [1,"i","conj";2,"tak","adv:pos";2,"daleko","adv:com"] | _ -> failwith "abr_patterns");  
818 - [O "itp"; S "."], (function [a;b] -> std a b [1,"i","conj";1,"tym","adv";1,"podobny","adj:pl:nom:_:pos"] | _ -> failwith "abr_patterns");  
819 - [O "jw"; S "."], (function [a;b] -> std a b [1,"jak","adv:pos";1,"wysoko","adv:com"] | _ -> failwith "abr_patterns");  
820 - [O "JWP"], (function [a] -> st a [1,"jaśnie","adv:pos";1,"wielmożny","adj:_:$C:m1:pos";1,"pan","subst:_:$C:m1"] | _ -> failwith "abr_patterns");  
821 - [O "JWP"], (function [a] -> st a [1,"jaśnie","adv:pos";1,"wielmożny","adj:_:$C:f:pos";1,"pani","subst:_:$C:f"] | _ -> failwith "abr_patterns");  
822 - [O "km"; S "."; O "2"], (function [a;b;c] -> [ct [a;b] "kilometr" "subst:_:$C:m3"; ct [c] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns");  
823 - [O "km"; O "2"], (function [a;b] -> [ct [a] "kilometr" "subst:_:$C:m3"; ct [b] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns");  
824 - [O "km"; O "²"], (function [a;b] -> [ct [a] "kilometr" "subst:_:$C:m3"; ct [b] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns");  
825 - [O "lm"; S "."], (function [a;b] -> std a b [1,"liczba","subst:sg:$C:f";1,"mnogi","adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns");  
826 - [O "lp"; S "."], (function [a;b] -> std a b [1,"liczba","subst:sg:$C:f";1,"pojedynczy","adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns");  
827 - [O "m"; S "."; O "in"; S "."], (function [a;b;c;d] -> [ct [a;b] "między" "prep:inst"; ct [c;d] "inny" "adj:pl:inst:_:pos"] | _ -> failwith "abr_patterns");  
828 - [O "m"; S "."; O "in"], (function [a;b;c] -> [ct [a;b] "między" "prep:inst"; ct [c] "inny" "adj:pl:inst:_:pos"] | _ -> failwith "abr_patterns");  
829 - [O "m"; S "."; O "inn"; S "."], (function [a;b;c;d] -> [ct [a;b] "między" "prep:inst"; ct [c;d] "inny" "adj:pl:inst:_:pos"] | _ -> failwith "abr_patterns");  
830 - [O "m"; S "."; O "st"; S "."], (function [a;b;c;d] -> [ct [a;b] "miasto" "subst:_:$C:n2"; ct [c;d] "stołeczny" "adj:_:$C:n2:pos"] | _ -> failwith "abr_patterns");  
831 - [O "m"; O "^"; O "2"], (function [a;b;c] -> [ct [a] "metr" "subst:_:$C:m3"; ct [b;c] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns");  
832 - [O "m"; O "2"], (function [a;b] -> [ct [a] "metr" "subst:_:$C:m3"; ct [b] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns");  
833 - [O "m"; O "3"], (function [a;b] -> [ct [a] "metr" "subst:_:$C:m3"; ct [b] "sześcienny" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns");  
834 - [O "min"; S "."], (function [a;b] -> std a b [1,"między","prep:inst";2,"inny","adj:pl:inst:_:pos"] | _ -> failwith "abr_patterns");  
835 - [O "mkw"; S "."], (function [a;b] -> std a b [1,"metr","subst:_:$C:m3";2,"kwadratowy","adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns");  
836 - [O "n"; S "."; O "e"; S "."], (function [a;b;c;d] -> [ct [a;b] "nasz" "adj:sg:gen:f:pos"; ct [c;d] "era" "subst:sg:gen:f"] | _ -> failwith "abr_patterns");  
837 - [O "n"; S "."; O "p"; S "."; O "m"; S "."], (function [a;b;c;d;e;f] -> [ct [a;b] "nad" "prep:inst"; ct [c;d] "poziom" "subst:sg:inst:m3"; ct [e;f] "morze" "subst:sg:gen:n2"] | _ -> failwith "abr_patterns");  
838 - [O "np"; S "."], (function [a;b] -> std a b [1,"na","prep:acc";1,"przykład","subst:sg:acc:m3"] | _ -> failwith "abr_patterns");  
839 - [O "nt"; S "."], (function [a;b] -> std a b [1,"na","prep:acc";1,"temat","subst:sg:acc:m3"] | _ -> failwith "abr_patterns");  
840 - [O "NTG"], (function [a] -> st a [1,"nie","qub";1,"ta","adj:sg:nom:f:pos";1,"grupa","subst:sg:nom:f"] | _ -> failwith "abr_patterns");  
841 - [O "o"; S "."; O "o"; S "."], (function [a;b;c;d] -> [ct [a;b] "ograniczony" "adj:sg:$C:f:pos"; ct [c;d] "odpowiedzialność" "subst:sg:$C:f"] | _ -> failwith "abr_patterns");  
842 - [O "p"; S "."; O "n"; S "."; O "e"; S "."], (function [a;b;c;d;e;f] -> [ct [a;b] "przed" "prep:inst"; ct [c;d] "nasz" "adj:sg:inst:f:pos"; ct [e;f] "era" "subst:sg:inst:f"] | _ -> failwith "abr_patterns");  
843 - [O "p"; S "."; O "o"; S "."], (function [a;b;c;d] -> [ct [a;b] "pełniący" "pact:_:_:m1.m2.m3:imperf:aff"; ct [c;d] "obowiązek" "subst:pl:acc:m3"] | _ -> failwith "abr_patterns");  
844 - [O "p"; S "."; O "p"; S "."; O "m"; S "."], (function [a;b;c;d;e;f] -> [ct [a;b] "pod" "prep:inst"; ct [c;d] "poziom" "subst:sg:inst:m3"; ct [e;f] "morze" "subst:sg:gen:n2"] | _ -> failwith "abr_patterns");  
845 - [O "p"; S "."; O "t"; S "."], (function [a;b;c;d] -> [ct [a;b] "pod" "prep:inst:nwokc"; ct [c;d] "tytuł" "subst:sg:inst:m3"] | _ -> failwith "abr_patterns");  
846 - [O "pn"; S "."], (function [a;b] -> std a b [1,"pod","prep:inst";1,"nazwa","subst:sg:inst:f"] | _ -> failwith "abr_patterns");  
847 - [O "pne"; S "."], (function [a;b] -> std a b [1,"przed","prep:inst";1,"nasz","adj:sg:inst:f:pos";1,"era","subst:sg:inst:f"] | _ -> failwith "abr_patterns");  
848 - [O "pt"; S "."], (function [a;b] -> std a b [1,"pod","prep:inst";1,"tytuł","subst:sg:inst:m3"] | _ -> failwith "abr_patterns");  
849 - [O "PW"], (function [a] -> st a [1,"prywatny","adj:_:$C:f:pos";1,"wiadomość","subst:_:$C:f"] | _ -> failwith "abr_patterns");  
850 - [O "pw"; S "."], (function [a;b] -> std a b [1,"pod","prep:inst";1,"wezwanie","subst:sg:inst:n2"] | _ -> failwith "abr_patterns");  
851 -(* [O "S"; S "."; O "A"; S "."], (function [a;b;c;d] -> [ct [a;b] "spółka" "subst:sg:$C:f"; ct [c;d] "akcyjny" "adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns");  
852 - [O "s"; S "."; O "c"; S "."], (function [a;b;c;d] -> [ct [a;b] "spółka" "subst:sg:$C:f"; ct [c;d] "cywilny" "adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns");*)  
853 -(* [O "SA"], (function [a] -> st a [1,"spółka","subst:sg:$C:f";1,"akcyjny","adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns"); *)  
854 - [O "ś"; S "."; O "p"; S "."], (function [a;b;c;d] -> [ct [a;b] "święty" "adj:sg:gen:f:pos"; ct [c;d] "pamięć" "subst:sg:gen:f"] | _ -> failwith "abr_patterns");  
855 - [O "śp"; S "."], (function [a;b] -> std a b [1,"święty","adj:sg:gen:f:pos";1,"pamięć","subst:sg:gen:f"] | _ -> failwith "abr_patterns");  
856 - [O "tgz"; S "."], (function [a;b] -> std a b [2,"tak","adv";1,"zwać","ppas:_:_:_:_:aff"] | _ -> failwith "abr_patterns");  
857 - [O "tj"; S "."], (function [a;b] -> std a b [1,"to","subst:sg:nom:n2";1,"być","fin:sg:ter:imperf"] | _ -> failwith "abr_patterns");  
858 - [O "tzn"; S "."], (function [a;b] -> std a b [1,"to","subst:sg:nom:n2";2,"znaczyć","fin:sg:ter:imperf"] | _ -> failwith "abr_patterns");  
859 - [O "tzw"; S "."], (function [a;b] -> std a b [1,"tak","adv:pos";2,"zwać","ppas:_:_:_:imperf:aff"] | _ -> failwith "abr_patterns");  
860 - [O "ub"; S "."; O "r"; S "."], (function [a;b;c;d] -> [ct [a;b] "ubiegły" "adj:sg:$C:m3:pos"; ct [c;d] "rok" "subst:sg:$C:m3"] | _ -> failwith "abr_patterns");  
861 - [O "w"; S "."; O "w"; S "."], (function [a;b;c;d] -> [ct [a;b] "wysoko" "adv:com"; ct [c;d] "wymienić" "ppas:_:_:_:perf:aff"] | _ -> failwith "abr_patterns");  
862 - [O "w"; O "/"; O "m"], (function [a;b;c] -> [ct [a;b] "w" "prep:loc"; ct [c] "miejsce" "subst:_:loc:m3"] | _ -> failwith "abr_patterns");  
863 - [O "w"; O "/"; O "w"], (function [a;b;c] -> [ct [a;b] "wysoko" "adv:com"; ct [c] "wymienić" "ppas:_:_:_:perf:aff"] | _ -> failwith "abr_patterns");  
864 - [O "ws"; S "."], (function [a;b] -> std a b [1,"w","prep:loc:nwok";1,"sprawa","subst:sg:loc:f"] | _ -> failwith "abr_patterns");  
865 - [O "ww"; S "."], (function [a;b] -> std a b [1,"wysoko","adv:com";1,"wymieniony","ppas:_:_:_:perf:aff"] | _ -> failwith "abr_patterns");  
866 - ]  
tokenizer/eniam-tokenizer-1.0/ENIAMpatterns.ml deleted
1 -(*  
2 - * ENIAMtokenizer, a tokenizer for Polish  
3 - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>  
4 - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences  
5 - *  
6 - * This library is free software: you can redistribute it and/or modify  
7 - * it under the terms of the GNU Lesser General Public License as published by  
8 - * the Free Software Foundation, either version 3 of the License, or  
9 - * (at your option) any later version.  
10 - *  
11 - * This library is distributed in the hope that it will be useful,  
12 - * but WITHOUT ANY WARRANTY; without even the implied warranty of  
13 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  
14 - * GNU General Public License for more details.  
15 - *  
16 - * You should have received a copy of the GNU Lesser General Public License  
17 - * along with this program. If not, see <http://www.gnu.org/licenses/>.  
18 - *)  
19 -  
20 -open Xstd  
21 -open Printf  
22 -open ENIAMtokenizerTypes  
23 -  
24 -let rec flatten_tokens rev_variants = function  
25 - | [] -> rev_variants  
26 - | Token t :: l -> flatten_tokens (Xlist.map rev_variants (fun rev_variant -> Token t :: rev_variant)) l  
27 - | Seq seq :: l -> flatten_tokens rev_variants (seq @ l)  
28 - | Variant variants :: l -> flatten_tokens (List.flatten (Xlist.map variants (fun variant -> flatten_tokens rev_variants [variant]))) l  
29 -  
30 -let rec normalize_tokens rev = function  
31 - [] -> List.rev rev  
32 - | Token t :: l -> normalize_tokens (Token t :: rev) l  
33 - | Seq seq :: l -> normalize_tokens rev (seq @ l)  
34 - | Variant[t] :: l -> normalize_tokens rev (t :: l)  
35 - | Variant variants :: l ->  
36 - let variants = flatten_tokens [[]] [Variant variants] in  
37 - let variants = Xlist.map variants (fun rev_seq ->  
38 - match List.rev rev_seq with  
39 - [] -> failwith "normalize_tokens"  
40 - | [t] -> t  
41 - | seq -> Seq seq) in  
42 - let t = match variants with  
43 - [] -> failwith "normalize_tokens"  
44 - | [t] -> t  
45 - | variants -> Variant variants in  
46 - normalize_tokens (t :: rev) l  
47 -  
48 -let concat_orths l =  
49 - String.concat "" (Xlist.map l (fun t -> t.orth))  
50 -  
51 -let concat_orths2 l =  
52 - String.concat "" (Xlist.map l (fun t -> ENIAMtokens.get_orth t.token))  
53 -  
54 -let concat_intnum = function  
55 - [{token=Dig(v4,_)};_;{token=Dig(v3,_)};_;{token=Dig(v2,_)};_;{token=Dig(v1,_)}] -> v4^v3^v2^v1  
56 - | [{token=Dig(v3,_)};_;{token=Dig(v2,_)};_;{token=Dig(v1,_)}] -> v3^v2^v1  
57 - | [{token=Dig(v2,_)};_;{token=Dig(v1,_)}] -> v2^v1  
58 - | [{token=Dig(v1,_)}] -> v1  
59 - | _ -> failwith "concat_intnum"  
60 -  
61 -let dig_value t =  
62 - match t.token with  
63 - Dig(v,_) -> v  
64 - | _ -> failwith "dig_value"  
65 -  
66 -(* FIXME: problem z ordnum - wyklucza year co stanowi problem na końcu zdania *)  
67 -let digit_patterns1 = [ (* FIXME: problem z nadmiarowymi interpretacjami - trzeba uwzględnić w preprocesingu brak spacji - albo w dezambiguacji *)  
68 - [D "dig"; S "."; D "dig"; S "."; D "dig"; S "."; D "dig"; S "."; D "dig"], (fun tokens -> Proper(concat_orths tokens,"obj-id",[[]],["obj-id"]));  
69 - [D "dig"; S "."; D "dig"; S "."; D "dig"; S "."; D "dig"], (fun tokens -> Proper(concat_orths tokens,"obj-id",[[]],["obj-id"]));  
70 - [D "dig"; S "."; D "dig"; S "."; D "dig"], (fun tokens -> Proper(concat_orths tokens,"obj-id",[[]],["obj-id"]));  
71 - [D "dig"; S "."; D "dig"], (fun tokens -> Proper(concat_orths tokens,"obj-id",[[]],["obj-id"]));  
72 -(* [D "dig"], "obj-id"; *)  
73 - [D "pref3dig"; S "."; D "3dig"; S "."; D "3dig"; S "."; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum"));  
74 - [D "pref3dig"; S "."; D "3dig"; S "."; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum"));  
75 - [D "pref3dig"; S "."; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum"));  
76 - [D "pref3dig"; S " "; D "3dig"; S " "; D "3dig"; S " "; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum"));  
77 - [D "pref3dig"; S " "; D "3dig"; S " "; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum"));  
78 - [D "pref3dig"; S " "; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum"));  
79 - [D "intnum"; S "."], (function [token;_] -> Dig(concat_intnum [token],"ordnum") | _ -> failwith "digit_patterns1"); (* FIXME: to nie powinno wykluczać innych interpretacji *)  
80 - [D "day"; S "."; D "month"; S "."; D "year"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns2");  
81 - [D "day"; S "."; RD "month"; S "."; D "year"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns3");  
82 - [D "day"; S " "; RD "month"; S " "; D "year"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns3");  
83 - [D "day"; S "."; D "month"; S "."; D "2dig"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns2");  
84 - [D "day"; S "."; RD "month"; S "."; D "2dig"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns3");  
85 - [D "day"; S "."; D "month"; S "."], (function [day;_;month;_] -> Compound("day-month",[day.token;month.token]) | _ -> failwith "digit_patterns4");  
86 - [D "hour"; S "."; D "minute"], (function [hour;_;minute] -> Compound("hour-minute",[hour.token;minute.token]) | _ -> failwith "digit_patterns5");  
87 - [D "hour"; S ":"; D "minute"], (function [hour;_;minute] -> Compound("hour-minute",[hour.token;minute.token]) | _ -> failwith "digit_patterns6");  
88 - [D "intnum"; S ":"; D "intnum"], (function [x;_;y] -> Compound("match-result",[x.token;y.token]) | _ -> failwith "digit_patterns7");  
89 - ] (* bez 1 i *2 *3 *4 mamy rec *) (* w morfeuszu zawsze num:pl?*)  
90 -  
91 -let digit_patterns2 = [  
92 - [D "intnum"; S ","; D "dig"], (function [x;_;y] -> Dig(dig_value x ^ "," ^ dig_value y,"realnum") | _ -> failwith "digit_patterns8");  
93 -(* [S "-"; D "intnum"; S ","; D "dig"], (function [_;x;_;y] -> Dig("-" ^ dig_value x ^ "," ^ dig_value y,"realnum") | _ -> failwith "digit_patterns9");  
94 - [S "-"; D "intnum"], (function [_;x] -> Dig("-" ^ dig_value x,"realnum") | _ -> failwith "digit_patterns10");*)  
95 - [S "’"; D "2dig"], (function [_;x] -> Dig("’" ^ dig_value x,"year") | _ -> failwith "digit_patterns12");  
96 -(* [D "intnum"], "realnum"; *)  
97 - ]  
98 -  
99 -let compose_latek_lemma t interp =  
100 - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ "-latek", interp)  
101 -  
102 -let compose_latka_lemma t interp =  
103 - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ "-latka", interp)  
104 -  
105 -let compose_latek_int_lemma t t2 interp =  
106 - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ "-" ^ ENIAMtokens.get_orth t2.token ^ "-latek", interp)  
107 -  
108 -let compose_latka_int_lemma t t2 interp =  
109 - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ "-" ^ ENIAMtokens.get_orth t2.token ^ "-latka", interp)  
110 -  
111 -let digit_patterns3 = [  
112 - [S "-"; D "intnum"], (function [_;x] -> Dig("-" ^ dig_value x,"intnum") | _ -> failwith "digit_patterns10");  
113 - [S "-"; D "realnum"], (function [_;x] -> Dig("-" ^ dig_value x,"realnum") | _ -> failwith "digit_patterns10");  
114 - [D "intnum"; S "-"; D "intnum"], (function [x;_;y] -> Compound("intnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns11");  
115 - [D "realnum"; S "-"; D "realnum"], (function [x;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *)  
116 - [D "intnum"; S "-"; D "realnum"], (function [x;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *)  
117 - [D "realnum"; S "-"; D "intnum"], (function [x;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *)  
118 - [C "date"; S "-"; C "date"], (function [x;_;y] -> Compound("date-interval",[x.token;y.token]) | _ -> failwith "digit_patterns13");  
119 - [C "day-month"; S "-"; C "day-month"], (function [x;_;y] -> Compound("day-month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns14");  
120 - [D "day"; S "-"; D "day"], (function [x;_;y] -> Compound("day-interval",[x.token;y.token]) | _ -> failwith "digit_patterns15");  
121 - [D "month"; S "-"; D "month"], (function [x;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16");  
122 - [RD "month"; S "-"; RD "month"], (function [x;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns17");  
123 - [D "year"; S "-"; D "year"], (function [x;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16");  
124 - [D "year"; S "-"; D "2dig"], (function [x;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16");  
125 - [C "hour-minute"; S "-"; C "hour-minute"], (function [x;_;y] -> Compound("hour-minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns18");  
126 - [D "hour"; S "-"; D "hour"], (function [x;_;y] -> Compound("hour-interval",[x.token;y.token]) | _ -> failwith "digit_patterns19");  
127 - [D "minute"; S "-"; D "minute"], (function [x;_;y] -> Compound("minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns20");  
128 - [RD "roman"; S "-"; RD "roman"], (function [x;_;y] -> Compound("roman-interval",[x.token;y.token]) | _ -> failwith "digit_patterns21");  
129 - [D "intnum"; S " "; S "-"; S " "; D "intnum"], (function [x;_;_;_;y] -> Compound("intnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns11");  
130 - [D "realnum"; S " "; S "-"; S " "; D "realnum"], (function [x;_;_;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *)  
131 - [D "intnum"; S " "; S "-"; S " "; D "realnum"], (function [x;_;_;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *)  
132 - [D "realnum"; S " "; S "-"; S " "; D "intnum"], (function [x;_;_;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *)  
133 - [C "date"; S " "; S "-"; S " "; C "date"], (function [x;_;_;_;y] -> Compound("date-interval",[x.token;y.token]) | _ -> failwith "digit_patterns13");  
134 - [C "day-month"; S " "; S "-"; S " "; C "day-month"], (function [x;_;_;_;y] -> Compound("day-month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns14");  
135 - [D "day"; S " "; S "-"; S " "; D "day"], (function [x;_;_;_;y] -> Compound("day-interval",[x.token;y.token]) | _ -> failwith "digit_patterns15");  
136 - [D "month"; S " "; S "-"; S " "; D "month"], (function [x;_;_;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16");  
137 - [RD "month"; S " "; S "-"; S " "; RD "month"], (function [x;_;_;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns17");  
138 - [D "year"; S " "; S "-"; S " "; D "year"], (function [x;_;_;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16");  
139 - [D "year"; S " "; S "-"; S " "; D "2dig"], (function [x;_;_;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16");  
140 - [C "hour-minute"; S " "; S "-"; S " "; C "hour-minute"], (function [x;_;_;_;y] -> Compound("hour-minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns18");  
141 - [D "hour"; S " "; S "-"; S " "; D "hour"], (function [x;_;_;_;y] -> Compound("hour-interval",[x.token;y.token]) | _ -> failwith "digit_patterns19");  
142 - [D "minute"; S " "; S "-"; S " "; D "minute"], (function [x;_;_;_;y] -> Compound("minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns20");  
143 - [RD "roman"; S " "; S "-"; S " "; RD "roman"], (function [x;_;_;_;y] -> Compound("roman-interval",[x.token;y.token]) | _ -> failwith "digit_patterns21");  
144 - [D "intnum"; S "-"; O "latek"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:nom:m1" | _ -> failwith "digit_patterns22");  
145 - [D "intnum"; S "-"; O "latka"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:gen.acc:m1" | _ -> failwith "digit_patterns22");  
146 - [D "intnum"; S "-"; O "latkowi"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:dat:m1" | _ -> failwith "digit_patterns22");  
147 - [D "intnum"; S "-"; O "latkiem"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:inst:m1" | _ -> failwith "digit_patterns22");  
148 - [D "intnum"; S "-"; O "latku"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:loc.voc:m1" | _ -> failwith "digit_patterns22");  
149 - [D "intnum"; S "-"; O "latkowie"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:nom.voc:m1" | _ -> failwith "digit_patterns22");  
150 - [D "intnum"; S "-"; O "latków"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:gen.acc:m1" | _ -> failwith "digit_patterns22");  
151 - [D "intnum"; S "-"; O "latkom"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:dat:m1" | _ -> failwith "digit_patterns22");  
152 - [D "intnum"; S "-"; O "latkami"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:inst:m1" | _ -> failwith "digit_patterns22");  
153 - [D "intnum"; S "-"; O "latkach"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:loc:m1" | _ -> failwith "digit_patterns22");  
154 - [D "intnum"; S "-"; O "latka"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:nom:f" | _ -> failwith "digit_patterns22");  
155 - [D "intnum"; S "-"; O "latki"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:gen:f" | _ -> failwith "digit_patterns22");  
156 - [D "intnum"; S "-"; O "latce"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:dat.loc:f" | _ -> failwith "digit_patterns22");  
157 - [D "intnum"; S "-"; O "latkę"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:acc:f" | _ -> failwith "digit_patterns22");  
158 - [D "intnum"; S "-"; O "latką"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:inst:f" | _ -> failwith "digit_patterns22");  
159 - [D "intnum"; S "-"; O "latko"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:voc:f" | _ -> failwith "digit_patterns22");  
160 - [D "intnum"; S "-"; O "latki"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:nom.acc.voc:f" | _ -> failwith "digit_patterns22");  
161 - [D "intnum"; S "-"; O "latek"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:gen:f" | _ -> failwith "digit_patterns22");  
162 - [D "intnum"; S "-"; O "latkom"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:dat:f" | _ -> failwith "digit_patterns22");  
163 - [D "intnum"; S "-"; O "latkami"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:inst:f" | _ -> failwith "digit_patterns22");  
164 - [D "intnum"; S "-"; O "latkach"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:loc:f" | _ -> failwith "digit_patterns22");  
165 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latek"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:nom:m1" | _ -> failwith "digit_patterns22");  
166 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latka"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:gen.acc:m1" | _ -> failwith "digit_patterns22");  
167 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkowi"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:dat:m1" | _ -> failwith "digit_patterns22");  
168 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkiem"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:inst:m1" | _ -> failwith "digit_patterns22");  
169 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latku"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:loc.voc:m1" | _ -> failwith "digit_patterns22");  
170 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkowie"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:nom.voc:m1" | _ -> failwith "digit_patterns22");  
171 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latków"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:gen.acc:m1" | _ -> failwith "digit_patterns22");  
172 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkom"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:dat:m1" | _ -> failwith "digit_patterns22");  
173 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkami"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:inst:m1" | _ -> failwith "digit_patterns22");  
174 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkach"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:loc:m1" | _ -> failwith "digit_patterns22");  
175 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latka"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:nom:f" | _ -> failwith "digit_patterns22");  
176 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latki"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:gen:f" | _ -> failwith "digit_patterns22");  
177 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latce"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:dat.loc:f" | _ -> failwith "digit_patterns22");  
178 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkę"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:acc:f" | _ -> failwith "digit_patterns22");  
179 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latką"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:inst:f" | _ -> failwith "digit_patterns22");  
180 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latko"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:voc:f" | _ -> failwith "digit_patterns22");  
181 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latki"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:nom.acc.voc:f" | _ -> failwith "digit_patterns22");  
182 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latek"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:gen:f" | _ -> failwith "digit_patterns22");  
183 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkom"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:dat:f" | _ -> failwith "digit_patterns22");  
184 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkami"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:inst:f" | _ -> failwith "digit_patterns22");  
185 - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkach"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:loc:f" | _ -> failwith "digit_patterns22");  
186 - ]  
187 -  
(* Patterns that recognise bare host names ending in one of a fixed set of
   top-level domains.  Every match is replaced by a [Dig] token of category
   "url" whose value is built by [concat_orths2] from the matched tokens.
   For each domain the table holds, in this order, the shapes with one to
   five dot-separated letter sequences followed by the domain, and then one
   shape containing a hyphenated middle label. *)
let url_patterns1 =
  let as_url = (function l -> Dig(concat_orths2 l,"url")) in
  let dot = S "." in
  (* [labels n] is n letter sequences, each followed by a dot *)
  let rec labels n = if n = 0 then [] else L :: dot :: labels (n - 1) in
  let for_domain tld =
    let dotted = List.map (fun n -> (labels n @ [O tld], as_url)) [1; 2; 3; 4; 5] in
    dotted @ [([L; dot; L; S "-"; L; dot; O tld], as_url)] in
  List.flatten
    (List.map for_domain ["pl"; "uk"; "cz"; "eu"; "org"; "com"; "net"; "gov"])
238 -  
(* Second pass over addresses, applied after [url_patterns1] has produced
   [Dig(_,"url")] tokens:
   - letters, an at-sign and a url form an e-mail address,
   - the prefix http:// followed by a url is still a url.
   The second rule used to tag its result as "email", which mislabelled web
   addresses and kept [url_patterns3] (which matches only [D "url"]) from
   attaching a path to them. *)
let url_patterns2 = [
  [L; S "@"; D "url"], (function l -> Dig(concat_orths2 l,"email"));
  [O "http"; S ":"; S "/"; S "/"; D "url"], (function l -> Dig(concat_orths2 l,"url"));
  ]
243 -  
(* Third pass over addresses: extends an already recognised url with a
   trailing slash, a slash and a letter sequence, or a slash and a dotted
   pair of letter sequences.  The result is again a [Dig] of category "url". *)
let url_patterns3 =
  let as_url = (function l -> Dig(concat_orths2 l,"url")) in
  List.map (fun pattern -> (pattern, as_url)) [
    [D "url"; S "/"];
    [D "url"; S "/"; L];
    [D "url"; S "/"; L; S "."; L];
  ]
249 -  
(* State of one pattern while it is being matched against the token list. *)
type matching = {
  (* elements of a [Seq] that precede the first matched token; kept in
     reversed order and reversed back when the command is executed *)
  prefix : tokens list;
  (* token records matched so far, most recent first *)
  matched : token_record list;
  (* elements of a [Seq] that follow the last matched token *)
  suffix : tokens list;
  (* pattern elements that still have to be matched *)
  pattern : pat list;
  (* builds the replacement token from the matched records (in text order) *)
  command : token_record list -> token;
  (* builds a replacement token sequence from the matched records; used by
     the abbreviation variant of the matcher *)
  command_abr : token_record list -> tokens list;
}
258 -  
(* Builds the replacement for a completed matching: a [Seq] consisting of the
   stored prefix, one merged token and the stored suffix.  The merged token
   starts where the first matched token starts, its length is the sum of the
   matched lengths, and its content is produced by [matching.command].
   Any exception raised by the command propagates to the caller. *)
let execute_command matching =
  let matched = List.rev matching.matched in
  let total_len = Xlist.fold matched 0 (fun acc t -> t.len + acc) in
  let merged = Token{empty_token with
    orth = concat_orths matched;
    beg = (List.hd matched).beg;
    len = total_len;
    next = (List.hd matched).beg + total_len;
    token = matching.command matched;
    (* FIXME: add weights to individual rules and take the weights of the matched tokens into account *)
    attrs = ENIAMtokens.merge_attrs matched} in
  Seq (List.rev matching.prefix @ (merged :: matching.suffix))
270 -  
(* Abbreviation counterpart of [execute_command]: the matched records (in
   text order) are handed to [matching.command_abr], and the token sequence
   it returns is placed between the stored prefix and suffix. *)
let execute_abr_command matching =
  let expansion = matching.command_abr (List.rev matching.matched) in
  Seq (List.rev_append matching.prefix (expansion @ matching.suffix))
274 -  
(* Tests whether a single pattern element accepts a token.  The argument is
   a pair (pattern element, token).
   - [D cat]  accepts a [Dig] of category [cat]
   - [C s]    accepts a [Compound] whose sense is [s]
   - [S s]    accepts the [Symbol] [s]
   - [RD cat] accepts a [RomanDig] of category [cat]
   - [O orth] accepts a "dig" digit token, a symbol or any letter token whose
              first component equals [orth]
   - [L]      accepts any letter token
   - [CL]     accepts [CapLetter], [AllCap] and [SomeCap] tokens *)
let match_token (pattern, token) =
  match pattern with
  | D cat -> (match token with Dig(_,cat2) -> cat = cat2 | _ -> false)
  | C sense -> (match token with Compound(sense2,_) -> sense = sense2 | _ -> false)
  | S sym -> (match token with Symbol sym2 -> sym = sym2 | _ -> false)
  | RD cat -> (match token with RomanDig(_,cat2) -> cat = cat2 | _ -> false)
  | O expected ->
    (match token with
     | Dig(s,"dig") -> expected = s
     | Symbol s | SmallLetter s | AllSmall s | SomeCap s -> expected = s
     | CapLetter(s,_) -> expected = s
     | AllCap(s,_,_) -> expected = s
     | FirstCap(s,_,_,_) -> expected = s
     | _ -> false)
  | L ->
    (match token with
     | SmallLetter _ | CapLetter _ | AllSmall _ | AllCap _ | FirstCap _ | SomeCap _ -> true
     | _ -> false)
  | CL ->
    (match token with
     | CapLetter _ | AllCap _ | SomeCap _ -> true
     | _ -> false)
298 -  
(* Tries to start a matching at [tokens] using the first pattern element.
   Returns every extended matching (an empty list when nothing fits).
   Inside a [Seq] only the last element may start the match; the elements
   before it are appended, in reversed order, to the matching prefix. *)
let rec find_first_token matching pat tokens =
  match tokens with
  | Token t ->
    if match_token (pat,t.token) then [{matching with matched = t :: matching.matched}]
    else []
  | Variant l -> List.flatten (Xlist.map l (find_first_token matching pat))
  | Seq l ->
    (match List.rev l with
     | [] -> failwith "hd" (* same failure List.hd raises on an empty sequence *)
     | last :: before ->
       Xlist.map (find_first_token matching pat last)
         (fun m -> {m with prefix = m.prefix @ before}))
303 -  
(* Tries to continue a matching with a token that is neither the first nor
   the last one of the pattern.  A [Seq] never matches in this position;
   every alternative of a [Variant] is tried. *)
let rec find_middle_token matching pat tokens =
  match tokens with
  | Seq _ -> []
  | Variant l -> List.flatten (Xlist.map l (find_middle_token matching pat))
  | Token t ->
    if match_token (pat,t.token) then [{matching with matched = t :: matching.matched}]
    else []
308 -  
(* Tries to finish a matching with the last pattern element.
   Inside a [Seq] only the first element may end the match; the remaining
   elements are appended to the matching suffix. *)
let rec find_last_token matching pat tokens =
  match tokens with
  | Token t ->
    if match_token (pat,t.token) then [{matching with matched = t :: matching.matched}]
    else []
  | Variant l -> List.flatten (Xlist.map l (find_last_token matching pat))
  | Seq l ->
    (match l with
     | [] -> failwith "hd" (* same failure List.hd raises on an empty sequence *)
     | first :: rest ->
       Xlist.map (find_last_token matching pat first)
         (fun m -> {m with suffix = m.suffix @ rest}))
313 -  
(* Continues the partial [matchings] over the remaining token list.
   Returns the replacement (a [Variant] of all completed matchings) paired
   with the tokens that follow the match.  Longer matches win: matchings
   completed at the current token are used only when no partial matching
   can be completed further on.
   Raises [Not_found] when nothing can be completed.
   Fails with "find_pattern: ni" when a matching has no pattern element left. *)
let rec find_pattern_tail matchings = function
  | [] -> raise Not_found
  | token :: rest ->
    (* advance every matching by the current token *)
    let step (found,finished) matching =
      match matching.pattern with
      | [pat] -> found, (find_last_token {matching with pattern=[]} pat token) @ finished
      | pat :: pattern -> (find_middle_token {matching with pattern=pattern} pat token) @ found, finished
      | [] -> failwith "find_pattern: ni" in
    let found,finished = Xlist.fold matchings ([],[]) step in
    (* replacement built from the matchings that end at the current token;
       a command raising Not_found drops its matching *)
    let complete_here () =
      let results = List.flatten (Xlist.map finished (fun matching ->
        try [execute_command matching] with Not_found -> [])) in
      if results = [] then raise Not_found else (Variant results, rest) in
    if found = [] then complete_here ()
    else (try find_pattern_tail found rest with Not_found -> complete_here ())
328 -  
(* Scans the token list from left to right and replaces every place where
   one of [matchings] applies.  [rev] accumulates the processed tokens in
   reversed order; the result is in text order.

   Patterns must not have length 1: after the first element is consumed the
   remaining pattern would be empty, which [find_pattern_tail] rejects.
   Fails with "find_pattern: empty pattern" when a pattern is empty.

   The lookup of the pattern tail is the only part guarded against
   [Not_found].  The recursive call stays outside the handler, so it is a
   tail call and the handler cannot intercept an exception coming from the
   processing of later tokens. *)
let rec find_pattern matchings rev = function
  | [] -> List.rev rev
  | token :: l ->
    let found = Xlist.fold matchings [] (fun found matching ->
      match matching.pattern with
      | pat :: pattern -> (find_first_token {matching with pattern=pattern} pat token) @ found
      | [] -> failwith "find_pattern: empty pattern") in
    let replacement =
      if found = [] then None
      else (try Some (find_pattern_tail found l) with Not_found -> None) in
    (match replacement with
     | Some (merged,remaining) -> find_pattern matchings (merged :: rev) remaining
     | None -> find_pattern matchings (token :: rev) l)
342 -  
(* Applies a table of (pattern, command) pairs to the token list.  Each pair
   becomes a fresh matching whose abbreviation command is unused and returns
   an empty list. *)
let find_patterns patterns tokens =
  let initial (pattern,command) =
    {prefix=[]; matched=[]; suffix=[]; pattern=pattern; command=command;
     command_abr=(fun _ -> [])} in
  find_pattern (Xlist.map patterns initial) [] tokens
346 -  
(* Abbreviation counterpart of [find_pattern_tail].  It differs in two
   respects: a matching whose pattern is already exhausted counts as
   finished instead of being an error, and completed matchings are turned
   into tokens by [execute_abr_command].
   Raises [Not_found] when nothing can be completed. *)
let rec find_abr_pattern_tail matchings = function
  | [] -> raise Not_found
  | token :: rest ->
    (* advance every matching by the current token *)
    let step (found,finished) matching =
      match matching.pattern with
      | [] -> found, matching :: finished
      | [pat] -> found, (find_last_token {matching with pattern=[]} pat token) @ finished
      | pat :: pattern -> (find_middle_token {matching with pattern=pattern} pat token) @ found, finished in
    let found,finished = Xlist.fold matchings ([],[]) step in
    (* replacement built from the matchings finished at the current token;
       a command raising Not_found drops its matching *)
    let complete_here () =
      let results = List.flatten (Xlist.map finished (fun matching ->
        try [execute_abr_command matching] with Not_found -> [])) in
      if results = [] then raise Not_found else (Variant results, rest) in
    if found = [] then complete_here ()
    else (try find_abr_pattern_tail found rest with Not_found -> complete_here ())
361 -  
(* Abbreviation counterpart of [find_pattern]: scans the token list from left
   to right and replaces every place where one of [matchings] applies, using
   [find_abr_pattern_tail] to complete a match.  [rev] accumulates the
   processed tokens in reversed order; the result is in text order.
   Fails with "find_abr_pattern: empty pattern" when a pattern is empty.

   The lookup of the pattern tail is the only part guarded against
   [Not_found].  The recursive call stays outside the handler, so it is a
   tail call and the handler cannot intercept an exception coming from the
   processing of later tokens. *)
let rec find_abr_pattern matchings rev = function
  | [] -> List.rev rev
  | token :: l ->
    let found = Xlist.fold matchings [] (fun found matching ->
      match matching.pattern with
      | pat :: pattern -> (find_first_token {matching with pattern=pattern} pat token) @ found
      | [] -> failwith "find_abr_pattern: empty pattern") in
    let replacement =
      if found = [] then None
      else (try Some (find_abr_pattern_tail found l) with Not_found -> None) in
    (match replacement with
     | Some (merged,remaining) -> find_abr_pattern matchings (merged :: rev) remaining
     | None -> find_abr_pattern matchings (token :: rev) l)
374 -  
(* Applies a table of (pattern, abbreviation command) pairs to the token
   list.  Each pair becomes a fresh matching whose ordinary command is
   unused and returns an empty symbol. *)
let find_abr_patterns patterns tokens =
  let initial (pattern,command) =
    {prefix=[]; matched=[]; suffix=[]; pattern=pattern;
     command=(fun _ -> Symbol ""); command_abr=command} in
  find_abr_pattern (Xlist.map patterns initial) [] tokens
378 -  
(* Runs all replacement pattern tables over the token list, one after
   another.  After every table the result is normalised with
   [normalize_tokens] before the next table is applied.  The order of the
   stages matters: later tables match tokens produced by earlier ones
   (the url tables, for instance, build on each other). *)
let find_replacement_patterns tokens =
  let stages = [
    digit_patterns1;
    digit_patterns2;
    digit_patterns3;
    ENIAMacronyms.acronym_patterns;
    ENIAMacronyms.mte_patterns;
    ENIAMacronyms.name_patterns;
    url_patterns1;
    url_patterns2;
    url_patterns3;
  ] in
  List.fold_left
    (fun tokens patterns -> normalize_tokens [] (find_patterns patterns tokens))
    tokens stages
402 -  
(* Sets the [next] position of the token (or of the last token of a
   sequence, or of every alternative of a variant) to [n].
   Fails with "set_next_id n" on an empty sequence. *)
let rec set_next_id n tokens =
  match tokens with
  | Token t -> Token{t with next=n}
  | Variant l -> Variant(Xlist.map l (set_next_id n))
  | Seq l ->
    (match List.rev l with
     | [] -> failwith "set_next_id n"
     | last :: rev_init -> Seq(List.rev (set_next_id n last :: rev_init)))
410 -  
(* Removes single-space symbol tokens from the list.  When an element is
   directly followed by a space token, the element takes over the [next]
   position of that space.  [rev] accumulates the kept elements in reversed
   order. *)
let rec remove_spaces rev tokens =
  match tokens with
  | [] -> List.rev rev
  | x :: Token{token=Symbol " "; next=n; _} :: l ->
    remove_spaces (set_next_id n x :: rev) l
  | Token{token=Symbol " "; _} :: l -> remove_spaces rev l
  | x :: l -> remove_spaces (x :: rev) l
tokenizer/eniam-tokenizer-1.0/ENIAMtokenizer.ml deleted
1 -(*  
2 - * ENIAMtokenizer, a tokenizer for Polish  
3 - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>  
4 - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences  
5 - *  
6 - * This library is free software: you can redistribute it and/or modify  
7 - * it under the terms of the GNU Lesser General Public License as published by  
8 - * the Free Software Foundation, either version 3 of the License, or  
9 - * (at your option) any later version.  
10 - *  
11 - * This library is distributed in the hope that it will be useful,  
12 - * but WITHOUT ANY WARRANTY; without even the implied warranty of  
13 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  
14 - * GNU General Public License for more details.  
15 - *  
16 - * You should have received a copy of the GNU Lesser General Public License  
17 - * along with this program. If not, see <http://www.gnu.org/licenses/>.  
18 - *)  
19 -  
20 -open Xstd  
21 -open ENIAMtokenizerTypes  
22 -  
(* Textual rendering of a token structure; [i] is the indentation level
   passed on to [ENIAMtokens.string_of_tokens]. *)
let string_of i tokens =
  ENIAMtokens.string_of_tokens i tokens
25 -  
(* Tokenizes a UTF-8 string.  The stages are: character classification,
   raw tokenization, normalisation, replacement patterns, removal of space
   tokens, abbreviation patterns and a final normalisation. *)
let parse query =
  query
  |> Xunicode.classified_chars_of_utf8_string
  |> ENIAMtokens.tokenize
  |> ENIAMpatterns.normalize_tokens []
  |> ENIAMpatterns.find_replacement_patterns
  |> ENIAMpatterns.remove_spaces []
  |> ENIAMpatterns.find_abr_patterns ENIAMacronyms.abr_patterns
  |> ENIAMpatterns.normalize_tokens []
tokenizer/eniam-tokenizer-1.0/ENIAMtokenizerTypes.ml deleted
1 -(*  
2 - * ENIAMtokenizer, a tokenizer for Polish  
3 - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>  
4 - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences  
5 - *  
6 - * This library is free software: you can redistribute it and/or modify  
7 - * it under the terms of the GNU Lesser General Public License as published by  
8 - * the Free Software Foundation, either version 3 of the License, or  
9 - * (at your option) any later version.  
10 - *  
11 - * This library is distributed in the hope that it will be useful,  
12 - * but WITHOUT ANY WARRANTY; without even the implied warranty of  
13 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  
14 - * GNU General Public License for more details.  
15 - *  
16 - * You should have received a copy of the GNU Lesser General Public License  
17 - * along with this program. If not, see <http://www.gnu.org/licenses/>.  
18 - *)  
19 -  
20 -open Xstd  
21 -  
(* Length of a single character of the text, in position units *)
let factor = 100
24 -  
(* Content of a token *)
type token =
  | SmallLetter of string (* orth *)
  | CapLetter of string * string (* orth * lowercase *)
  | AllSmall of string (* orth *)
  | AllCap of string * string * string (* orth * lowercase * all lowercase *)
  | FirstCap of string * string * string * string (* orth * all lowercase * first letter uppercase * first letter lowercase *)
  | SomeCap of string (* orth *)
  | RomanDig of string * string (* value * cat *)
  | Interp of string (* orth *)
  | Symbol of string (* orth *)
  | Dig of string * string (* value * cat *)
  | Other of string (* orth *)
  | Lemma of string * string * string list list list (* lemma * cat * interp *)
  | Proper of string * string * string list list list * string list (* lemma * cat * interp * senses *)
  | Compound of string * token list (* sense * components *)

(* The text is represented as a collection of token_record values holding
   the information about the individual tokens *)
and token_record = {
  orth : string;      (* character sequence of the original text that makes up the token *)
  corr_orth : string; (* the same character sequence with errors corrected *)
  beg : int;          (* start position of the token, relative to the beginning of the paragraph *)
  len : int;          (* length of the token *)
  next : int;         (* start position of the next token, relative to the beginning of the paragraph *)
  token : token;      (* content of the token *)
  attrs : string list; (* additional attributes *)
}
54 -  
(* Tokens are stored in a data structure that allows efficient search for
   token sequences; the structure itself carries no information *)
type tokens =
  | Token of token_record
  | Variant of tokens list
  | Seq of tokens list
61 -  
(* Elements of token patterns; each constructor is interpreted by the
   pattern matcher *)
type pat =
  | L
  | CL
  | D of string
  | C of string
  | S of string
  | RD of string
  | O of string
63 -  
(* Token record with neutral values in every field; used as the base of
   record updates *)
let empty_token = {
  orth = "";
  corr_orth = "";
  beg = 0;
  len = 0;
  next = 0;
  token = Symbol "";
  attrs = [];
}
66 -  
(* Attribute-value pairs read from the file "config-tokenizer".  When
   loading fails for any reason a message is printed and the configuration
   is empty. *)
let config =
  match File.load_attr_val_pairs "config-tokenizer" with
  | pairs -> pairs
  | exception _ ->
    print_endline "ENIAMtokenizer config file not found";
    []
70 -  
(* Value of the MTE_FILENAME configuration variable.  When the variable is
   missing a message is printed and the name is the empty string. *)
let mte_filename =
  match Xlist.assoc config "MTE_FILENAME" with
  | filename -> filename
  | exception Not_found ->
    print_endline "ENIAMtokenizer MTE_FILENAME config variable undefined";
    ""
tokenizer/eniam-tokenizer-1.0/ENIAMtokens.ml deleted
1 -(*  
2 - * ENIAMtokenizer, a tokenizer for Polish  
3 - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>  
4 - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences  
5 - *  
6 - * This library is free software: you can redistribute it and/or modify  
7 - * it under the terms of the GNU Lesser General Public License as published by  
8 - * the Free Software Foundation, either version 3 of the License, or  
9 - * (at your option) any later version.  
10 - *  
11 - * This library is distributed in the hope that it will be useful,  
12 - * but WITHOUT ANY WARRANTY; without even the implied warranty of  
13 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  
14 - * GNU General Public License for more details.  
15 - *  
16 - * You should have received a copy of the GNU Lesser General Public License  
17 - * along with this program. If not, see <http://www.gnu.org/licenses/>.  
18 - *)  
19 -  
20 -open Printf  
21 -open ENIAMtokenizerTypes  
22 -open Xstd  
23 -open Xunicode  
24 -  
(* Renders a list of interpretations: values within one position are joined
   with a dot, positions with a colon and whole interpretations with a
   vertical bar. *)
let string_of_interps interps =
  let string_of_interp interp =
    String.concat ":" (Xlist.map interp (fun values -> String.concat "." values)) in
  String.concat "|" (Xlist.map interps string_of_interp)
29 -  
(* Full textual rendering of a token: the constructor name followed by its
   components in parentheses.  Components of a [Compound] are rendered
   recursively and separated by semicolons. *)
let rec string_of_token token =
  match token with
  | SmallLetter orth -> sprintf "SmallLetter(%s)" orth
  | CapLetter(orth,lc) -> sprintf "CapLetter(%s,%s)" orth lc
  | AllSmall orth -> sprintf "AllSmall(%s)" orth
  | AllCap(orth,lc,lc2) -> sprintf "AllCap(%s,%s,%s)" orth lc lc2
  | FirstCap(orth,lc,cl,ll) -> sprintf "FirstCap(%s,%s,%s,%s)" orth lc cl ll
  | SomeCap orth -> sprintf "SomeCap(%s)" orth
  | RomanDig(v,t) -> sprintf "RomanDig(%s,%s)" v t
  | Interp orth -> sprintf "Interp(%s)" orth
  | Symbol orth -> sprintf "Symbol(%s)" orth
  | Dig(v,t) -> sprintf "Dig(%s,%s)" v t
  | Other orth -> sprintf "Other(%s)" orth
  | Lemma(lemma,cat,interps) ->
    sprintf "Lemma(%s,%s,%s)" lemma cat (string_of_interps interps)
  | Proper(lemma,cat,interps,senses) ->
    sprintf "Proper(%s,%s,%s,%s)" lemma cat (string_of_interps interps)
      (String.concat "|" senses)
  | Compound(sense,components) ->
    let rendered = String.concat ";" (Xlist.map components string_of_token) in
    sprintf "Compound(%s,[%s])" sense rendered
48 -  
(* [spaces i] is a string of [i] spaces, used as indentation.
   The string is built in a buffer, so the cost is linear in [i] and the
   stack depth does not grow with [i].
   @raise Invalid_argument when [i] is negative (the recursive version never
   terminated normally for such an argument). *)
let spaces i =
  if i < 0 then invalid_arg "spaces: negative indentation level";
  let buf = Buffer.create i in
  for _k = 1 to i do
    Buffer.add_string buf " "
  done;
  Buffer.contents buf
51 -  
(* Multi-line rendering of a token structure.  [i] is the indentation level:
   every line starts with [spaces i], and the elements of a [Variant] or
   [Seq] are rendered one level deeper, separated by a semicolon and a
   newline. *)
let rec string_of_tokens i tokens =
  let indent = spaces i in
  let children l = String.concat ";\n" (Xlist.map l (string_of_tokens (i+1))) in
  match tokens with
  | Token t ->
    sprintf "%s{orth=%s;beg=%d;len=%d;next=%d;token=%s;attrs=[%s]}"
      indent t.orth t.beg t.len t.next (string_of_token t.token)
      (String.concat ";" t.attrs)
  | Variant l -> sprintf "%sVariant[\n%s]" indent (children l)
  | Seq l -> sprintf "%sSeq[\n%s]" indent (children l)
57 -  
(* Short rendering of a token: only the constructor name, except for
   [Interp], [Symbol] and [Other], which also show their text. *)
let string_of_token_simple token =
  match token with
  | SmallLetter _ -> "SmallLetter"
  | CapLetter _ -> "CapLetter"
  | AllSmall _ -> "AllSmall"
  | AllCap _ -> "AllCap"
  | FirstCap _ -> "FirstCap"
  | SomeCap _ -> "SomeCap"
  | RomanDig _ -> "RomanDig"
  | Interp orth -> sprintf "Interp(%s)" orth
  | Symbol orth -> sprintf "Symbol(%s)" orth
  | Dig _ -> "Dig"
  | Other orth -> sprintf "Other(%s)" orth
  | Lemma _ -> "Lemma"
  | Proper _ -> "Proper"
  | Compound _ -> "Compound"
75 -  
(* Single-line rendering of a token structure built from the short token
   names; elements of a [Variant] or [Seq] are separated by semicolons. *)
let rec string_of_tokens_simple tokens =
  let children l = String.concat ";" (Xlist.map l string_of_tokens_simple) in
  match tokens with
  | Token t -> string_of_token_simple t.token
  | Variant l -> sprintf "Variant[%s]" (children l)
  | Seq l -> sprintf "Seq[%s]" (children l)
80 -  
(* Returns the orthographic form stored in a letter, symbol or [Other]
   token, the value of a [Dig] token, and the empty string for every other
   kind of token. *)
let get_orth token =
  match token with
  | SmallLetter orth | AllSmall orth | SomeCap orth | Symbol orth | Other orth -> orth
  | CapLetter(orth,_) -> orth
  | AllCap(orth,_,_) -> orth
  | FirstCap(orth,_,_,_) -> orth
  | Dig(v,_) -> v
  | RomanDig _ | Interp _ | Lemma _ | Proper _ | Compound _ -> ""
92 -  
93 -  
(* Decimal spellings of the integers from [lo] to [hi]: every number appears
   in its plain form and in a form zero-padded to two digits (for numbers of
   two digits both forms coincide). *)
let dig_strings_in_range lo hi =
  let rec collect n acc =
    if n < lo then acc
    else collect (n - 1) (string_of_int n :: Printf.sprintf "%02d" n :: acc) in
  collect hi []

(* Digit strings accepted as a month number: 1-12, with or without a leading zero *)
let months = StringSet.of_list (dig_strings_in_range 1 12)

(* Digit strings accepted as an hour: 0-24, with or without a leading zero *)
let hours = StringSet.of_list (dig_strings_in_range 0 24)

(* Digit strings accepted as a day of month: 1-31, with or without a leading zero *)
let days = StringSet.of_list (dig_strings_in_range 1 31)

(* Roman numerals accepted as a month number *)
let romanmonths = StringSet.of_list [
  "I"; "II"; "III"; "IV"; "V"; "VI"; "VII"; "VIII"; "IX"; "X"; "XI"; "XII"]
100 -  
101 -  
(* Marker token of length 1 placed at position [i]; [tag] is the text of the
   [Interp] token it carries *)
let marker_token tag i =
  Token{empty_token with beg=i; len=1; next=i+1; token=Interp tag}

(* Marker token for a sentence beginning at position [i] *)
let s_beg i = marker_token "<sentence>" i

(* Marker token for a clause beginning at position [i] *)
let c_beg i = marker_token "<clause>" i
104 -  
(* Token for a digit sequence starting at position [i].  Its length is the
   number of elements of [digs] multiplied by [factor]; it carries the
   attribute "maybe cs". *)
let dig_token orth i digs token =
  let width = Xlist.size digs * factor in
  Token{empty_token with
    orth = orth;
    beg = i;
    len = width;
    next = i + width;
    token = token;
    attrs = ["maybe cs"]}
107 -  
(* Variant of [dig_token] for a digit sequence that opens a sentence: the
   sentence and clause markers take the first two position units, and the
   digit token is shortened by the same amount so that the whole sequence
   still ends at the same position. *)
let sc_dig_token orth i digs token =
  let width = Xlist.size digs * factor in
  let number = Token{empty_token with
    orth = orth;
    beg = i + 2;
    len = width - 2;
    next = i + width;
    token = token;
    attrs = ["maybe cs"]} in
  Seq[s_beg i; c_beg (i+1); number]
110 -  
(* Tokens for a digit sequence read as [Dig(v,cat)]: always the plain token
   and, when the sequence may open a sentence ([poss_s_beg]), also the
   version preceded by sentence and clause markers. *)
let dig_tokens orth poss_s_beg i digs v cat =
  let plain = dig_token orth i digs (Dig(v,cat)) in
  if poss_s_beg then [plain; sc_dig_token orth i digs (Dig(v,cat))]
  else [plain]
117 -  
(* Builds the [Variant] of all readings of a digit sequence [digs] (a list of
   one-character strings) starting at position [i].  The readings are, in
   this order: plain digits, an object identifier, integer, year, month,
   hour, day, minute, three digits, two digits and a prefix of at most three
   digits; each is included only when the sequence qualifies for it.
   The calendar and clock readings use the value without leading zeros.
   Fails with "merge_digits" when the sequence cannot be converted to an
   integer. *)
let merge_digits poss_s_beg i digs =
  let orth = String.concat "" digs in
  let v = try string_of_int (int_of_string orth) with _ -> failwith "merge_digits" in
  let count = Xlist.size digs in
  let first = List.hd digs in
  (* readings of the sequence as Dig(value,cat), or nothing when [cond] fails *)
  let reading_if cond value cat =
    if cond then dig_tokens orth poss_s_beg i digs value cat else [] in
  let obj_id = Token{empty_token with
    orth = orth;
    beg = i;
    len = count * factor;
    next = i + count * factor;
    token = Proper(orth,"obj-id",[[]],["obj-id"]);
    attrs = ["maybe cs"]} in
  Variant (List.flatten [
    reading_if true orth "dig";
    [obj_id];
    reading_if (digs = ["0"] || first <> "0") orth "intnum";
    reading_if (first <> "0") v "year";
    reading_if (StringSet.mem months orth) v "month";
    reading_if (StringSet.mem hours orth) v "hour";
    reading_if (StringSet.mem days orth) v "day";
    reading_if (count = 2 && first < "6") v "minute";
    reading_if (count = 3) orth "3dig";
    reading_if (count = 2) orth "2dig";
    reading_if (count <= 3 && first <> "0") orth "pref3dig";
  ])
148 -  
(* Final stage of Roman numeral recognition: accepts zero to three capital
   I characters, optionally followed by a small w, and nothing else.
   Returns the accumulated value [v] increased by the number of I characters
   and a flag telling whether the trailing w was present.  Any other input
   gives (0,false). *)
let recognize_roman_I v numerals =
  (* strip at most three leading I characters *)
  let rec leading_ones k = function
    | Capital("I",_) :: rest when k < 3 -> leading_ones (k + 1) rest
    | rest -> k, rest in
  match leading_ones 0 numerals with
  | k, [] -> v + k, false
  | k, [Small("w")] -> v + k, true
  | _ -> 0, false
159 -  
(* Units stage of Roman numeral recognition.  Handles the subtractive forms
   IV (4) and IX (9), optionally followed by a small w, and a leading V (5);
   everything else is passed to [recognize_roman_I] unchanged. *)
let recognize_roman_V v numerals =
  match numerals with
  | ForeignCapital("V",_) :: rest -> recognize_roman_I (v+5) rest
  | Capital("I",_) :: ForeignCapital(("V" | "X") as big,_) :: ([] | [Small("w")] as tail) ->
    let units = if big = "V" then 4 else 9 in
    v + units, tail <> []
  | other -> recognize_roman_I v other
167 -  
(** Consumes up to three leading [X] letters, adds ten for each of them
    to [v] and passes the remaining letters to [recognize_roman_V]. *)
let recognize_roman_X v letters =
  let rec strip count = function
    | ForeignCapital("X",_) :: rest when count < 3 -> strip (count + 1) rest
    | rest -> count, rest in
  let count, rest = strip 0 letters in
  recognize_roman_V (v + 10 * count) rest
173 -  
(** Recognizes the tens part built around [L]: the subtractive forms
    [XL] (40) and [XC] (90) continue with [recognize_roman_V], a leading
    [L] (50) continues with [recognize_roman_X]; otherwise the letters
    are handed unchanged to [recognize_roman_X]. *)
let recognize_roman_L v letters =
  match letters with
  | Capital("L",_) :: rest -> recognize_roman_X (v + 50) rest
  | ForeignCapital("X",_) :: Capital("L",_) :: rest -> recognize_roman_V (v + 40) rest
  | ForeignCapital("X",_) :: Capital("C",_) :: rest -> recognize_roman_V (v + 90) rest
  | _ -> recognize_roman_X v letters
179 -  
(** Consumes up to three leading [C] letters, adds one hundred for each
    of them to [v] and passes the remaining letters to
    [recognize_roman_L]. *)
let recognize_roman_C v letters =
  let rec strip count = function
    | Capital("C",_) :: rest when count < 3 -> strip (count + 1) rest
    | rest -> count, rest in
  let count, rest = strip 0 letters in
  recognize_roman_L (v + 100 * count) rest
185 -  
(** Recognizes the hundreds part built around [D]: the subtractive forms
    [CD] (400) and [CM] (900) continue with [recognize_roman_L], a
    leading [D] (500) continues with [recognize_roman_C]; otherwise the
    letters are handed unchanged to [recognize_roman_C]. *)
let recognize_roman_D v letters =
  match letters with
  | Capital("D",_) :: rest -> recognize_roman_C (v + 500) rest
  | Capital("C",_) :: Capital("D",_) :: rest -> recognize_roman_L (v + 400) rest
  | Capital("C",_) :: Capital("M",_) :: rest -> recognize_roman_L (v + 900) rest
  | _ -> recognize_roman_C v letters
191 -  
(** Entry point of the Roman numeral recognizer chain: consumes up to
    three leading [M] letters, adds one thousand for each of them to [v]
    and passes the remaining letters to [recognize_roman_D]. *)
let recognize_roman_M v letters =
  let rec strip count = function
    | Capital("M",_) :: rest when count < 3 -> strip (count + 1) rest
    | rest -> count, rest in
  let count, rest = strip 0 letters in
  recognize_roman_D (v + 1000 * count) rest
197 -  
(** Concatenates the written forms of a list of letters into one string.
    For capitals the first component of the pair is used.
    Fails with [Failure] carrying the text [merge] when the list holds
    anything other than a letter. *)
let merge l =
  let written_form = function
    | Capital(s,_) | ForeignCapital(s,_) | Small s | ForeignSmall s -> s
    | _ -> failwith "merge" in
  String.concat "" (Xlist.map l written_form)
205 -  
(** Replaces a leading capital letter by its small counterpart (the
    second component of the capital); lists that are empty or already
    start with a small letter are returned as they are. Fails when the
    first element is not a letter. *)
let lowercase_first letters =
  match letters with
  | [] -> []
  | (Small _ | ForeignSmall _) :: _ -> letters
  | Capital(_,lower) :: rest -> Small lower :: rest
  | ForeignCapital(_,lower) :: rest -> ForeignSmall lower :: rest
  | _ -> failwith "lowercase_first"
213 -  
(** Converts every letter of the list to its small form, keeping the
    native/foreign distinction. Fails when the list holds anything other
    than a letter. *)
let lowercase_all letters =
  let to_small = function
    | Capital(_,lower) | Small lower -> Small lower
    | ForeignCapital(_,lower) | ForeignSmall lower -> ForeignSmall lower
    | _ -> failwith "lowercase_all" in
  List.map to_small letters
221 -  
(** Keeps the first element untouched and converts all the following
    letters to small ones with [lowercase_all]. *)
let lowercase_rest letters =
  match letters with
  | first :: rest -> first :: lowercase_all rest
  | [] -> []
225 -  
(** Tells whether the first letter of the list is a capital one.
    Fails on the empty list and on a non-letter first element. *)
let first_capital letters =
  match letters with
  | (Capital _ | ForeignCapital _) :: _ -> true
  | (Small _ | ForeignSmall _) :: _ -> false
  | _ -> failwith "first_capital"
232 -  
(** Tells whether every letter of the list is a capital one; the empty
    list counts as all-capital. Scanning stops at the first small
    letter. Fails with [Failure] when a non-letter element is reached.
    The failure message now names this function instead of the
    copy-pasted [first_capital]. *)
let rec all_capital = function
  | [] -> true
  | (Capital _ | ForeignCapital _) :: rest -> all_capital rest
  | (Small _ | ForeignSmall _) :: _ -> false
  | _ -> failwith "all_capital"
240 -  
(** Tells whether every letter of the list is a small one; the empty
    list counts as all-small. Scanning stops at the first capital
    letter. Fails with [Failure] when a non-letter element is reached.
    The failure message now names this function instead of the
    copy-pasted [first_capital]. *)
let rec all_small = function
  | [] -> true
  | (Small _ | ForeignSmall _) :: rest -> all_small rest
  | (Capital _ | ForeignCapital _) :: _ -> false
  | _ -> failwith "all_small"
248 -  
(** Tells whether all letters after the first one are capitals.
    Fails on the empty list. *)
let rest_capital letters =
  match letters with
  | _ :: rest -> all_capital rest
  | [] -> failwith "rest_capital"
252 -  
(** Tells whether all letters after the first one are small.
    Fails on the empty list. *)
let rest_small letters =
  match letters with
  | _ :: rest -> all_small rest
  | [] -> failwith "rest_small"
256 -  
(** Returns the first component of the leading capital letter.
    Fails when the list does not start with a capital. *)
let get_first_cap letters =
  match letters with
  | (Capital(upper,_) | ForeignCapital(upper,_)) :: _ -> upper
  | _ -> failwith "get_first_cap"
261 -  
(** Returns the second component of the leading capital letter.
    Fails when the list does not start with a capital. *)
let get_first_lower letters =
  match letters with
  | (Capital(_,lower) | ForeignCapital(_,lower)) :: _ -> lower
  | _ -> failwith "get_first_lower"
266 -  
267 -(*let cs_weight = -1.  
268 -let sc_cap_weight = -0.3*)  
269 -  
(** Selects the token kinds that [add_attr] is allowed to annotate:
    the letter and word tokens produced by [recognize_stem]. *)
let is_add_attr_token token =
  match token with
  | SmallLetter _ | CapLetter _ | AllSmall _ | AllCap _ | FirstCap _ | SomeCap _ -> true
  | _ -> false
278 -  
(** Prepends attribute [s] to every token accepted by
    [is_add_attr_token], descending through [Variant] and [Seq] nodes;
    other tokens are left unchanged. *)
let rec add_attr s tokens =
  match tokens with
  | Token t when is_add_attr_token t.token -> Token {t with attrs = s :: t.attrs}
  | Token _ -> tokens
  | Variant alternatives -> Variant (Xlist.map alternatives (add_attr s))
  | Seq elements -> Seq (Xlist.map elements (add_attr s))
283 -  
(** Builds the token structure for a run of letters starting at
    position [i].

    The letters are classified by their capitalisation (single small
    letter, single capital, first capital, all capitals, all small,
    mixed) and a token, or a [Variant] of alternative readings, is
    produced for that class. When [poss_s_beg] is set, readings that
    open a sentence are added: a [Seq] of [s_beg i], [c_beg (i+1)] and
    the token shifted by two position units. Readings that change the
    written case carry the [cs] attribute, readings that keep it where
    a change was possible carry [maybe cs].

    When [has_sufix] is set the all-capitals reading is omitted and the
    result is marked with [required validated lemmatization].

    NOTE(review): for a single capital letter with [poss_s_beg] unset
    the [SmallLetter] reading carries the original (capital) orth,
    whereas with [poss_s_beg] set it carries the lowercased one.
    This looks inconsistent; behaviour is preserved here, confirm. *)
let recognize_stem poss_s_beg has_sufix i letters =
  let orth = merge letters in
  let span = Xlist.size letters * factor in
  let base = {empty_token with orth = orth; beg = i; len = span; next = i + span} in
  (* record for one reading; [extra] attributes go in front of the base ones *)
  let tok token extra = {base with token = token; attrs = extra @ base.attrs} in
  (* sentence-opening variant of a reading *)
  let at_sentence_start r =
    Seq [s_beg i; c_beg (i+1); Token {r with beg = r.beg + 2; len = r.len - 2}] in
  let shape =
    if Xlist.size letters = 1 then
      (if first_capital letters then `Cap_letter else `Small_letter)
    else if first_capital letters then
      (if rest_small letters then `First_cap
       else if rest_capital letters then `All_cap
       else `Some_cap)
    else
      (if rest_small letters then `All_small else `Some_cap) in
  let recognized =
    match shape with
    | `Small_letter -> Token (tok (SmallLetter orth) [])
    | `All_small -> Token (tok (AllSmall orth) [])
    | `Some_cap -> Token (tok (SomeCap orth) [])
    | `Cap_letter ->
      let lower = merge (lowercase_first letters) in
      let as_capital = tok (CapLetter(orth,lower)) ["maybe cs"] in
      if poss_s_beg then Variant [
        Token (tok (SmallLetter lower) ["cs"]);
        at_sentence_start (tok (SmallLetter lower) ["maybe cs"]);
        Token as_capital;
        at_sentence_start as_capital]
      else Variant [
        Token (tok (SmallLetter orth) ["cs"]);
        Token as_capital]
    | `First_cap ->
      let lower = merge (lowercase_first letters) in
      let first_cap =
        tok (FirstCap(orth,lower,get_first_cap letters,get_first_lower letters)) [] in
      if poss_s_beg then Variant [
        at_sentence_start (tok (AllSmall lower) []);
        Token first_cap;
        at_sentence_start first_cap]
      else Token first_cap
    | `All_cap ->
      let lower_rest = merge (lowercase_rest letters) in
      let lower_all = merge (lowercase_all letters) in
      let case_switched = [
        tok (AllSmall lower_all) ["cs"];
        tok (FirstCap(lower_rest,lower_all,get_first_cap letters,get_first_lower letters)) ["cs"]] in
      let case_kept =
        if has_sufix then []
        else [tok (AllCap(orth,lower_rest,lower_all)) ["maybe cs"]] in
      let readings r =
        if poss_s_beg then [Token r; at_sentence_start r] else [Token r] in
      Variant (List.concat (List.map readings (case_switched @ case_kept))) in
  if has_sufix then add_attr "required validated lemmatization" recognized
  else recognized
333 -  
(** Parses a tag specification into a list of (part of speech, tags)
    pairs. Alternatives are separated by a vertical bar, positions
    inside an alternative by colons and the values of one position by
    dots. The first position must hold exactly one value, the part of
    speech.
    @raise Failure when the first position of an alternative does not
    hold exactly one value; the message contains that alternative. *)
let parse_postags s =
  let bar = Str.regexp "|" in
  let colon = Str.regexp ":" in
  let dot = Str.regexp "\\." in
  let parse_alternative alternative =
    match List.map (Str.split dot) (Str.split colon alternative) with
    | [pos] :: tags -> pos, tags
    | _ -> failwith ("parse_postags: " ^ alternative) in
  List.map parse_alternative (Str.split bar s)
339 -  
(** Builds a [Lemma] token value from a (lemma, interpretation) pair.
    The interpretation must describe exactly one alternative as parsed
    by [parse_postags]; otherwise the function fails. *)
let make_lemma entry =
  let lemma, interp = entry in
  match parse_postags interp with
  | [pos, tags] -> Lemma(lemma,pos,[tags])
  | _ -> failwith "make_lemma"
344 -  
(** Combines the attributes of a list of tokens into one attribute list.
    The case attributes are merged specially: the result carries [cs]
    when at least one token has [cs] and every token has either [cs] or
    [maybe cs]; it carries [maybe cs] when no token has [cs] and all of
    them have [maybe cs]. All remaining attributes found on any token
    are kept once each.
    @raise Not_found when some token has [cs] but another one has
    neither case attribute. *)
let merge_attrs l =
  let len = Xlist.size l in
  let attrs = Xlist.fold l StringQMap.empty (fun collected token ->
    Xlist.fold token.attrs collected StringQMap.add) in
  let count name = try StringQMap.find attrs name with Not_found -> 0 in
  let n_cs = count "cs" in
  let n_maybe_cs = count "maybe cs" in
  let case_attrs =
    if n_cs > 0 then
      (if n_cs + n_maybe_cs = len then ["cs"] else raise Not_found)
    else if n_maybe_cs = len then ["maybe cs"]
    else [] in
  let other_attrs = StringQMap.fold attrs [] (fun kept attr _ ->
    if attr = "cs" || attr = "maybe cs" then kept else attr :: kept) in
  case_attrs @ other_attrs
360 -  
(** Map from the written form of a detachable suffix to the lemma token
    value it stands for, built once with [make_lemma]. *)
let suffix_lemmata =
  let entries = [
    "em",   make_lemma ("być","aglt:sg:pri:imperf:wok");
    "eś",   make_lemma ("być","aglt:sg:sec:imperf:wok");
    "eście",make_lemma ("być","aglt:pl:sec:imperf:wok");
    "eśmy", make_lemma ("być","aglt:pl:pri:imperf:wok");
    "m",    make_lemma ("być","aglt:sg:pri:imperf:nwok");
    "ś",    make_lemma ("być","aglt:sg:sec:imperf:nwok");
    "ście", make_lemma ("być","aglt:pl:sec:imperf:nwok");
    "śmy",  make_lemma ("być","aglt:pl:pri:imperf:nwok");
    "by",   make_lemma ("by","qub");
    ] in
  Xlist.fold entries StringMap.empty (fun map (suffix,lemma) ->
    StringMap.add map suffix lemma)
372 -  
(** Builds the token for a suffix written with [letters] at position
    [i], looking its lemma up in [suffix_lemmata]. A suffix written in
    capitals only is looked up in its lowercased form and marked [cs].
    @raise Not_found when the suffix mixes letter cases or is not
    present in [suffix_lemmata]. *)
let recognize_suffix i letters =
  let orth = merge letters in
  let span = Xlist.size letters * factor in
  let base = {empty_token with orth = orth; beg = i; len = span; next = i + span} in
  let lookup key = StringMap.find suffix_lemmata key in
  if all_capital letters then
    Token {base with token = lookup (merge (lowercase_all letters)); attrs = "cs" :: base.attrs}
  else if all_small letters then
    Token {base with token = lookup orth}
  else raise Not_found
379 -  
(** Tries to read [letters] at position [i] as a Roman numeral,
    optionally followed by a small [w]. Returns the list of alternative
    readings, empty when the letters do not form a positive numeral.

    Every reading is a [Seq] of the numeral token (marked [maybe cs])
    followed, when the [w] is present, by a [Variant] reading it either
    as a small letter or as the lemma [wiek]. Numerals listed in
    [romanmonths] additionally get a [month] reading, and with
    [poss_s_beg] set each reading is repeated in a sentence-opening
    form. *)
let recognize_romandig poss_s_beg i letters =
  let value, has_w = recognize_roman_M 0 letters in
  if value <= 0 then [] else
  (* split the trailing w off the numeral letters *)
  let digits, w_letter =
    if has_w then
      let reversed = List.rev letters in
      List.rev (List.tl reversed), [List.hd reversed]
    else letters, [] in
  let orth = merge digits in
  let roman = string_of_int value in
  let span = Xlist.size digits * factor in
  let t = {empty_token with orth = orth; beg = i; len = span; next = i + span} in
  let w_tokens =
    if w_letter = [] then [] else
      let w_beg = i + span in
      let w_orth = merge w_letter in
      let w_token token =
        Token {empty_token with orth = w_orth; beg = w_beg; len = factor; next = w_beg + factor; token = token} in
      [Variant [w_token (SmallLetter w_orth);
                w_token (make_lemma ("wiek","subst:sg:_:m3"))]] in
  let kinds =
    if StringSet.mem romanmonths orth then ["roman"; "month"] else ["roman"] in
  let dig kind = {t with token = RomanDig(roman,kind); attrs = "maybe cs" :: t.attrs} in
  let plain = List.map (fun kind -> Seq (Token (dig kind) :: w_tokens)) kinds in
  let at_sentence_start =
    if poss_s_beg then
      List.map (fun kind ->
        let d = dig kind in
        Seq ([s_beg i; c_beg (i+1); Token {d with beg = d.beg + 2; len = d.len - 2}] @ w_tokens)) kinds
    else [] in
  plain @ at_sentence_start
404 -  
(** Letter sequences of the first group of detachable suffixes, each
    stored reversed so that it can be matched against a reversed word. *)
let sufixes1 =
  let forward = [
    ["m"];
    ["e"; "m"];
    ["ś"];
    ["e"; "ś"];
    ["ś"; "m"; "y"];
    ["e"; "ś"; "m"; "y"];
    ["ś"; "c"; "i"; "e"];
    ["e"; "ś"; "c"; "i"; "e"];
    ] in
  Xlist.map forward List.rev
415 -  
(** Letter sequences of the second group of detachable suffixes (only
    [by]), each stored reversed like the entries of [sufixes1]. *)
let sufixes2 =
  let forward = [
    ["b"; "y"];
    ] in
  Xlist.map forward List.rev
419 -  
(** Matches a reversed suffix pattern against the beginning of a
    reversed letter list, comparing the pattern with the small form of
    each letter. On success returns the matched letters (back in
    reading order, prepended to [rev]) and the remaining reversed stem.
    @raise Not_found when the pattern does not match, when a foreign
    letter is met, or when no letter would be left for the stem. *)
let rec find_suffix rev (pat, letters) =
  match pat, letters with
  | [], _ :: _ -> rev, letters
  | s :: pat, ((Capital(_,t) | Small t) as letter) :: rest when s = t ->
    find_suffix (letter :: rev) (pat, rest)
  | _ -> raise Not_found
426 -  
(** Tries every pattern of [sufixes] on the reversed [letters]. For each
    pattern that matches, the result holds the remaining reversed stem
    paired with the matched suffix placed in front of [sufs]. *)
let find_suffixes2 sufixes letters sufs =
  let try_pattern found pattern =
    try
      let suf, rev_stem = find_suffix [] (pattern, letters) in
      (rev_stem, suf :: sufs) :: found
    with Not_found -> found in
  Xlist.fold sufixes [] try_pattern
433 -  
(** Enumerates the ways of splitting [letters] into a stem followed by
    suffixes: no suffix at all, one suffix from [sufixes1], and for each
    of these an additional suffix from [sufixes2] in front of it.
    Every split is returned as a list of (letters, position) pairs in
    reading order, positions starting at [i] and advancing by [factor]
    per letter. *)
let find_suffixes i letters =
  let rev_letters = List.rev letters in
  let first_level = (rev_letters, []) :: find_suffixes2 sufixes1 rev_letters [] in
  let all_levels = Xlist.fold first_level first_level (fun found (rev_stem, sufs) ->
    find_suffixes2 sufixes2 rev_stem sufs @ found) in
  let positioned (rev_stem, sufs) =
    let place (placed, pos) part =
      (part, pos) :: placed, pos + factor * Xlist.size part in
    let placed, _ = Xlist.fold (List.rev rev_stem :: sufs) ([], i) place in
    List.rev placed in
  Xlist.map all_levels positioned
442 -  
(** Produces the [Variant] of all readings of a run of letters at
    position [i]: the Roman numeral readings from [recognize_romandig]
    plus, for every split found by [find_suffixes], either the bare stem
    or a [Seq] of the stem and its recognized suffixes. Splits whose
    suffixes are rejected with [Not_found] are dropped. *)
let merge_letters poss_s_beg i letters =
  let splits = find_suffixes i letters in
  let roman = recognize_romandig poss_s_beg i letters in
  let add_split variants split =
    match split with
    | [] -> failwith "merge_letters"
    | [stem, pos] -> recognize_stem poss_s_beg false pos stem :: variants
    | (stem, pos) :: suffixes ->
      (try
        let suffix_tokens =
          Xlist.map suffixes (fun (suf, suf_pos) -> recognize_suffix suf_pos suf) in
        Seq (recognize_stem poss_s_beg true pos stem :: suffix_tokens) :: variants
      with Not_found -> variants) in
  Variant (Xlist.fold splits roman add_split)
453 -  
(** Collects the leading run of [Digit] elements. Returns the digit
    strings in reading order (after those already accumulated, reversed,
    in [rev]) together with the unconsumed rest of the list. *)
let rec group_digits rev = function
  | Digit s :: rest -> group_digits (s :: rev) rest
  | rest -> List.rev rev, rest
458 -  
(** Collects the leading run of letters of any kind. Returns them in
    reading order (after those already accumulated, reversed, in [rev])
    together with the unconsumed rest of the list. *)
let rec group_letters rev = function
  | ((Capital _ | ForeignCapital _ | Small _ | ForeignSmall _) as letter) :: rest ->
    group_letters (letter :: rev) rest
  | rest -> List.rev rev, rest
466 -  
(** Collects the leading run of [Other] elements. Returns their first
    components in reading order (after those already accumulated,
    reversed, in [rev]) together with the unconsumed rest of the list. *)
let rec group_others rev = function
  | Other(s,_) :: rest -> group_others (s :: rev) rest
  | rest -> List.rev rev, rest
471 -  
(** Builds one token covering all of [signs] at position [i], with the
    given [token] value and the [maybe cs] attribute. Returns the token,
    the position just after it, the untouched rest [l] and the
    unchanged [poss_s_beg] flag. Fails when [signs] holds a non-sign. *)
let create_sign_token poss_s_beg i signs l token =
  let text_of = function Sign s -> s | _ -> failwith "create_sign_token" in
  let orth = String.concat "" (Xlist.map signs text_of) in
  let len = Xlist.size signs * factor in
  let fin = i + len in
  let sign_token =
    Token {empty_token with orth = orth; beg = i; len = len; next = fin; token = token; attrs = ["maybe cs"]} in
  sign_token, fin, l, poss_s_beg
476 -  
(** Builds a token record covering all of [signs] at position [i],
    marked [maybe cs] but with the token value left as in [empty_token],
    and returns it with the position just after it. Fails when [signs]
    holds a non-sign. *)
let create_empty_sign_token i signs =
  let text_of = function Sign s -> s | _ -> failwith "create_sign_token" in
  let orth = String.concat "" (Xlist.map signs text_of) in
  let len = Xlist.size signs * factor in
  let fin = i + len in
  {empty_token with orth = orth; beg = i; len = len; next = fin; attrs = ["maybe cs"]}, fin
481 -  
(** Builds a [Seq] that closes the clause, reads [signs] as the
    interjection [lemma] and closes the sentence, all within the span
    of [signs] starting at [i]. The argument [l] is not used. *)
let create_sentence_seq i signs l lemma =
  let text_of = function Sign s -> s | _ -> failwith "create_sign_token" in
  let orth = String.concat "" (Xlist.map signs text_of) in
  let len = Xlist.size signs * factor in
  let fin = i + len in
  let marker beg width text =
    Token {empty_token with beg = beg; len = width; next = beg + width; token = Interp text} in
  Seq [
    marker i 20 "</clause>";
    Token {empty_token with orth = orth; beg = i+20; len = len-30; next = fin-10; token = make_lemma (lemma,"sinterj")};
    marker (fin-10) 10 "</sentence>"]
488 -  
(** Like [create_sentence_seq] but the sequence starts with an extra
    dot symbol token (marked [maybe cs]) placed before the clause end.
    The argument [l] is not used. *)
let create_sentence_seq_hapl i signs l lemma =
  let text_of = function Sign s -> s | _ -> failwith "create_sign_token" in
  let orth = String.concat "" (Xlist.map signs text_of) in
  let len = Xlist.size signs * factor in
  let fin = i + len in
  let marker beg width text =
    Token {empty_token with beg = beg; len = width; next = beg + width; token = Interp text} in
  Seq [
    Token {empty_token with beg = i; len = 10; next = i+10; token = Symbol "."; attrs = ["maybe cs"]};
    marker (i+10) 10 "</clause>";
    Token {empty_token with orth = orth; beg = i+20; len = len-30; next = fin-10; token = make_lemma (lemma,"sinterj")};
    marker (fin-10) 10 "</sentence>"]
496 -  
(** Like [create_sentence_seq] but the sequence starts with a question
    mark token placed before the clause end. The argument [l] is not
    used. *)
let create_sentence_seq_q i signs l lemma =
  let text_of = function Sign s -> s | _ -> failwith "create_sign_token" in
  let orth = String.concat "" (Xlist.map signs text_of) in
  let len = Xlist.size signs * factor in
  let fin = i + len in
  let marker beg width text =
    Token {empty_token with beg = beg; len = width; next = beg + width; token = Interp text} in
  Seq [
    marker i 20 "?";
    marker (i+20) 10 "</clause>";
    Token {empty_token with orth = orth; beg = i+30; len = len-40; next = fin-10; token = make_lemma (lemma,"sinterj")};
    marker (fin-10) 10 "</sentence>"]
504 -  
(** Like [create_sentence_seq] but the sequence starts with an extra
    dot symbol token (marked [maybe cs]) and a question mark token,
    both placed before the clause end. The argument [l] is not used. *)
let create_sentence_seq_hapl_q i signs l lemma =
  let text_of = function Sign s -> s | _ -> failwith "create_sign_token" in
  let orth = String.concat "" (Xlist.map signs text_of) in
  let len = Xlist.size signs * factor in
  let fin = i + len in
  let marker beg width text =
    Token {empty_token with beg = beg; len = width; next = beg + width; token = Interp text} in
  Seq [
    Token {empty_token with beg = i; len = 10; next = i+10; token = Symbol "."; attrs = ["maybe cs"]};
    marker (i+10) 10 "?";
    marker (i+20) 10 "</clause>";
    Token {empty_token with orth = orth; beg = i+30; len = len-40; next = fin-10; token = make_lemma (lemma,"sinterj")};
    marker (fin-10) 10 "</sentence>"]
513 -  
(** Builds the alternative readings of a dash-like sign group at
    position [i]: a dash symbol, a hyphen, an opening [<or>] marker,
    and two closing [</or>] forms, one preceded by the end of the
    clause and sentence and both followed by a new [<clause>].
    Returns the [Variant], the position after the signs, the untouched
    rest [l] and the unchanged [poss_s_beg] flag.

    NOTE(review): in the four-token sequence the [</or>] token starts at
    [i+41] with length [len-59] while its [next] is [i+len-20]; the
    numbers do not add up. Kept as found, confirm whether intended. *)
let create_or_beg i signs l poss_s_beg =
  let text_of = function Sign s -> s | _ -> failwith "create_sign_token" in
  let orth = String.concat "" (Xlist.map signs text_of) in
  let len = Xlist.size signs * factor in
  let fin = i + len in
  let whole token attrs =
    Token {empty_token with orth = orth; beg = i; len = len; next = fin; token = token; attrs = attrs} in
  let marker beg width text =
    Token {empty_token with beg = beg; len = width; next = beg + width; token = Interp text} in
  let or_end_after_sentence = Seq [
    marker i 21 "</clause>";
    marker (i+21) 20 "</sentence>";
    Token {empty_token with orth = orth; beg = i+41; len = len-59; next = fin-20; token = Interp "</or>"};
    marker (fin-20) 20 "<clause>"] in
  let or_end = Seq [
    Token {empty_token with orth = orth; beg = i; len = len-22; next = fin-22; token = Interp "</or>"};
    marker (fin-22) 22 "<clause>"] in
  Variant [
    whole (Symbol "-") ["maybe cs"];
    whole (Interp "-") ["maybe cs"]; (* hyphen *)
    whole (Interp "<or>") [];
    or_end_after_sentence;
    or_end;
    ], fin, l, poss_s_beg
530 -  
(** Variant of [create_or_beg] without the dash symbol and hyphen
    readings: an opening [<or>] marker and two closing [</or>] forms,
    one preceded by the end of the clause and sentence and both
    followed by a new [<clause>]. Returns the [Variant], the position
    after the signs, the untouched rest [l] and the unchanged
    [poss_s_beg] flag.

    NOTE(review): in the four-token sequence the [</or>] token starts at
    [i+41] with length [len-59] while its [next] is [i+len-20]; the
    numbers do not add up. Kept as found, confirm whether intended. *)
let create_or_beg2 i signs l poss_s_beg =
  let text_of = function Sign s -> s | _ -> failwith "create_sign_token" in
  let orth = String.concat "" (Xlist.map signs text_of) in
  let len = Xlist.size signs * factor in
  let fin = i + len in
  let marker beg width text =
    Token {empty_token with beg = beg; len = width; next = beg + width; token = Interp text} in
  let or_beg =
    Token {empty_token with orth = orth; beg = i; len = len; next = fin; token = Interp "<or>"} in
  let or_end_after_sentence = Seq [
    marker i 21 "</clause>";
    marker (i+21) 20 "</sentence>";
    Token {empty_token with orth = orth; beg = i+41; len = len-59; next = fin-20; token = Interp "</or>"};
    marker (fin-20) 20 "<clause>"] in
  let or_end = Seq [
    Token {empty_token with orth = orth; beg = i; len = len-22; next = fin-22; token = Interp "</or>"};
    marker (fin-22) 22 "<clause>"] in
  Variant [
    or_beg;
    or_end_after_sentence;
    or_end;
    ], fin, l, poss_s_beg
545 -  
(** Tells whether what follows a dot lets the dot end a sentence:
    nothing at all, or exactly one whitespace sign or closing quote.

    NOTE(review): the three whitespace literals are reproduced as they
    appear here; they presumably stood for distinct whitespace
    characters in the original file, confirm against it. *)
let is_dot_sentence_end_marker following =
  match following with
  | [] -> true
  | [Sign (" " | "" | " ")] -> true
  | [Sign ("\"" | "»" | "”")] -> true
  | _ -> false
555 -  
(** Tells whether what follows a dot rules out a sentence end: a
    whitespace sign followed by a small letter, one of the signs comma,
    colon, question mark or exclamation mark, or any letter or digit
    directly after the dot.

    NOTE(review): the three whitespace literals are reproduced as they
    appear here; they presumably stood for distinct whitespace
    characters in the original file, confirm against it. *)
let not_dot_sentence_end_marker following =
  match following with
  | Sign (" " | "" | " ") :: Small _ :: _ -> true
  | Sign ("," | ":" | "?" | "!") :: _ -> true
  | (Small _ | ForeignSmall _ | Capital _ | ForeignCapital _ | Digit _) :: _ -> true
  | _ -> false
570 -  
(** Tells whether the element directly after a comma is a digit. *)
let is_comma_digit_marker following =
  match following with
  | Digit _ :: _ -> true
  | _ -> false
574 -  
(** Tells whether what follows a colon lets the colon end a sentence:
    nothing at all, or exactly one whitespace sign.

    NOTE(review): the three whitespace literals are reproduced as they
    appear here; they presumably stood for distinct whitespace
    characters in the original file, confirm against it. *)
let is_colon_sentence_end_marker following =
  match following with
  | [] -> true
  | [Sign (" " | "" | " ")] -> true
  | _ -> false
581 -  
(** Tells whether the element directly after a colon is a digit or a
    slash sign. *)
let is_colon_symbol following =
  match following with
  | (Digit _ | Sign "/") :: _ -> true
  | _ -> false
586 -  
(** Tells whether what follows a run of dots lets it end a sentence:
    nothing at all, or exactly one whitespace sign or closing quote.

    NOTE(review): the three whitespace literals are reproduced as they
    appear here; they presumably stood for distinct whitespace
    characters in the original file, confirm against it. *)
let is_multidot_sentence_end_marker following =
  match following with
  | [] -> true
  | [Sign (" " | "" | " ")] -> true
  | [Sign ("\"" | "»" | "”")] -> true
(*  | "\"" :: l -> true
  | "»" :: l -> true
  | "”" :: l -> true
  | "“" :: l -> true
  | " " :: "-" :: l -> true
  | " " :: "–" :: l -> true
  | " " :: "—" :: l -> true
  | ")" :: l -> true
  | "]" :: l -> true*)
  | _ -> false
605 -  
(** Builds the two readings of a quotation sign followed by a dot,
    starting at position [i]: the sentence ends before the quote (which
    is then read as the closing quote with the [s] suffix), or the
    closing quote comes first and the sentence ends after it.
    Returns the [Variant], the position after the quote and the dot,
    the untouched rest [l] and [true]. *)
let create_quot_digit_token i signs l =
  let quote, after_quote = create_empty_sign_token i signs in
  (* clause end and sentence end squeezed into one sign width at [pos] *)
  let sentence_end pos = [
    Token {empty_token with beg = pos; len = 20; next = pos+20; token = Interp "</clause>"};
    Token {empty_token with orth = "."; beg = pos+20; len = factor-20; next = pos+factor; token = Interp "</sentence>"}] in
  let dot_first =
    Seq (sentence_end i @
         [Token {quote with beg = quote.beg+factor; next = quote.next+factor; token = Interp "”s"}]) in
  let quote_first =
    Seq (Token {quote with token = Interp "”"} :: sentence_end after_quote) in
  Variant [
    dot_first;
    quote_first;
    ], after_quote+factor, l, true
616 -  
617 -let rec recognize_sign_group poss_s_beg i = function  
618 - | (Sign " ") :: l -> create_sign_token poss_s_beg i [Sign " "] l (Symbol " ")  
619 - | (Sign "") :: l -> create_sign_token poss_s_beg i [Sign ""] l (Symbol " ")  
620 - | (Sign " ") :: l -> create_sign_token poss_s_beg i [Sign " "] l (Symbol " ")  
621 - | (Sign "\"") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "\""] l  
622 - | (Sign "\"") :: l ->  
623 - let t,i = create_empty_sign_token i [Sign "\""] in  
624 - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"};Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg  
625 - | (Sign "˝") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "˝"] l  
626 - | (Sign "˝") :: l ->  
627 - let t,i = create_empty_sign_token i [Sign "˝"] in  
628 - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"};Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg  
629 - | (Sign "„") :: l ->  
630 - let t,i = create_empty_sign_token i [Sign "„"] in  
631 - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"}],i,l,poss_s_beg  
632 - | (Sign "”") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "”"] l  
633 - | (Sign "”") :: l ->  
634 - let t,i = create_empty_sign_token i [Sign "”"] in  
635 - Variant[Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg  
636 - | (Sign "“") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "“"] l  
637 - | (Sign "“") :: l ->  
638 - let t,i = create_empty_sign_token i [Sign "“"] in  
639 - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"};Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg  
640 - | (Sign ",") :: (Sign ",") :: l ->  
641 - let t,i = create_empty_sign_token i [Sign ",";Sign ","] in  
642 - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"}],i,l,poss_s_beg  
643 - | (Sign ",") :: l ->  
644 - let t,i2 = create_empty_sign_token i [Sign ","] in  
645 - if is_comma_digit_marker l then  
646 - Token{t with token=Symbol ","},i2,l,false  
647 - else  
648 - Variant[Token{t with token=Interp ","};  
649 - Seq[Token{empty_token with orth=",";beg=i;len=factor/2;next=i+factor/2;token=Interp "</clause>"};  
650 - Token{empty_token with beg=i+factor/2;len=factor-(factor/2);next=i+factor;token=Interp "<clause>"}]],i2,l,false  
651 - | (Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: []) l (make_lemma ("(…)","sinterj"))  
652 - | (Sign "(") :: (Sign "?") :: (Sign "!") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "?") :: (Sign "!") :: (Sign ")") :: []) l (make_lemma ("(?!)","sinterj"))  
653 - | (Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: []) l (make_lemma ("(…)","sinterj"))  
654 - | (Sign "(") :: (Sign "+") :: (Sign "+") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "+") :: (Sign "+") :: (Sign ")") :: []) l (make_lemma ("(++)","sinterj"))  
655 - | (Sign "(") :: (Sign "-") :: (Sign "-") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "-") :: (Sign "-") :: (Sign ")") :: []) l (make_lemma ("(--)","symbol"))  
656 - | (Sign "(") :: (Sign "…") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "…") :: (Sign ")") :: []) l (make_lemma ("(…)","sinterj"))  
657 - | (Sign "(") :: (Sign "?") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "?") :: (Sign ")") :: []) l (make_lemma ("(?)","sinterj"))  
658 - | (Sign "(") :: (Sign "+") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "+") :: (Sign ")") :: []) l (make_lemma ("(+)","symbol"))  
659 - | (Sign "(") :: (Sign "!") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "!") :: (Sign ")") :: []) l (make_lemma ("(!)","sinterj"))  
660 - | (Sign "(") :: (Sign "-") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "-") :: (Sign ")") :: []) l (make_lemma ("(-)","symbol"))  
661 - | (Sign "(") :: (Sign "*") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "*") :: (Sign ")") :: []) l (make_lemma ("(*)","symbol"))  
662 - | (Sign "(") :: l -> create_sign_token poss_s_beg i [Sign "("] l (Interp "(")  
663 - | (Sign ":") :: (Sign "(") :: (Sign "(") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "(") :: (Sign "(") :: (Sign "(") :: []) l (make_lemma (":(((","sinterj"))  
664 - | (Sign ":") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "(") :: []) l (make_lemma (":(","sinterj"))  
665 - | (Sign ":") :: (Sign "-") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign "(") :: []) l (make_lemma (":-(","sinterj"))  
666 - | (Sign ";") :: (Sign "(") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "(") :: (Sign "(") :: []) l (make_lemma (";((","sinterj"))  
667 - | (Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";-))))","sinterj"))  
668 - | (Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":-))))","sinterj"))  
669 - | (Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":-)))","sinterj"))  
670 - | (Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";-)))","sinterj"))  
671 - | (Sign ";") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";)))","sinterj"))  
672 - | (Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":-))","sinterj"))  
673 - | (Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";-))","sinterj"))  
674 - | (Sign ":") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":)))","sinterj"))  
675 - | (Sign ":") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":))","sinterj"))  
676 - | (Sign ";") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";))","sinterj"))  
677 - | (Sign ";") :: (Sign "-") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: []) l (make_lemma (";-)","sinterj"))  
678 - | (Sign ":") :: (Sign "|") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "|") :: []) l (make_lemma (":|","sinterj"))  
679 - | (Sign ":") :: (Sign "\\") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "\\") :: []) l (make_lemma (":\\","sinterj"))  
680 - | (Sign ":") :: (Sign "-") :: (Sign "/") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign "/") :: []) l (make_lemma (":-/","sinterj"))  
681 - | (Sign ":") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign ")") :: []) l (make_lemma (":)","sinterj"))  
682 - | (Sign ";") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign ")") :: []) l (make_lemma (";)","sinterj"))  
683 - | (Sign ")") :: l -> create_sign_token poss_s_beg i [Sign ")"] l (Interp ")")  
684 - | (Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: []) l (make_lemma ("(…)","sinterj"))  
685 - | (Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: []) l (make_lemma ("(…)","sinterj"))  
686 - | (Sign "[") :: (Sign "+") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign "+") :: (Sign "]") :: []) l (make_lemma ("[+]","symbol"))  
687 - | (Sign "[") :: (Sign "-") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign "-") :: (Sign "]") :: []) l (make_lemma ("[-]","symbol"))  
688 - | (Sign "[") :: (Sign "?") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign "?") :: (Sign "]") :: []) l (make_lemma ("[?]","sinterj"))  
689 - | (Sign ":") :: (Sign "]") :: l ->  
690 - let t,i2 = create_empty_sign_token i [Sign ":";Sign "]"] in  
691 - Variant[Token{t with token=make_lemma (":]","sinterj")};  
692 - Seq[Token{empty_token with orth=":";beg=i;len=factor;next=i+factor;token=Interp ":"; attrs=["maybe cs"]};  
693 - Token{empty_token with orth="]";beg=i+factor;len=factor;next=i+2*factor;token=Interp "]"; attrs=["maybe cs"]}]],i2,l,false  
694 - | (Sign ";") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "]") :: []) l (make_lemma (";]","sinterj"))  
695 - | (Sign "]") :: l -> create_sign_token poss_s_beg i [Sign "]"] l (Interp "]")  
696 - | (Sign "[") :: l -> create_sign_token poss_s_beg i [Sign "["] l (Interp "[")  
697 - | (Sign ":") :: l ->  
698 - if is_colon_symbol l then  
699 - Token{empty_token with orth=":";beg=i;len=factor;next=i+factor;token=Symbol ":"; attrs=["maybe cs"]},i+factor,l,false  
700 - else  
701 - Variant[  
702 - Seq[Token{empty_token with beg=i;len=11;next=i+11;token=Interp "</clause>"}; (* wyliczenie*)  
703 - Token{empty_token with orth=":";beg=i+11;len=factor-11;next=i+factor;token=Interp "<clause>"}];  
704 - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"};  
705 - Token{empty_token with orth=":";beg=i+10;len=factor-30;next=i+factor-20;token=Interp ":"}; (* mowa zależna, koniec zdania *)  
706 - Token{empty_token with beg=i+factor-20;len=20;next=i+factor;token=Interp "</sentence>"}];  
707 - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"};  
708 - Token{empty_token with orth=":";beg=i+10;len=factor-40;next=i+factor-30;token=Interp ":"}; (* po ':' zdanie z małej litery *)  
709 - Token{empty_token with beg=i+factor-30;len=10;next=i+factor-20;token=Interp "</sentence>"};  
710 - Token{empty_token with beg=i+factor-20;len=10;next=i+factor-10;token=Interp "<sentence>"};  
711 - Token{empty_token with beg=i+factor-10;len=10;next=i+factor;token=Interp "<clause>"}];  
712 - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"};  
713 - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</sentence>"};  
714 - Token{empty_token with orth=":";beg=i+20;len=factor-20;next=i+factor;token=Interp ":s"}]; (* speaker *)  
715 - ],i+factor,l,true  
716 -(* if is_colon_sentence_end_marker l then  
717 - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"};  
718 - Token{empty_token with orth=":";beg=i+10;len=10;next=i+20;token=Interp ":"};  
719 - Token{empty_token with beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}],i+factor,l,true  
720 - else  
721 - else  
722 - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"};  
723 - Token{empty_token with orth=":";beg=i+10;len=10;next=i+20;token=Interp ""};  
724 - Token{empty_token with beg=i+20;len=factor-20;next=i+factor;token=Interp "<clause>"}],i+factor,l,false*)  
725 - | (Sign "'") :: (Sign "'") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "'";Sign "'"] l  
726 - | (Sign "'") :: (Sign "'") :: l ->  
727 - let t,i = create_empty_sign_token i [Sign "”"] in  
728 - Variant[Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg  
729 - | (Sign "'") :: l -> create_sign_token poss_s_beg i [Sign "'"] l (Symbol "’")  
730 - | (Sign "’") :: (Sign "’") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "’";Sign "’"] l  
731 - | (Sign "’") :: (Sign "’") :: l ->  
732 - let t,i = create_empty_sign_token i [Sign "”"] in  
733 - Variant[Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg  
734 - | (Sign "’") :: l -> create_sign_token poss_s_beg i [Sign "’"] l (Symbol "’")  
735 - | (Sign ";") :: (Sign "*") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "*") :: []) l (make_lemma (";*","sinterj"))  
736 - | (Sign ";") :: l ->  
737 - Variant[Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"};  
738 - Token{empty_token with orth=";";beg=i+20;len=20;next=i+40;token=Interp "</sentence>"};  
739 - Token{empty_token with beg=i+40;len=20;next=i+60;token=Interp "<sentence>"};  
740 - Token{empty_token with beg=i+60;len=factor-60;next=i+factor;token=Interp "<clause>"}];  
741 - Token{empty_token with orth=";";beg=i;len=factor;next=i+factor;token=Interp ";"; attrs=["maybe cs"]}],i+factor,l,false  
742 - | (Sign "?") :: (Sign "!") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l ->  
743 - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "?!...",i+5*factor,l,true  
744 - | (Sign "?") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l ->  
745 - create_sentence_seq_q i ((Sign "?") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "?...",i+4*factor,l,true  
746 - | (Sign "?") :: (Sign "?") :: (Sign "?") :: (Sign "?") :: l ->  
747 - create_sentence_seq_q i ((Sign "?") :: (Sign "?") :: (Sign "?") :: (Sign "?") :: []) l "????",i+4*factor,l,true  
748 - | (Sign "?") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: l ->  
749 - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: []) l "?!!!",i+4*factor,l,true  
750 - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "?") :: l ->  
751 - Variant[create_sentence_seq_hapl_q i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "?") :: []) l "…?";  
752 - create_sentence_seq_q i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "?") :: []) l "…?"],i+4*factor,l,true  
753 - | (Sign "?") :: (Sign "!") :: (Sign "?") :: l ->  
754 - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: (Sign "?") :: []) l "?!?",i+3*factor,l,true  
755 - | (Sign "?") :: (Sign "?") :: (Sign "?") :: l ->  
756 - create_sentence_seq_q i ((Sign "?") :: (Sign "?") :: (Sign "?") :: []) l "???",i+3*factor,l,true  
757 - | (Sign "?") :: (Sign "!") :: l ->  
758 - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: []) l "?!",i+2*factor,l,true  
759 - | (Sign "?") :: (Sign "?") :: l ->  
760 - create_sentence_seq_q i ((Sign "?") :: (Sign "?") :: []) l "??",i+2*factor,l,true  
761 -(* | (Sign "?") :: (Sign ".") :: l -> *)  
762 - | (Sign "!") :: (Sign "?") :: l ->  
763 - create_sentence_seq_q i ((Sign "!") :: (Sign "?") :: []) l "!?",i+2*factor,l,true  
764 - | (Sign "?") :: (Sign "…") :: l ->  
765 - create_sentence_seq_q i ((Sign "?") :: (Sign "…") :: []) l "?…",i+2*factor,l,true  
766 - | (Sign "…") :: (Sign "?") :: l ->  
767 - Variant[create_sentence_seq_hapl_q i ((Sign "…") :: (Sign "?") :: []) l "…?";  
768 - create_sentence_seq_q i ((Sign "…") :: (Sign "?") :: []) l "…?"],i+2*factor,l,true  
769 - | (Sign "?") :: l ->  
770 - create_sentence_seq_q i ((Sign "?") :: []) l "?",i+factor,l,true  
771 - | (Sign "!") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: l ->  
772 - create_sentence_seq i ((Sign "!") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: []) l "!!!!",i+4*factor,l,true  
773 - | (Sign "!") :: (Sign "!") :: (Sign "!") :: l ->  
774 - create_sentence_seq i ((Sign "!") :: (Sign "!") :: (Sign "!") :: []) l "!!!",i+3*factor,l,true  
775 - | (Sign "!") :: (Sign "!") :: l ->  
776 - create_sentence_seq i ((Sign "!") :: (Sign "!") :: []) l "!!",i+2*factor,l,true  
777 - | (Sign "!") :: l ->  
778 - create_sentence_seq i ((Sign "!") :: []) l "!",i+factor,l,true  
779 - | (Sign "…") :: l ->  
780 - if is_multidot_sentence_end_marker l then  
781 - Variant[create_sentence_seq_hapl i ((Sign "…") :: []) l "…";  
782 - create_sentence_seq i ((Sign "…") :: []) l "…"],i+factor,l,true  
783 - else  
784 - Variant[create_sentence_seq_hapl i ((Sign "…") :: []) l "…";  
785 - create_sentence_seq i ((Sign "…") :: []) l "…";  
786 - Token{empty_token with orth="…";beg=i;len=factor;next=i+factor;token=make_lemma ("…","sinterj"); attrs=["maybe cs"]}],i+factor,l,true  
787 - | (Sign "/") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "/") :: l -> create_sign_token poss_s_beg i ((Sign "/") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "/") :: []) l (make_lemma ("(…)","sinterj"))  
788 - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> (* Różne natęrzenia wielokropka i wypunktowania *)  
789 - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……";  
790 - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+8*factor,l,true  
791 - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l ->  
792 - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……";  
793 - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+7*factor,l,true  
794 - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l ->  
795 - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……";  
796 - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+6*factor,l,true  
797 - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l ->  
798 - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……";  
799 - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+5*factor,l,true  
800 - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l ->  
801 - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……";  
802 - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+4*factor,l,true  
803 - | (Sign ".") :: (Sign ".") :: (Sign ".") :: l ->  
804 - if is_multidot_sentence_end_marker l then  
805 - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…";  
806 - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"],i+3*factor,l,true  
807 - else  
808 - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…";  
809 - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…";  
810 - Token{empty_token with orth="...";beg=i;len=3*factor;next=i+3*factor;token=make_lemma ("…","sinterj"); attrs=["maybe cs"]}],i+3*factor,l,true  
811 - | (Sign ".") :: (Sign ".") :: l ->  
812 - if is_multidot_sentence_end_marker l then  
813 - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: []) l "…";  
814 - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"],i+2*factor,l,true  
815 - else  
816 - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: []) l "…";  
817 - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…";  
818 - Token{empty_token with orth="..";beg=i;len=2*factor;next=i+2*factor;token=make_lemma ("…","sinterj"); attrs=["maybe cs"]}],i+2*factor,l,true  
819 - | (Sign ".") :: l ->  
820 - if is_dot_sentence_end_marker l then  
821 - Variant[Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Symbol "."; attrs=["maybe cs"]};  
822 - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</clause>"};  
823 - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}];  
824 - Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"};  
825 - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}]],i+factor,l,true  
826 - else if not_dot_sentence_end_marker l then  
827 - Token{empty_token with orth=".";beg=i;len=factor;next=i+factor;token=Symbol "."; attrs=["maybe cs"]},i+factor,l,false  
828 - else  
829 - Variant[Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Symbol "."; attrs=["maybe cs"]};  
830 - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</clause>"};  
831 - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}];  
832 - Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"};  
833 - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}];  
834 - Token{empty_token with orth=".";beg=i;len=factor;next=i+factor;token=Symbol "."; attrs=["maybe cs"]}],i+factor,l,true  
835 - | (Sign "*") :: (Sign "*") :: (Sign "*") :: (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*";Sign "*";Sign "*";Sign "*"] l (Interp "*****") (* zastępniki liter *)  
836 - | (Sign "*") :: (Sign "*") :: (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*";Sign "*";Sign "*"] l (Interp "****")  
837 - | (Sign "*") :: (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*";Sign "*"] l (Interp "***")  
838 - | (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*"] l (Interp "**")  
839 - | (Sign "*") :: l -> (* Interp zastępnik liter i cudzysłów, symbol listy *)  
840 - let t,i = create_empty_sign_token i [Sign "*"] in  
841 - Variant[Token{t with token=Interp "*"};Token{t with token=Symbol "*"}],i,l,poss_s_beg  
842 - | (Sign "+") :: l -> create_sign_token poss_s_beg i [Sign "+"] l (Symbol "+")  
843 - | (Sign "«") :: l ->  
844 - let t,i = create_empty_sign_token i [Sign "«"] in  
845 - Variant[Token{t with token=Interp "«"};Token{t with token=Interp "«s"}],i,l,poss_s_beg  
846 - | (Sign "»") :: l ->  
847 - let t,i = create_empty_sign_token i [Sign "»"] in  
848 - Variant[Token{t with token=Interp "»"};Token{t with token=Interp "»s"}],i,l,poss_s_beg  
849 - | (Sign "<") :: (Sign "<") :: l -> create_sign_token poss_s_beg i [Sign "<";Sign "<"] l (Interp "«") (* prawy cudzysłów *)  
850 - | (Sign "<") :: l -> (* prawy cudzysłów i element wzoru matematycznego *)  
851 - let t,i = create_empty_sign_token i [Sign "<"] in  
852 - Variant[Token{t with token=Interp "«"};Token{t with token=Symbol "<"}],i,l,poss_s_beg  
853 - | (Sign ">") :: (Sign ">") :: l -> create_sign_token poss_s_beg i [Sign ">";Sign ">"] l (Interp "»") (* lewy cudzysłów *)  
854 - | (Sign ">") :: l -> create_sign_token poss_s_beg i [Sign ">"] l (Symbol ">")  
855 - | (Sign "-") :: (Sign "-") :: (Sign "-") :: l -> create_or_beg2 i [Sign "-";Sign "-";Sign "-"] l poss_s_beg  
856 - | (Sign "-") :: (Sign "-") :: l -> create_or_beg2 i [Sign "-";Sign "-"] l poss_s_beg  
857 - | (Sign "-") :: l -> create_or_beg i [Sign "-"] l poss_s_beg  
858 - | (Sign "‐") :: l -> create_or_beg i [Sign "‐"] l poss_s_beg  
859 - | (Sign "‑") :: l -> create_or_beg i [Sign "‑"] l poss_s_beg  
860 - | (Sign "‒") :: l -> create_or_beg i [Sign "‒"] l poss_s_beg  
861 - | (Sign "−") :: l -> create_or_beg i [Sign "−"] l poss_s_beg  
862 - | (Sign "–") :: l -> create_or_beg i [Sign "–"] l poss_s_beg  
863 - | (Sign "—") :: l -> create_or_beg i [Sign "—"] l poss_s_beg  
864 - | (Sign "‘") :: l -> create_sign_token poss_s_beg i [Sign "‘"] l (Interp "‘")  
865 - | (Sign "´") :: l -> create_sign_token poss_s_beg i [Sign "´"] l (Symbol "’")  
866 - | (Sign "`") :: (Sign "`") :: l ->  
867 - let t,i = create_empty_sign_token i [Sign "`";Sign "`"] in  
868 - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"}],i,l,poss_s_beg  
869 - | (Sign "`") :: l -> create_sign_token poss_s_beg i [Sign "`"] l (Symbol "’")  
870 - | (Sign "·") :: l -> create_sign_token poss_s_beg i [Sign "·"] l (Interp "·")  
871 - | (Sign "•") :: l -> create_sign_token poss_s_beg i [Sign "•"] l (Interp "•")  
872 - | (Sign "¨") :: l -> create_sign_token poss_s_beg i [Sign "¨"] l (Interp "¨")  
873 - | (Sign "~") :: l ->  
874 - let t,i = create_empty_sign_token i [Sign "~"] in  
875 - Variant[Token{t with token=Symbol "~"};Token{t with token=make_lemma ("około","prep:gen")}],i,l,false  
876 - | (Sign "{") :: l ->  
877 - let t,i = create_empty_sign_token i [Sign "{"] in  
878 - Variant[Token{t with token=Symbol "{"};Token{t with token=Interp "{"}],i,l,poss_s_beg  
879 - | (Sign "}") :: l ->  
880 - let t,i = create_empty_sign_token i [Sign "}"] in  
881 - Variant[Token{t with token=Symbol "}"};Token{t with token=Interp "}"}],i,l,poss_s_beg  
882 - | (Sign "#") :: l -> create_sign_token poss_s_beg i [Sign ""] l (Symbol "")  
883 - | (Sign "^") :: (Sign "^") :: l -> create_sign_token poss_s_beg i [Sign "^";Sign "^"] l (make_lemma ("^^","sinterj"))  
884 - | (Sign "^") :: l -> create_sign_token poss_s_beg i [Sign "^"] l (Symbol "^")  
885 - | (Sign "|") :: l -> create_sign_token poss_s_beg i [Sign "|"] l (Symbol "|")  
886 - | (Sign "&") :: l -> create_sign_token poss_s_beg i [Sign "&"] l (Symbol "&")  
887 - | (Sign "=") :: l -> create_sign_token poss_s_beg i [Sign "="] l (Symbol "=")  
888 - | (Sign "/") :: l ->  
889 - let t,i = create_empty_sign_token i [Sign "/"] in  
890 - Variant[Token{t with token=Symbol "/"};Token{t with token=make_lemma ("na","prep:acc")}],i,l,false  
891 - | (Sign "_") :: l -> create_sign_token poss_s_beg i [Sign "_"] l (Symbol "_")  
892 - | (Sign "@") :: l -> create_sign_token poss_s_beg i [Sign "@"] l (Symbol "@")  
893 - | (Sign "×") :: l -> create_sign_token poss_s_beg i [Sign "×"] l (Symbol "×")  
894 - | (Sign "%") :: l ->  
895 - let t,i = create_empty_sign_token i [Sign "%"] in  
896 - Variant[Token{t with token=Symbol "%"};Token{t with token=make_lemma ("procent","subst:_:_:m3")}],i,l,false  
897 - | (Sign "$") :: l ->  
898 - let t,i = create_empty_sign_token i [Sign "$"] in  
899 - Variant[Token{t with token=Symbol "$"};Token{t with token=make_lemma ("dolar","subst:_:_:m2")}],i,l,false  
900 - | (Sign "€") :: l -> create_sign_token poss_s_beg i [Sign "€"] l (make_lemma ("euro","subst:_:_:n2"))  
901 - | (Sign "²") :: l -> create_sign_token poss_s_beg i [Sign "²"] l (Symbol "²")  
902 - | (Sign "°") :: l -> create_sign_token poss_s_beg i [Sign "°"] l (make_lemma ("stopień","subst:_:_:m3"))  
903 - | (Sign "§") :: l -> create_sign_token false i [Sign "§"] l (make_lemma ("paragraf","subst:_:_:m3"))  
904 - | (Sign s) :: l -> print_endline ("recognize_sign_group: " ^ s); create_sign_token poss_s_beg i [Sign s] l (Symbol s)  
905 - | l -> failwith "recognize_sign_group"  
906 -  
907 -(* FIXME: "„Szpak” frunie." trzeba przenieść <sentence> przed „, ale zostawić po „s. *)
908 -  
(* [group_chars poss_s_beg i rev chars] groups a list of classified characters
   into tokens.

   - [poss_s_beg] is a flag handed to the merge/sign helpers (presumably
     "possible sentence beginning" -- confirm against those helpers); it is
     reset to [false] after a run of digits, letters or other characters and
     is otherwise whatever [recognize_sign_group] returns.
   - [i] is the position assigned to the next token; each grouped character
     advances it by [factor].
   - [rev] accumulates the tokens built so far, in reverse order.

   When the input is exhausted a closing [Interp "</query>"] token is appended
   and the accumulated tokens are returned in their original order. *)
let rec group_chars poss_s_beg i rev = function
    [] ->
      List.rev ((Token{empty_token with beg=i;len=factor;next=i+factor;token=Interp "</query>"}) :: rev)
  | ((Digit _) :: _) as chars ->
      let x,l = group_digits [] chars in
      group_chars false (i + Xlist.size x * factor) ((merge_digits poss_s_beg i x) :: rev) l
  | ((Sign _) :: _) as chars ->
      (* The sign recogniser reports the new position and flag itself. *)
      let x,i,l,poss_s_beg = recognize_sign_group poss_s_beg i chars in
      group_chars poss_s_beg i (x :: rev) l
  | ((Capital _ | ForeignCapital _ | Small _ | ForeignSmall _) :: _) as chars ->
      (* All four letter classes are grouped and merged in the same way. *)
      let x,l = group_letters [] chars in
      group_chars false (i + Xlist.size x * factor) ((merge_letters poss_s_beg i x) :: rev) l
  | ((Other _) :: _) as chars ->
      let x,l = group_others [] chars in
      let orth = String.concat "" x in
      let len = Xlist.size x * factor in
      (* [next] must equal [beg + len]: the recursion continues at [i + len],
         so a token spanning several characters has to end there as well
         (previously [next] was [i + factor] regardless of the run length). *)
      group_chars false (i + len)
        ((Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=Other orth}) :: rev) l
921 -  
(* [tokenize l] turns the list of classified characters [l] into a token list.
   The result starts with an [Interp "<query>"] token occupying positions
   [0 .. factor]; the remaining tokens are produced by [group_chars], which
   starts at position [factor] with its flag argument set to [true]. *)
let tokenize l =
  let query_start =
    Token{empty_token with beg=0;len=factor;next=factor;token=Interp "<query>"} in
  let rest = group_chars true factor [] l in
  query_start :: rest
tokenizer/eniam-tokenizer-1.0/README deleted
1 -ENIAMtokenizer Version 1.0 :  
2 ------------------------  
3 -  
4 -ENIAMtokenizer is a library that provides a tokenizer for Polish.  
5 -  
6 -Install  
7 --------  
8 -  
9 -ENIAMtokenizer requires OCaml version 4.02.3 compiler  
10 -together with Xlib library version 3.1 or later.  
11 -  
12 -In order to install type:  
13 -  
14 -make install  
15 -  
16 -By default, ENIAMtokenizer is installed in the 'ocamlc -where'/eniam directory.
17 -You can change it by editing the Makefile.
18 -  
19 -In order to test the library, type:
20 -make test  
21 -./test  
22 -  
23 -Credits  
24 --------  
25 -Copyright © 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>  
26 -Copyright © 2016 Institute of Computer Science Polish Academy of Sciences  
27 -  
28 -The parser uses the following licensed resources:  
29 -  
30 -SGJP: Grammatical Dictionary of Polish, version 20151020  
31 -Copyright © 2007–2015 Zygmunt Saloni, Włodzimierz Gruszczyński, Marcin  
32 -Woliński, Robert Wołosz, Danuta Skowrońska  
33 -http://sgjp.pl  
34 -  
35 -Licence  
36 --------  
37 -  
38 -This library is free software: you can redistribute it and/or modify  
39 -it under the terms of the GNU Lesser General Public License as published by  
40 -the Free Software Foundation, either version 3 of the License, or  
41 -(at your option) any later version.  
42 -  
43 -This library is distributed in the hope that it will be useful,  
44 -but WITHOUT ANY WARRANTY; without even the implied warranty of  
45 -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  
46 -GNU Lesser General Public License for more details.
47 -  
48 -You should have received a copy of the GNU Lesser General Public License  
49 -along with this program. If not, see <http://www.gnu.org/licenses/>.  
50 -  
tokenizer/eniam-tokenizer-1.0/config-tokenizer deleted
1 -# Location of the definitions of multi-token expressions
2 -MTE_FILENAME=/usr/share/eniam/resources/SGJP/mte_20151215.tab  
tokenizer/eniam-tokenizer-1.0/lgpl-3.0.txt deleted
1 - GNU LESSER GENERAL PUBLIC LICENSE  
2 - Version 3, 29 June 2007  
3 -  
4 - Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>  
5 - Everyone is permitted to copy and distribute verbatim copies  
6 - of this license document, but changing it is not allowed.  
7 -  
8 -  
9 - This version of the GNU Lesser General Public License incorporates  
10 -the terms and conditions of version 3 of the GNU General Public  
11 -License, supplemented by the additional permissions listed below.  
12 -  
13 - 0. Additional Definitions.  
14 -  
15 - As used herein, "this License" refers to version 3 of the GNU Lesser  
16 -General Public License, and the "GNU GPL" refers to version 3 of the GNU  
17 -General Public License.  
18 -  
19 - "The Library" refers to a covered work governed by this License,  
20 -other than an Application or a Combined Work as defined below.  
21 -  
22 - An "Application" is any work that makes use of an interface provided  
23 -by the Library, but which is not otherwise based on the Library.  
24 -Defining a subclass of a class defined by the Library is deemed a mode  
25 -of using an interface provided by the Library.  
26 -  
27 - A "Combined Work" is a work produced by combining or linking an  
28 -Application with the Library. The particular version of the Library  
29 -with which the Combined Work was made is also called the "Linked  
30 -Version".  
31 -  
32 - The "Minimal Corresponding Source" for a Combined Work means the  
33 -Corresponding Source for the Combined Work, excluding any source code  
34 -for portions of the Combined Work that, considered in isolation, are  
35 -based on the Application, and not on the Linked Version.  
36 -  
37 - The "Corresponding Application Code" for a Combined Work means the  
38 -object code and/or source code for the Application, including any data  
39 -and utility programs needed for reproducing the Combined Work from the  
40 -Application, but excluding the System Libraries of the Combined Work.  
41 -  
42 - 1. Exception to Section 3 of the GNU GPL.  
43 -  
44 - You may convey a covered work under sections 3 and 4 of this License  
45 -without being bound by section 3 of the GNU GPL.  
46 -  
47 - 2. Conveying Modified Versions.  
48 -  
49 - If you modify a copy of the Library, and, in your modifications, a  
50 -facility refers to a function or data to be supplied by an Application  
51 -that uses the facility (other than as an argument passed when the  
52 -facility is invoked), then you may convey a copy of the modified  
53 -version:  
54 -  
55 - a) under this License, provided that you make a good faith effort to  
56 - ensure that, in the event an Application does not supply the  
57 - function or data, the facility still operates, and performs  
58 - whatever part of its purpose remains meaningful, or  
59 -  
60 - b) under the GNU GPL, with none of the additional permissions of  
61 - this License applicable to that copy.  
62 -  
63 - 3. Object Code Incorporating Material from Library Header Files.  
64 -  
65 - The object code form of an Application may incorporate material from  
66 -a header file that is part of the Library. You may convey such object  
67 -code under terms of your choice, provided that, if the incorporated  
68 -material is not limited to numerical parameters, data structure  
69 -layouts and accessors, or small macros, inline functions and templates  
70 -(ten or fewer lines in length), you do both of the following:  
71 -  
72 - a) Give prominent notice with each copy of the object code that the  
73 - Library is used in it and that the Library and its use are  
74 - covered by this License.  
75 -  
76 - b) Accompany the object code with a copy of the GNU GPL and this license  
77 - document.  
78 -  
79 - 4. Combined Works.  
80 -  
81 - You may convey a Combined Work under terms of your choice that,  
82 -taken together, effectively do not restrict modification of the  
83 -portions of the Library contained in the Combined Work and reverse  
84 -engineering for debugging such modifications, if you also do each of  
85 -the following:  
86 -  
87 - a) Give prominent notice with each copy of the Combined Work that  
88 - the Library is used in it and that the Library and its use are  
89 - covered by this License.  
90 -  
91 - b) Accompany the Combined Work with a copy of the GNU GPL and this license  
92 - document.  
93 -  
94 - c) For a Combined Work that displays copyright notices during  
95 - execution, include the copyright notice for the Library among  
96 - these notices, as well as a reference directing the user to the  
97 - copies of the GNU GPL and this license document.  
98 -  
99 - d) Do one of the following:  
100 -  
101 - 0) Convey the Minimal Corresponding Source under the terms of this  
102 - License, and the Corresponding Application Code in a form  
103 - suitable for, and under terms that permit, the user to  
104 - recombine or relink the Application with a modified version of  
105 - the Linked Version to produce a modified Combined Work, in the  
106 - manner specified by section 6 of the GNU GPL for conveying  
107 - Corresponding Source.  
108 -  
109 - 1) Use a suitable shared library mechanism for linking with the  
110 - Library. A suitable mechanism is one that (a) uses at run time  
111 - a copy of the Library already present on the user's computer  
112 - system, and (b) will operate properly with a modified version  
113 - of the Library that is interface-compatible with the Linked  
114 - Version.  
115 -  
116 - e) Provide Installation Information, but only if you would otherwise  
117 - be required to provide such information under section 6 of the  
118 - GNU GPL, and only to the extent that such information is  
119 - necessary to install and execute a modified version of the  
120 - Combined Work produced by recombining or relinking the  
121 - Application with a modified version of the Linked Version. (If  
122 - you use option 4d0, the Installation Information must accompany  
123 - the Minimal Corresponding Source and Corresponding Application  
124 - Code. If you use option 4d1, you must provide the Installation  
125 - Information in the manner specified by section 6 of the GNU GPL  
126 - for conveying Corresponding Source.)  
127 -  
128 - 5. Combined Libraries.  
129 -  
130 - You may place library facilities that are a work based on the  
131 -Library side by side in a single library together with other library  
132 -facilities that are not Applications and are not covered by this  
133 -License, and convey such a combined library under terms of your  
134 -choice, if you do both of the following:  
135 -  
136 - a) Accompany the combined library with a copy of the same work based  
137 - on the Library, uncombined with any other library facilities,  
138 - conveyed under the terms of this License.  
139 -  
140 - b) Give prominent notice with the combined library that part of it  
141 - is a work based on the Library, and explaining where to find the  
142 - accompanying uncombined form of the same work.  
143 -  
144 - 6. Revised Versions of the GNU Lesser General Public License.  
145 -  
146 - The Free Software Foundation may publish revised and/or new versions  
147 -of the GNU Lesser General Public License from time to time. Such new  
148 -versions will be similar in spirit to the present version, but may  
149 -differ in detail to address new problems or concerns.  
150 -  
151 - Each version is given a distinguishing version number. If the  
152 -Library as you received it specifies that a certain numbered version  
153 -of the GNU Lesser General Public License "or any later version"  
154 -applies to it, you have the option of following the terms and  
155 -conditions either of that published version or of any later version  
156 -published by the Free Software Foundation. If the Library as you  
157 -received it does not specify a version number of the GNU Lesser  
158 -General Public License, you may choose any version of the GNU Lesser  
159 -General Public License ever published by the Free Software Foundation.  
160 -  
161 - If the Library as you received it specifies that a proxy can decide  
162 -whether future versions of the GNU Lesser General Public License shall  
163 -apply, that proxy's public statement of acceptance of any version is  
164 -permanent authorization for you to choose that version for the  
165 -Library.  
tokenizer/eniam-tokenizer-1.0/makefile deleted
# Build script for the eniam-tokenizer library.
#
# Targets:
#   all      - build the bytecode (.cma) and native (.cmxa) library archives
#   install  - copy the archives, compiled interfaces and config-tokenizer to
#              $(INSTALLDIR), and the SGJP resources to /usr/share/eniam
#   test     - build the ./test executable from test.ml
#   clean    - remove build products

OCAMLC=ocamlc
OCAMLOPT=ocamlopt
OCAMLDEP=ocamldep
INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam
OCAMLFLAGS=$(INCLUDES) -g
OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa
INSTALLDIR=`ocamlc -where`/eniam

SOURCES= ENIAMtokenizerTypes.ml ENIAMtokens.ml ENIAMacronyms.ml ENIAMpatterns.ml ENIAMtokenizer.ml

# These targets do not produce a file of the same name.
.PHONY: all install clean

all: eniam-tokenizer.cma eniam-tokenizer.cmxa

install: all
	mkdir -p $(INSTALLDIR)
	cp eniam-tokenizer.cmxa eniam-tokenizer.a eniam-tokenizer.cma config-tokenizer $(INSTALLDIR)
	cp ENIAMtokenizerTypes.cmi ENIAMtokens.cmi ENIAMacronyms.cmi ENIAMpatterns.cmi ENIAMtokenizer.cmi $(INSTALLDIR)
	cp ENIAMtokenizerTypes.cmx ENIAMtokens.cmx ENIAMacronyms.cmx ENIAMpatterns.cmx ENIAMtokenizer.cmx $(INSTALLDIR)
	mkdir -p /usr/share/eniam/resources/SGJP
	cp resources/SGJP/* /usr/share/eniam/resources/SGJP

# The archive rules go through $(OCAMLC)/$(OCAMLOPT) so that overriding the
# compiler variables affects every compilation step.
eniam-tokenizer.cma: $(SOURCES)
	$(OCAMLC) -linkall -a -o eniam-tokenizer.cma $(OCAMLFLAGS) $^

eniam-tokenizer.cmxa: $(SOURCES)
	$(OCAMLOPT) -linkall -a -o eniam-tokenizer.cmxa $(INCLUDES) $^

test: test.ml
	$(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml

.SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx

.mll.ml:
	ocamllex $<

.mly.mli:
	ocamlyacc $<

.mly.ml:
	ocamlyacc $<

.ml.cmo:
	$(OCAMLC) $(OCAMLFLAGS) -c $<

# Was $(OCAMLFALGS): the misspelt variable expanded to nothing, so interfaces
# were compiled without the include paths.
.mli.cmi:
	$(OCAMLC) $(OCAMLFLAGS) -c $<

.ml.cmx:
	$(OCAMLOPT) $(OCAMLOPTFLAGS) -c $<

clean:
	rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a test
tokenizer/eniam-tokenizer-1.0/test.ml deleted
1 -(*  
2 - * ENIAMtokenizer, a tokenizer for Polish  
3 - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>  
4 - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences  
5 - *  
6 - * This library is free software: you can redistribute it and/or modify  
7 - * it under the terms of the GNU Lesser General Public License as published by  
8 - * the Free Software Foundation, either version 3 of the License, or  
9 - * (at your option) any later version.  
10 - *  
11 - * This library is distributed in the hope that it will be useful,  
12 - * but WITHOUT ANY WARRANTY; without even the implied warranty of  
13 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  
14 - * GNU General Public License for more details.  
15 - *  
16 - * You should have received a copy of the GNU Lesser General Public License  
17 - * along with this program. If not, see <http://www.gnu.org/licenses/>.  
18 - *)  
19 -  
20 -  
21 -let test_strings = [  
22 -(* "a gdybym miałem";  
23 - "A Gdy Miałem";  
24 - "GDY MIAŁEM";  
25 - "I II III IV V VI VII VIII IX X MCXIV MXC";  
26 - "Kiedy Piotr Prabucki, przewodniczący Komisji Budżetu PeKaO";  
27 - "25 idzie 20.";  
28 - "Kot. Kot. kot.";  
29 - "25.";  
30 - "25.888.231";  
31 - "Ala 25.888.231.111 ma.";  
32 - "Ala 25.888.031,011.";  
33 - "Ala -25.888.031,011.";  
34 - "Ala -25 .";  
35 - "Ala -1° C 3° ciepła 20—30°C od 180° do 260°C około 6° poniżej horyzontu.";  
36 - "Ala 22-25 .";  
37 - "Ala 22.5.2000-25.5.2001 .";  
38 - "Szpak frunie.";*)  
39 - "Kot miauczy.";  
40 -(* "Np. Ala.";*)  
41 - "w. dom.";  
42 - "tzn.";  
43 - "c.d.n.";  
44 -(* "Arabia Saudyjska biegnie.";  
45 - "Cauchy'ego ONZ-owska biegnie.";*)  
46 - "TE-cie E-e.";  
47 - "MS-DOS-owska CI-cie KRRi-cie UJ-ocie UJ-OCIE.";  
48 - "rock'n'rollowy d’Alembertowi staro-cerkiewno-słowiańskimi";  
49 -(* "Tom idzie.";*)  
50 - "Miałem miał.";  
51 -(* "Szpak śpiewa.";  
52 - "Ala ma kota.";  
53 - "Ale mają kota:"*)  
54 - ]  
55 -  
56 -let _ =  
57 - print_endline "Testy wbudowane";  
58 - Xlist.iter test_strings (fun s ->  
59 - print_endline ("\nTEST: " ^ s);  
60 - let tokens = ENIAMtokenizer.parse s in  
61 - (* print_endline (ENIAMtokenizer.xml_of tokens); *)  
62 - Xlist.iter tokens (fun token -> print_endline (ENIAMtokenizer.string_of 0 token)));  
63 - print_endline "Testy użytkownika.";  
64 - print_endline "Wpisz tekst i naciśnij ENTER, pusty tekst kończy.";  
65 - let s = ref (read_line ()) in  
66 - while !s <> "" do  
67 - let tokens = ENIAMtokenizer.parse !s in  
68 - (* print_endline (ENIAMtokenizer.xml_of tokens); *)  
69 - Xlist.iter tokens (fun token -> print_endline (ENIAMtokenizer.string_of 0 token));  
70 - print_endline "Wpisz tekst i naciśnij ENTER, pusty tekst kończy.";  
71 - s := read_line ()  
72 - done;  
73 - ()  
tokenizer/makefile
@@ -12,11 +12,13 @@ all: eniam-tokenizer.cma eniam-tokenizer.cmxa @@ -12,11 +12,13 @@ all: eniam-tokenizer.cma eniam-tokenizer.cmxa
12 12
13 install: all 13 install: all
14 mkdir -p $(INSTALLDIR) 14 mkdir -p $(INSTALLDIR)
15 - cp eniam-tokenizer.cmxa eniam-tokenizer.a eniam-tokenizer.cma config-tokenizer $(INSTALLDIR) 15 + cp eniam-tokenizer.cmxa eniam-tokenizer.a eniam-tokenizer.cma $(INSTALLDIR)
16 cp ENIAMtokenizerTypes.cmi ENIAMtokens.cmi ENIAMacronyms.cmi ENIAMpatterns.cmi ENIAMtokenizer.cmi $(INSTALLDIR) 16 cp ENIAMtokenizerTypes.cmi ENIAMtokens.cmi ENIAMacronyms.cmi ENIAMpatterns.cmi ENIAMtokenizer.cmi $(INSTALLDIR)
17 cp ENIAMtokenizerTypes.cmx ENIAMtokens.cmx ENIAMacronyms.cmx ENIAMpatterns.cmx ENIAMtokenizer.cmx $(INSTALLDIR) 17 cp ENIAMtokenizerTypes.cmx ENIAMtokens.cmx ENIAMacronyms.cmx ENIAMpatterns.cmx ENIAMtokenizer.cmx $(INSTALLDIR)
18 - mkdir -p /usr/share/eniam/resources/SGJP  
19 - cp resources/SGJP/* /usr/share/eniam/resources/SGJP 18 + mkdir -p /usr/share/eniam/tokenizer
  19 + cp resources/mte_20151215.tab /usr/share/eniam/tokenizer/mte_20151215.tab
  20 + cp resources/README /usr/share/eniam/tokenizer/README
  21 + ln -s /usr/share/eniam/tokenizer/mte_20151215.tab /usr/share/eniam/tokenizer/mte.tab
20 22
21 eniam-tokenizer.cma: $(SOURCES) 23 eniam-tokenizer.cma: $(SOURCES)
22 ocamlc -linkall -a -o eniam-tokenizer.cma $(OCAMLFLAGS) $^ 24 ocamlc -linkall -a -o eniam-tokenizer.cma $(OCAMLFLAGS) $^
tokenizer/resources/SGJP/README renamed to tokenizer/resources/README
tokenizer/resources/SGJP/mte_20151215.tab renamed to tokenizer/resources/mte_20151215.tab