Commit e0700d7b6ee4a00d085da7acc3256bc12e80eb29
1 parent
9f38bc0d
biblioteka eniam-tokenizer-1.0 z poprawioną konfiguracją
Showing
17 changed files
with
14 additions
and
2665 deletions
tokenizer/ENIAMtokenizerTypes.ml
... | ... | @@ -64,10 +64,8 @@ type pat = L | CL | D of string | C of string | S of string | RD of string | O o |
64 | 64 | let empty_token = { |
65 | 65 | orth="";corr_orth="";beg=0;len=0;next=0; token=Symbol ""; attrs=[]} |
66 | 66 | |
67 | -let config = | |
68 | - try File.load_attr_val_pairs "config-tokenizer" | |
69 | - with _ -> (print_endline "ENIAMtokenizer config file not found"; []) | |
67 | +let resource_path = | |
68 | + try Sys.getenv "ENIAM_RESOURCE_PATH" | |
69 | + with Not_found -> "/usr/share/eniam" | |
70 | 70 | |
71 | -let mte_filename = | |
72 | - try Xlist.assoc config "MTE_FILENAME" | |
73 | - with Not_found -> (print_endline "ENIAMtokenizer MTE_FILENAME config variable undefined"; "") | |
71 | +let mte_filename = resource_path ^ "/tokenizer/mte.tab" | |
... | ... |
tokenizer/README
... | ... | @@ -6,7 +6,7 @@ ENIAMtokenizer is a library that provides a tokenizer for Polish. |
6 | 6 | Install |
7 | 7 | ------- |
8 | 8 | |
9 | -ENIAMtokenizer requires OCaml version 4.02.3 compiler | |
9 | +ENIAMtokenizer requires OCaml version 4.02.3 compiler | |
10 | 10 | together with Xlib library version 3.1 or later. |
11 | 11 | |
12 | 12 | In order to install type: |
... | ... | @@ -20,6 +20,10 @@ In order to test library type: |
20 | 20 | make test |
21 | 21 | ./test |
22 | 22 | |
23 | +By default ENIAMtokenizer looks for resources in the /usr/share/eniam directory. | |
24 | +However, this behaviour may be changed by setting and exporting the ENIAM_RESOURCE_PATH | |
25 | +environment variable. | |
26 | + | |
23 | 27 | Credits |
24 | 28 | ------- |
25 | 29 | Copyright © 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> |
... | ... | @@ -47,4 +51,3 @@ GNU General Public License for more details. |
47 | 51 | |
48 | 52 | You should have received a copy of the GNU Lesser General Public License |
49 | 53 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
50 | - | |
... | ... |
tokenizer/config-tokenizer deleted
tokenizer/eniam-tokenizer-1.0.tar.bz2
No preview for this file type
tokenizer/eniam-tokenizer-1.0/ENIAMacronyms.ml deleted
1 | -(* | |
2 | - * ENIAMtokenizer, a tokenizer for Polish | |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | - * | |
6 | - * This library is free software: you can redistribute it and/or modify | |
7 | - * it under the terms of the GNU Lesser General Public License as published by | |
8 | - * the Free Software Foundation, either version 3 of the License, or | |
9 | - * (at your option) any later version. | |
10 | - * | |
11 | - * This library is distributed in the hope that it will be useful, | |
12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | - * GNU General Public License for more details. | |
15 | - * | |
16 | - * You should have received a copy of the GNU Lesser General Public License | |
17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | - *) | |
19 | - | |
20 | -open ENIAMtokenizerTypes | |
21 | - | |
22 | -let mte_patterns = | |
23 | - let lines = try File.load_lines mte_filename | |
24 | - with _ -> (print_endline ("ENIAMtokenizer mte file " ^ mte_filename ^ " not found"); []) in | |
25 | - let l = List.rev (Xlist.rev_map lines (fun line -> | |
26 | - match Str.split (Str.regexp "\t") line with | |
27 | - [orths; lemma; interp] -> Str.split (Str.regexp " ") orths, lemma, interp | |
28 | - | _ -> failwith ("mte_patterns: " ^ line))) in | |
29 | - List.rev (Xlist.rev_map l (fun (orths,lemma,interp) -> | |
30 | - Xlist.map orths (fun orth -> O orth), (fun (_:token_record list) -> ENIAMtokens.make_lemma (lemma,interp)))) | |
31 | - | |
32 | - | |
33 | -let compose_lemma t lemma_suf interp = | |
34 | - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ lemma_suf, interp) | |
35 | - | |
36 | -let compose_lemma3 t1 t2 t3 lemma_suf interp = | |
37 | - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t1.token ^ ENIAMtokens.get_orth t2.token ^ ENIAMtokens.get_orth t3.token ^ lemma_suf, interp) | |
38 | - | |
39 | -let concat_orths l = | |
40 | - String.concat "" (Xlist.map l (fun t -> t.orth)) | |
41 | - | |
42 | -let ct l lemma interp = | |
43 | - let beg = (List.hd l).beg in | |
44 | - let t = List.hd (List.rev l) in | |
45 | - let len = t.beg + t.len - beg in | |
46 | - Token{empty_token with | |
47 | - orth=concat_orths l; | |
48 | - beg=beg; | |
49 | - len=len; | |
50 | - next=t.next; | |
51 | - token=ENIAMtokens.make_lemma (lemma,interp); | |
52 | - attrs=ENIAMtokens.merge_attrs l} | |
53 | - | |
54 | -let rec get_orth_prefix i l = | |
55 | - if i = 0 then "",l else | |
56 | - match l with | |
57 | - c :: l -> let s,l = get_orth_prefix (i-1) l in c ^ s, l | |
58 | - | [] -> failwith "get_orth_prefix" | |
59 | - | |
60 | -let make_sub_tokens t l = | |
61 | - let n = Xlist.fold l 0 (fun n (i,_,_) -> n + i) in | |
62 | - let orth = Xunicode.utf8_chars_of_utf8_string t.orth in | |
63 | - if Xlist.size orth <> n then failwith "make_sub_tokens: invalid orth length" else | |
64 | - let l,_,_,_ = Xlist.fold l ([],t.beg,t.len,orth) (fun (l,beg,remaining_len,orth) (i,lemma,interp) -> | |
65 | - let orth,remaining_orth = get_orth_prefix i orth in | |
66 | - let len = if beg mod factor = 0 then i * factor else ((i-1) * factor) + (beg mod factor) in | |
67 | - if remaining_len = 0 then failwith "make_sub_tokens: invalid remaining_len" else | |
68 | - let len = if len > remaining_len then remaining_len else len in | |
69 | - Token{empty_token with | |
70 | - orth=orth; | |
71 | - beg=beg; | |
72 | - len=len; | |
73 | - next=beg+len; | |
74 | - token=ENIAMtokens.make_lemma (lemma,interp); | |
75 | - attrs=t.attrs} :: l, | |
76 | - beg+len, remaining_len-len, remaining_orth) in | |
77 | - l | |
78 | - | |
79 | -let st t l = | |
80 | - let l = make_sub_tokens t l in | |
81 | - match l with | |
82 | - Token s :: l -> List.rev (Token{s with next=t.next} :: l) | |
83 | - | _ -> failwith "st" | |
84 | - | |
85 | -let std t d l = | |
86 | - let l = make_sub_tokens t l in | |
87 | - match l with | |
88 | - Token s :: l -> List.rev (Token{s with orth=s.orth^d.orth; len=d.beg+d.len-s.beg; next=d.next} :: l) | |
89 | - | _ -> failwith "std" | |
90 | - | |
91 | -let acronym_patterns = [ | |
92 | - [L; S "-"; O "owscy"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | |
93 | - [L; S "-"; O "owska"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns"); | |
94 | - [L; S "-"; O "owski"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns"); | |
95 | - [L; S "-"; O "owski"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns"); | |
96 | - [L; S "-"; O "owskich"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | |
97 | - [L; S "-"; O "owskich"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
98 | - [L; S "-"; O "owskich"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
99 | - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
100 | - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
101 | - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
102 | - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
103 | - [L; S "-"; O "owskiego"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns"); | |
104 | - [L; S "-"; O "owskiego"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
105 | - [L; S "-"; O "owskiej"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns"); | |
106 | - [L; S "-"; O "owskiej"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns"); | |
107 | - [L; S "-"; O "owskiej"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns"); | |
108 | - [L; S "-"; O "owskiemu"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
109 | - [L; S "-"; O "owskim"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
110 | - [L; S "-"; O "owskim"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
111 | - [L; S "-"; O "owskim"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
112 | - [L; S "-"; O "owskimi"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
113 | - [L; S "-"; O "owsko"], (function [x;_;_] -> compose_lemma x "-owski" "adja" | _ -> failwith "acronym_patterns"); | |
114 | - [L; S "-"; O "owsko"], (function [x;_;_] -> compose_lemma x "-owsko" "adv:pos" | _ -> failwith "acronym_patterns"); | |
115 | - [L; S "-"; O "owsku"], (function [x;_;_] -> compose_lemma x "-owski" "adjp" | _ -> failwith "acronym_patterns"); | |
116 | - [L; S "-"; O "owską"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns"); | |
117 | - [L; S "-"; O "owską"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns"); | |
118 | - [L; S "-"; O "wscy"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | |
119 | - [L; S "-"; O "wska"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns"); | |
120 | - [L; S "-"; O "wski"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns"); | |
121 | - [L; S "-"; O "wski"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns"); | |
122 | - [L; S "-"; O "wskich"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | |
123 | - [L; S "-"; O "wskich"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
124 | - [L; S "-"; O "wskich"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
125 | - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
126 | - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
127 | - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
128 | - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
129 | - [L; S "-"; O "wskiego"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns"); | |
130 | - [L; S "-"; O "wskiego"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
131 | - [L; S "-"; O "wskiej"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns"); | |
132 | - [L; S "-"; O "wskiej"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns"); | |
133 | - [L; S "-"; O "wskiej"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns"); | |
134 | - [L; S "-"; O "wskiemu"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
135 | - [L; S "-"; O "wskim"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
136 | - [L; S "-"; O "wskim"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
137 | - [L; S "-"; O "wskim"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
138 | - [L; S "-"; O "wskimi"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
139 | - [L; S "-"; O "wsko"], (function [x;_;_] -> compose_lemma x "-wski" "adja" | _ -> failwith "acronym_patterns"); | |
140 | - [L; S "-"; O "wsko"], (function [x;_;_] -> compose_lemma x "-wsko" "adv:pos" | _ -> failwith "acronym_patterns"); | |
141 | - [L; S "-"; O "wsku"], (function [x;_;_] -> compose_lemma x "-wski" "adjp" | _ -> failwith "acronym_patterns"); | |
142 | - [L; S "-"; O "wską"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns"); | |
143 | - [L; S "-"; O "wską"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns"); | |
144 | - [L; S "’"; O "owa"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns"); | |
145 | - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
146 | - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
147 | - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
148 | - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
149 | - [L; S "’"; O "owego"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns"); | |
150 | - [L; S "’"; O "owego"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
151 | - [L; S "’"; O "owej"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns"); | |
152 | - [L; S "’"; O "owej"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns"); | |
153 | - [L; S "’"; O "owej"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns"); | |
154 | - [L; S "’"; O "owemu"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
155 | - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | |
156 | - [L; S "’"; O "owo"], (function [x;_;_] -> compose_lemma x "’owo" "adv:pos" | _ -> failwith "acronym_patterns"); | |
157 | - [L; S "’"; O "owo"], (function [x;_;_] -> compose_lemma x "’owy" "adja" | _ -> failwith "acronym_patterns"); | |
158 | - [L; S "’"; O "owy"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns"); | |
159 | - [L; S "’"; O "owy"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns"); | |
160 | - [L; S "’"; O "owych"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | |
161 | - [L; S "’"; O "owych"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
162 | - [L; S "’"; O "owych"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
163 | - [L; S "’"; O "owym"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
164 | - [L; S "’"; O "owym"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
165 | - [L; S "’"; O "owym"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
166 | - [L; S "’"; O "owymi"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
167 | - [L; S "’"; O "ową"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns"); | |
168 | - [L; S "’"; O "ową"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns"); | |
169 | - [L; S "’"; O "owscy"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | |
170 | - [L; S "’"; O "owska"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns"); | |
171 | - [L; S "’"; O "owski"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns"); | |
172 | - [L; S "’"; O "owski"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns"); | |
173 | - [L; S "’"; O "owskich"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | |
174 | - [L; S "’"; O "owskich"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
175 | - [L; S "’"; O "owskich"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
176 | - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
177 | - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
178 | - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
179 | - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
180 | - [L; S "’"; O "owskiego"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns"); | |
181 | - [L; S "’"; O "owskiego"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
182 | - [L; S "’"; O "owskiej"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns"); | |
183 | - [L; S "’"; O "owskiej"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns"); | |
184 | - [L; S "’"; O "owskiej"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns"); | |
185 | - [L; S "’"; O "owskiemu"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
186 | - [L; S "’"; O "owskim"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
187 | - [L; S "’"; O "owskim"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
188 | - [L; S "’"; O "owskim"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
189 | - [L; S "’"; O "owskimi"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
190 | - [L; S "’"; O "owsko"], (function [x;_;_] -> compose_lemma x "’owski" "adja" | _ -> failwith "acronym_patterns"); | |
191 | - [L; S "’"; O "owsko"], (function [x;_;_] -> compose_lemma x "’owsko" "adv:pos" | _ -> failwith "acronym_patterns"); | |
192 | - [L; S "’"; O "owsku"], (function [x;_;_] -> compose_lemma x "’owski" "adjp" | _ -> failwith "acronym_patterns"); | |
193 | - [L; S "’"; O "owską"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns"); | |
194 | - [L; S "’"; O "owską"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns"); | |
195 | - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | |
196 | - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns"); | |
197 | - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | |
198 | - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns"); | |
199 | - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | |
200 | - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | |
201 | - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns"); | |
202 | - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); | |
203 | - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:n2" | _ -> failwith "acronym_patterns"); | |
204 | - [CL; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:loc:f" | _ -> failwith "acronym_patterns"); | |
205 | - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | |
206 | - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns"); | |
207 | - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); | |
208 | - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:n2" | _ -> failwith "acronym_patterns"); | |
209 | - [CL; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:inst:f" | _ -> failwith "acronym_patterns"); | |
210 | - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "T" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | |
211 | - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "T" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | |
212 | - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "TA" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); | |
213 | - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "TA" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); | |
214 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
215 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
216 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | |
217 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | |
218 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | |
219 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
220 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | |
221 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | |
222 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
223 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | |
224 | - [CL; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); | |
225 | - [CL; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); | |
226 | - [CL; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); | |
227 | - [L; S "-"; O "ecie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | |
228 | - [L; S "-"; O "ecie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | |
229 | - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | |
230 | - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns"); | |
231 | - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | |
232 | - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:n2" | _ -> failwith "acronym_patterns"); | |
233 | - [L; S "-"; O "etach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); | |
234 | - [L; S "-"; O "etami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); | |
235 | - [L; S "-"; O "etem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | |
236 | - [L; S "-"; O "etom"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); | |
237 | - [L; S "-"; O "etowi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); | |
238 | - [L; S "-"; O "etu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | |
239 | - [L; S "-"; O "ety"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | |
240 | - [L; S "-"; O "ety"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | |
241 | - [L; S "-"; O "ety"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | |
242 | - [L; S "-"; O "etów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | |
243 | - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | |
244 | - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | |
245 | - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | |
246 | - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | |
247 | - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | |
248 | - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
249 | - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | |
250 | - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
251 | - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | |
252 | - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:gen:f" | _ -> failwith "acronym_patterns"); | |
253 | - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); | |
254 | - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); | |
255 | - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); | |
256 | - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | |
257 | - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); | |
258 | - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | |
259 | - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | |
260 | - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); | |
261 | - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | |
262 | - [CL; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); | |
263 | - [CL; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); | |
264 | - [L; S "-"; O "iem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns"); | |
265 | - [L; S "-"; O "iem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | |
266 | - [CL; S "-"; O "o"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:voc:f" | _ -> failwith "acronym_patterns"); | |
267 | - [L; S "-"; O "ocie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | |
268 | - [L; S "-"; O "ocie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | |
269 | - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | |
270 | - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns"); | |
271 | - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); | |
272 | - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:n2" | _ -> failwith "acronym_patterns"); | |
273 | - [CL; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:dat:f" | _ -> failwith "acronym_patterns"); | |
274 | - [L; S "-"; O "otach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); | |
275 | - [L; S "-"; O "otami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); | |
276 | - [L; S "-"; O "otem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | |
277 | - [L; S "-"; O "otom"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); | |
278 | - [L; S "-"; O "otowi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); | |
279 | - [L; S "-"; O "otu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | |
280 | - [L; S "-"; O "oty"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | |
281 | - [L; S "-"; O "oty"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | |
282 | - [L; S "-"; O "oty"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | |
283 | - [L; S "-"; O "otów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | |
284 | - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | |
285 | - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns"); | |
286 | - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); | |
287 | - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:n2" | _ -> failwith "acronym_patterns"); | |
288 | - [L; S "-"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | |
289 | - [L; S "-"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | |
290 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns"); | |
291 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns"); | |
292 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | |
293 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:n2" | _ -> failwith "acronym_patterns"); | |
294 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | |
295 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); | |
296 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | |
297 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | |
298 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); | |
299 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | |
300 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
301 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
302 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | |
303 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | |
304 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:n2" | _ -> failwith "acronym_patterns"); | |
305 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
306 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | |
307 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:n2" | _ -> failwith "acronym_patterns"); | |
308 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
309 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | |
310 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:n2" | _ -> failwith "acronym_patterns"); | |
311 | - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); | |
312 | - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); | |
313 | - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); | |
314 | - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); | |
315 | - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); | |
316 | - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | |
317 | - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:n2" | _ -> failwith "acronym_patterns"); | |
318 | - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); | |
319 | - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | |
320 | - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:n2" | _ -> failwith "acronym_patterns"); | |
321 | - [L; S "-"; O "zie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | |
322 | - [L; S "-"; O "zie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | |
323 | - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | |
324 | - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | |
325 | - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns"); | |
326 | - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | |
327 | - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:n2" | _ -> failwith "acronym_patterns"); | |
328 | - [CL; S "-"; O "ą"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:inst:f" | _ -> failwith "acronym_patterns"); | |
329 | - [CL; S "-"; O "ę"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:acc:f" | _ -> failwith "acronym_patterns"); | |
330 | - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | |
331 | - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns"); | |
332 | - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | |
333 | - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns"); | |
334 | - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | |
335 | - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | |
336 | - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns"); | |
337 | - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); | |
338 | - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:loc:p3" | _ -> failwith "acronym_patterns"); | |
339 | - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | |
340 | - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns"); | |
341 | - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); | |
342 | - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:inst:p3" | _ -> failwith "acronym_patterns"); | |
343 | - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
344 | - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
345 | - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | |
346 | - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | |
347 | - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
348 | - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | |
349 | - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
350 | - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | |
351 | - [L; S "’"; O "ego"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | |
352 | - [L; S "’"; O "ego"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | |
353 | - [L; S "’"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | |
354 | - [L; S "’"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns"); | |
355 | - [L; S "’"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | |
356 | - [L; S "’"; O "emu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | |
357 | - [L; S "’"; O "go"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | |
358 | - [L; S "’"; O "go"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | |
359 | - [L; S "’"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns"); | |
360 | - [L; S "’"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | |
361 | - [L; S "’"; O "m"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | |
362 | - [L; S "’"; O "m"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | |
363 | - [L; S "’"; O "mu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | |
364 | - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | |
365 | - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns"); | |
366 | - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); | |
367 | - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:dat:p3" | _ -> failwith "acronym_patterns"); | |
368 | - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | |
369 | - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns"); | |
370 | - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); | |
371 | - [L; S "’"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | |
372 | - [L; S "’"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | |
373 | - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | |
374 | - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | |
375 | - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); | |
376 | - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | |
377 | - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | |
378 | - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); | |
379 | - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | |
380 | - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
381 | - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
382 | - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | |
383 | - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | |
384 | - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | |
385 | - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
386 | - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | |
387 | - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
388 | - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | |
389 | - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | |
390 | - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | |
391 | - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns"); | |
392 | - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | |
393 | - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:gen:p3" | _ -> failwith "acronym_patterns"); | |
394 | - [L; S "-"; O "ista"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns"); | |
395 | - [L; S "-"; O "istach"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | |
396 | - [L; S "-"; O "istami"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | |
397 | - [L; S "-"; O "isto"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | |
398 | - [L; S "-"; O "istom"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | |
399 | - [L; S "-"; O "isty"], (function [x;_;_] -> compose_lemma x "-ista" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
400 | - [L; S "-"; O "isty"], (function [x;_;_] -> compose_lemma x "-ista" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
401 | - [L; S "-"; O "isty"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | |
402 | - [L; S "-"; O "istów"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | |
403 | - [L; S "-"; O "istów"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | |
404 | - [L; S "-"; O "istą"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | |
405 | - [L; S "-"; O "istę"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | |
406 | - [L; S "-"; O "iści"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | |
407 | - [L; S "-"; O "iści"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | |
408 | - [L; S "-"; O "iście"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | |
409 | - [L; S "-"; O "iście"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | |
410 | - [L; S "-"; O "owca"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | |
411 | - [L; S "-"; O "owca"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | |
412 | - [L; S "-"; O "owcach"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | |
413 | - [L; S "-"; O "owcami"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | |
414 | - [L; S "-"; O "owce"], (function [x;_;_] -> compose_lemma x "-owiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
415 | - [L; S "-"; O "owce"], (function [x;_;_] -> compose_lemma x "-owiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
416 | - [L; S "-"; O "owcem"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | |
417 | - [L; S "-"; O "owcom"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | |
418 | - [L; S "-"; O "owcowi"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | |
419 | - [L; S "-"; O "owcu"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | |
420 | - [L; S "-"; O "owcu"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | |
421 | - [L; S "-"; O "owcy"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | |
422 | - [L; S "-"; O "owcy"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | |
423 | - [L; S "-"; O "owcze"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | |
424 | - [L; S "-"; O "owców"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | |
425 | - [L; S "-"; O "owców"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | |
426 | - [L; S "-"; O "owiec"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns"); | |
427 | - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); | |
428 | - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:gen:f" | _ -> failwith "acronym_patterns"); | |
429 | - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); | |
430 | - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); | |
431 | - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); | |
432 | - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); | |
433 | - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); | |
434 | - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:voc:f" | _ -> failwith "acronym_patterns"); | |
435 | - [L; S "-"; O "owskościach"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:loc:f" | _ -> failwith "acronym_patterns"); | |
436 | - [L; S "-"; O "owskościami"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:inst:f" | _ -> failwith "acronym_patterns"); | |
437 | - [L; S "-"; O "owskościom"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:dat:f" | _ -> failwith "acronym_patterns"); | |
438 | - [L; S "-"; O "owskością"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:inst:f" | _ -> failwith "acronym_patterns"); | |
439 | - [L; S "-"; O "owskość"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:acc:f" | _ -> failwith "acronym_patterns"); | |
440 | - [L; S "-"; O "owskość"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:nom:f" | _ -> failwith "acronym_patterns"); | |
441 | - [L; S "-"; O "wca"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | |
442 | - [L; S "-"; O "wca"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | |
443 | - [L; S "-"; O "wcach"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | |
444 | - [L; S "-"; O "wcami"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | |
445 | - [L; S "-"; O "wce"], (function [x;_;_] -> compose_lemma x "-wiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
446 | - [L; S "-"; O "wce"], (function [x;_;_] -> compose_lemma x "-wiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
447 | - [L; S "-"; O "wcem"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | |
448 | - [L; S "-"; O "wcom"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | |
449 | - [L; S "-"; O "wcowi"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | |
450 | - [L; S "-"; O "wcu"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | |
451 | - [L; S "-"; O "wcu"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | |
452 | - [L; S "-"; O "wcy"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | |
453 | - [L; S "-"; O "wcy"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | |
454 | - [L; S "-"; O "wców"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | |
455 | - [L; S "-"; O "wców"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | |
456 | - [L; S "-"; O "wiec"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns"); | |
457 | - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); | |
458 | - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:gen:f" | _ -> failwith "acronym_patterns"); | |
459 | - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); | |
460 | - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); | |
461 | - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); | |
462 | - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); | |
463 | - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); | |
464 | - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:voc:f" | _ -> failwith "acronym_patterns"); | |
465 | - [L; S "’"; O "owościach"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:loc:f" | _ -> failwith "acronym_patterns"); | |
466 | - [L; S "’"; O "owościami"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:inst:f" | _ -> failwith "acronym_patterns"); | |
467 | - [L; S "’"; O "owościom"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:dat:f" | _ -> failwith "acronym_patterns"); | |
468 | - [L; S "’"; O "owością"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:inst:f" | _ -> failwith "acronym_patterns"); | |
469 | - [L; S "’"; O "owość"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:acc:f" | _ -> failwith "acronym_patterns"); | |
470 | - [L; S "’"; O "owość"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:nom:f" | _ -> failwith "acronym_patterns"); | |
471 | - | |
472 | - [L; S "-"; L; S "-"; O "owscy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | |
473 | - [L; S "-"; L; S "-"; O "owska"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns"); | |
474 | - [L; S "-"; L; S "-"; O "owski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns"); | |
475 | - [L; S "-"; L; S "-"; O "owski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns"); | |
476 | - [L; S "-"; L; S "-"; O "owskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | |
477 | - [L; S "-"; L; S "-"; O "owskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
478 | - [L; S "-"; L; S "-"; O "owskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
479 | - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
480 | - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
481 | - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
482 | - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
483 | - [L; S "-"; L; S "-"; O "owskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns"); | |
484 | - [L; S "-"; L; S "-"; O "owskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
485 | - [L; S "-"; L; S "-"; O "owskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns"); | |
486 | - [L; S "-"; L; S "-"; O "owskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns"); | |
487 | - [L; S "-"; L; S "-"; O "owskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns"); | |
488 | - [L; S "-"; L; S "-"; O "owskiemu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
489 | - [L; S "-"; L; S "-"; O "owskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
490 | - [L; S "-"; L; S "-"; O "owskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
491 | - [L; S "-"; L; S "-"; O "owskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
492 | - [L; S "-"; L; S "-"; O "owskimi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
493 | - [L; S "-"; L; S "-"; O "owsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adja" | _ -> failwith "acronym_patterns"); | |
494 | - [L; S "-"; L; S "-"; O "owsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owsko" "adv:pos" | _ -> failwith "acronym_patterns"); | |
495 | - [L; S "-"; L; S "-"; O "owsku"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adjp" | _ -> failwith "acronym_patterns"); | |
496 | - [L; S "-"; L; S "-"; O "owską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns"); | |
497 | - [L; S "-"; L; S "-"; O "owską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns"); | |
498 | - [L; S "-"; L; S "-"; O "wscy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | |
499 | - [L; S "-"; L; S "-"; O "wska"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns"); | |
500 | - [L; S "-"; L; S "-"; O "wski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns"); | |
501 | - [L; S "-"; L; S "-"; O "wski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns"); | |
502 | - [L; S "-"; L; S "-"; O "wskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | |
503 | - [L; S "-"; L; S "-"; O "wskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
504 | - [L; S "-"; L; S "-"; O "wskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
505 | - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
506 | - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
507 | - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
508 | - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
509 | - [L; S "-"; L; S "-"; O "wskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns"); | |
510 | - [L; S "-"; L; S "-"; O "wskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
511 | - [L; S "-"; L; S "-"; O "wskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns"); | |
512 | - [L; S "-"; L; S "-"; O "wskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns"); | |
513 | - [L; S "-"; L; S "-"; O "wskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns"); | |
514 | - [L; S "-"; L; S "-"; O "wskiemu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
515 | - [L; S "-"; L; S "-"; O "wskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
516 | - [L; S "-"; L; S "-"; O "wskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
517 | - [L; S "-"; L; S "-"; O "wskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | |
518 | - [L; S "-"; L; S "-"; O "wskimi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | |
519 | - [L; S "-"; L; S "-"; O "wsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adja" | _ -> failwith "acronym_patterns"); | |
520 | - [L; S "-"; L; S "-"; O "wsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wsko" "adv:pos" | _ -> failwith "acronym_patterns"); | |
521 | - [L; S "-"; L; S "-"; O "wsku"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adjp" | _ -> failwith "acronym_patterns"); | |
522 | - [L; S "-"; L; S "-"; O "wską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns"); | |
523 | - [L; S "-"; L; S "-"; O "wską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns"); | |
524 | - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | |
525 | - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns"); | |
526 | - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | |
527 | - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns"); | |
528 | - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | |
529 | - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | |
530 | - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns"); | |
531 | - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); | |
532 | - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:n2" | _ -> failwith "acronym_patterns"); | |
533 | - [CL; S "-"; CL; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:loc:f" | _ -> failwith "acronym_patterns"); | |
534 | - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | |
535 | - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns"); | |
536 | - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); | |
537 | - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:n2" | _ -> failwith "acronym_patterns"); | |
538 | - [CL; S "-"; CL; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:inst:f" | _ -> failwith "acronym_patterns"); | |
539 | - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "T" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | |
540 | - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "T" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | |
541 | - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "TA" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); | |
542 | - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "TA" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); | |
543 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
544 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
545 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | |
546 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | |
547 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | |
548 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
549 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | |
550 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | |
551 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
552 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | |
553 | - [CL; S "-"; CL; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); | |
554 | - [CL; S "-"; CL; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); | |
555 | - [CL; S "-"; CL; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); | |
556 | - [L; S "-"; L; S "-"; O "ecie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | |
557 | - [L; S "-"; L; S "-"; O "ecie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | |
558 | - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | |
559 | - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns"); | |
560 | - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | |
561 | - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:n2" | _ -> failwith "acronym_patterns"); | |
562 | - [L; S "-"; L; S "-"; O "etach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); | |
563 | - [L; S "-"; L; S "-"; O "etami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); | |
564 | - [L; S "-"; L; S "-"; O "etem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | |
565 | - [L; S "-"; L; S "-"; O "etom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); | |
566 | - [L; S "-"; L; S "-"; O "etowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); | |
567 | - [L; S "-"; L; S "-"; O "etu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | |
568 | - [L; S "-"; L; S "-"; O "ety"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | |
569 | - [L; S "-"; L; S "-"; O "ety"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | |
570 | - [L; S "-"; L; S "-"; O "ety"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | |
571 | - [L; S "-"; L; S "-"; O "etów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | |
572 | - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | |
573 | - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | |
574 | - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | |
575 | - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | |
576 | - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | |
577 | - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
578 | - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | |
579 | - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
580 | - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | |
581 | - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:gen:f" | _ -> failwith "acronym_patterns"); | |
582 | - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); | |
583 | - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); | |
584 | - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); | |
585 | - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | |
586 | - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); | |
587 | - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | |
588 | - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | |
589 | - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); | |
590 | - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | |
591 | - [CL; S "-"; CL; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); | |
592 | - [CL; S "-"; CL; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); | |
593 | - [L; S "-"; L; S "-"; O "iem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns"); | |
594 | - [L; S "-"; L; S "-"; O "iem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | |
595 | - [CL; S "-"; CL; S "-"; O "o"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:voc:f" | _ -> failwith "acronym_patterns"); | |
596 | - [L; S "-"; L; S "-"; O "ocie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | |
597 | - [L; S "-"; L; S "-"; O "ocie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | |
598 | - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | |
599 | - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns"); | |
600 | - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); | |
601 | - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:n2" | _ -> failwith "acronym_patterns"); | |
602 | - [CL; S "-"; CL; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:dat:f" | _ -> failwith "acronym_patterns"); | |
603 | - [L; S "-"; L; S "-"; O "otach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); | |
604 | - [L; S "-"; L; S "-"; O "otami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); | |
605 | - [L; S "-"; L; S "-"; O "otem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | |
606 | - [L; S "-"; L; S "-"; O "otom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); | |
607 | - [L; S "-"; L; S "-"; O "otowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); | |
608 | - [L; S "-"; L; S "-"; O "otu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | |
609 | - [L; S "-"; L; S "-"; O "oty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | |
610 | - [L; S "-"; L; S "-"; O "oty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | |
611 | - [L; S "-"; L; S "-"; O "oty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | |
612 | - [L; S "-"; L; S "-"; O "otów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | |
613 | - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | |
614 | - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns"); | |
615 | - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); | |
616 | - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:n2" | _ -> failwith "acronym_patterns"); | |
617 | - [L; S "-"; L; S "-"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | |
618 | - [L; S "-"; L; S "-"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | |
619 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns"); | |
620 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns"); | |
621 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | |
622 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:n2" | _ -> failwith "acronym_patterns"); | |
623 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | |
624 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); | |
625 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | |
626 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | |
627 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); | |
628 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | |
629 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
630 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
631 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | |
632 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | |
633 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:n2" | _ -> failwith "acronym_patterns"); | |
634 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
635 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | |
636 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:n2" | _ -> failwith "acronym_patterns"); | |
637 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
638 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | |
639 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:n2" | _ -> failwith "acronym_patterns"); | |
640 | - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); | |
641 | - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); | |
642 | - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); | |
643 | - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); | |
644 | - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); | |
645 | - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | |
646 | - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:n2" | _ -> failwith "acronym_patterns"); | |
647 | - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); | |
648 | - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | |
649 | - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:n2" | _ -> failwith "acronym_patterns"); | |
650 | - [L; S "-"; L; S "-"; O "zie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | |
651 | - [L; S "-"; L; S "-"; O "zie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | |
652 | - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | |
653 | - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | |
654 | - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns"); | |
655 | - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | |
656 | - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:n2" | _ -> failwith "acronym_patterns"); | |
657 | - [CL; S "-"; CL; S "-"; O "ą"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:inst:f" | _ -> failwith "acronym_patterns"); | |
658 | - [CL; S "-"; CL; S "-"; O "ę"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:acc:f" | _ -> failwith "acronym_patterns"); | |
659 | - [L; S "-"; L; S "-"; O "ista"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns"); | |
660 | - [L; S "-"; L; S "-"; O "istach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | |
661 | - [L; S "-"; L; S "-"; O "istami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | |
662 | - [L; S "-"; L; S "-"; O "isto"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | |
663 | - [L; S "-"; L; S "-"; O "istom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | |
664 | - [L; S "-"; L; S "-"; O "isty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
665 | - [L; S "-"; L; S "-"; O "isty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
666 | - [L; S "-"; L; S "-"; O "isty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | |
667 | - [L; S "-"; L; S "-"; O "istów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | |
668 | - [L; S "-"; L; S "-"; O "istów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | |
669 | - [L; S "-"; L; S "-"; O "istą"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | |
670 | - [L; S "-"; L; S "-"; O "istę"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | |
671 | - [L; S "-"; L; S "-"; O "iści"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | |
672 | - [L; S "-"; L; S "-"; O "iści"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | |
673 | - [L; S "-"; L; S "-"; O "iście"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | |
674 | - [L; S "-"; L; S "-"; O "iście"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | |
675 | - [L; S "-"; L; S "-"; O "owca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | |
676 | - [L; S "-"; L; S "-"; O "owca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | |
677 | - [L; S "-"; L; S "-"; O "owcach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | |
678 | - [L; S "-"; L; S "-"; O "owcami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | |
679 | - [L; S "-"; L; S "-"; O "owce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
680 | - [L; S "-"; L; S "-"; O "owce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
681 | - [L; S "-"; L; S "-"; O "owcem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | |
682 | - [L; S "-"; L; S "-"; O "owcom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | |
683 | - [L; S "-"; L; S "-"; O "owcowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | |
684 | - [L; S "-"; L; S "-"; O "owcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | |
685 | - [L; S "-"; L; S "-"; O "owcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | |
686 | - [L; S "-"; L; S "-"; O "owcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | |
687 | - [L; S "-"; L; S "-"; O "owcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | |
688 | - [L; S "-"; L; S "-"; O "owcze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | |
689 | - [L; S "-"; L; S "-"; O "owców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | |
690 | - [L; S "-"; L; S "-"; O "owców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | |
691 | - [L; S "-"; L; S "-"; O "owiec"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns"); | |
692 | - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); | |
693 | - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:gen:f" | _ -> failwith "acronym_patterns"); | |
694 | - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); | |
695 | - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); | |
696 | - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); | |
697 | - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); | |
698 | - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); | |
699 | - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:voc:f" | _ -> failwith "acronym_patterns"); | |
700 | - [L; S "-"; L; S "-"; O "owskościach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:loc:f" | _ -> failwith "acronym_patterns"); | |
701 | - [L; S "-"; L; S "-"; O "owskościami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:inst:f" | _ -> failwith "acronym_patterns"); | |
702 | - [L; S "-"; L; S "-"; O "owskościom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:dat:f" | _ -> failwith "acronym_patterns"); | |
703 | - [L; S "-"; L; S "-"; O "owskością"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:inst:f" | _ -> failwith "acronym_patterns"); | |
704 | - [L; S "-"; L; S "-"; O "owskość"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:acc:f" | _ -> failwith "acronym_patterns"); | |
705 | - [L; S "-"; L; S "-"; O "owskość"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:nom:f" | _ -> failwith "acronym_patterns"); | |
706 | - [L; S "-"; L; S "-"; O "wca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | |
707 | - [L; S "-"; L; S "-"; O "wca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | |
708 | - [L; S "-"; L; S "-"; O "wcach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | |
709 | - [L; S "-"; L; S "-"; O "wcami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | |
710 | - [L; S "-"; L; S "-"; O "wce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
711 | - [L; S "-"; L; S "-"; O "wce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
712 | - [L; S "-"; L; S "-"; O "wcem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | |
713 | - [L; S "-"; L; S "-"; O "wcom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | |
714 | - [L; S "-"; L; S "-"; O "wcowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | |
715 | - [L; S "-"; L; S "-"; O "wcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | |
716 | - [L; S "-"; L; S "-"; O "wcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | |
717 | - [L; S "-"; L; S "-"; O "wcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | |
718 | - [L; S "-"; L; S "-"; O "wcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | |
719 | - [L; S "-"; L; S "-"; O "wców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | |
720 | - [L; S "-"; L; S "-"; O "wców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | |
721 | - [L; S "-"; L; S "-"; O "wiec"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns"); | |
722 | - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | |
723 | - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns"); | |
724 | - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | |
725 | - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns"); | |
726 | - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | |
727 | - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | |
728 | - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns"); | |
729 | - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); | |
730 | - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:loc:p3" | _ -> failwith "acronym_patterns"); | |
731 | - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | |
732 | - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns"); | |
733 | - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); | |
734 | - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:inst:p3" | _ -> failwith "acronym_patterns"); | |
735 | - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
736 | - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
737 | - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | |
738 | - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | |
739 | - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
740 | - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | |
741 | - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
742 | - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | |
743 | - [L; S "-"; L; S "’"; O "ego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | |
744 | - [L; S "-"; L; S "’"; O "ego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | |
745 | - [L; S "-"; L; S "’"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | |
746 | - [L; S "-"; L; S "’"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns"); | |
747 | - [L; S "-"; L; S "’"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | |
748 | - [L; S "-"; L; S "’"; O "emu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | |
749 | - [L; S "-"; L; S "’"; O "go"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | |
750 | - [L; S "-"; L; S "’"; O "go"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | |
751 | - [L; S "-"; L; S "’"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns"); | |
752 | - [L; S "-"; L; S "’"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | |
753 | - [L; S "-"; L; S "’"; O "m"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | |
754 | - [L; S "-"; L; S "’"; O "m"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | |
755 | - [L; S "-"; L; S "’"; O "mu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | |
756 | - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | |
757 | - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns"); | |
758 | - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); | |
759 | - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:dat:p3" | _ -> failwith "acronym_patterns"); | |
760 | - [L; S "-"; L; S "’"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | |
761 | - [L; S "-"; L; S "’"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns"); | |
762 | - [L; S "-"; L; S "’"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); | |
763 | - [L; S "-"; L; S "’"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | |
764 | - [L; S "-"; L; S "’"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | |
765 | - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | |
766 | - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | |
767 | - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); | |
768 | - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | |
769 | - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | |
770 | - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); | |
771 | - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | |
772 | - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
773 | - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
774 | - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | |
775 | - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | |
776 | - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | |
777 | - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | |
778 | - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | |
779 | - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | |
780 | - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | |
781 | - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | |
782 | - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | |
783 | - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns"); | |
784 | - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | |
785 | - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:gen:p3" | _ -> failwith "acronym_patterns"); | |
786 | - ] | |
787 | - | |
(** Patterns for names written with a typographic apostrophe (’).
    Each entry pairs a token pattern with the function that builds the
    result from the matched tokens. Every handler raises
    [Failure "name_patterns"] when it is given a list of another length.
    NOTE(review): [O] presumably matches a literal orth, [S] a symbol and
    [L] a letter-sequence token -- confirm against the pattern matcher. *)
let name_patterns =
  let mismatch () = failwith "name_patterns" in
  (* All three matched tokens are handed over to [compose_lemma3]. *)
  let whole_name = function
    | [first; apostrophe; last] ->
        compose_lemma3 first apostrophe last "" "subst:_:_:_"
    | _ -> mismatch () in
  (* Only the first token is used; "’s" is passed as the suffix. *)
  let possessive_sa = function
    | [stem; _; _] -> compose_lemma stem "’s" "subst:sg:gen.acc:_"
    | _ -> mismatch () in
  [
    [O "O"; S "’"; L], whole_name;
    [O "d"; S "’"; L], whole_name;
    [O "l"; S "’"; L], whole_name;
    [L; S "’"; O "s"], whole_name;
    [L; S "’"; O "sa"], possessive_sa;
  ]
795 | - | |
(** Expansion table for Polish abbreviations.
    Each entry pairs a token pattern with a handler that receives the
    matched tokens and returns the expansion built with [ct], [st] or
    [std]. Every handler raises [Failure "abr_patterns"] when the number
    of tokens it receives differs from what it expects.
    NOTE(review): the integers in the [st]/[std] lists look like the number
    of characters of the abbreviation covered by each expanded word --
    confirm against the definitions of [st] and [std]. *)
let abr_patterns =
  let mismatch () = failwith "abr_patterns" in
  (* One token, expanded by [st]. *)
  let single parts = function
    | [a] -> st a parts
    | _ -> mismatch () in
  (* Two tokens (abbreviation and dot), expanded by [std]. *)
  let with_dot parts = function
    | [a; b] -> std a b parts
    | _ -> mismatch () in
  (* Two tokens, each mapped to its own word. *)
  let one_each (l1, i1) (l2, i2) = function
    | [a; b] -> [ct [a] l1 i1; ct [b] l2 i2]
    | _ -> mismatch () in
  (* Three tokens: the first two form the first word, the third the second. *)
  let two_then_one (l1, i1) (l2, i2) = function
    | [a; b; c] -> [ct [a; b] l1 i1; ct [c] l2 i2]
    | _ -> mismatch () in
  (* Three tokens: the first forms the first word, the other two the second. *)
  let one_then_two (l1, i1) (l2, i2) = function
    | [a; b; c] -> [ct [a] l1 i1; ct [b; c] l2 i2]
    | _ -> mismatch () in
  (* Four tokens: two consecutive pairs, one word per pair. *)
  let dotted_pair (l1, i1) (l2, i2) = function
    | [a; b; c; d] -> [ct [a; b] l1 i1; ct [c; d] l2 i2]
    | _ -> mismatch () in
  (* Six tokens: three consecutive pairs, one word per pair. *)
  let dotted_triple (l1, i1) (l2, i2) (l3, i3) = function
    | [a; b; c; d; e; f] -> [ct [a; b] l1 i1; ct [c; d] l2 i2; ct [e; f] l3 i3]
    | _ -> mismatch () in
  [
    [O "b"; S "."; O "u"; S "."],
      dotted_pair ("bez", "prep:gen:nwok") ("uwaga", "subst:pl:gen:f");
    [O "b"; S "."; O "zm"; S "."],
      dotted_pair ("bez", "prep:gen:nwok") ("zmiana", "subst:pl:gen:f");
    [O "blm"],
      single [1,"bez","prep:gen:nwok"; 1,"liczba","subst:sg:gen:f"; 1,"mnogi","adj:sg:gen:f:pos"];
    [O "blp"],
      single [1,"bez","prep:gen:nwok"; 1,"liczba","subst:sg:gen:f"; 1,"pojedynczy","adj:sg:gen:f:pos"];
    [O "błp"; S "."],
      with_dot [2,"błogosławiony","adj:sg:gen:f:pos"; 1,"pamięć","subst:sg:gen:f"];
    [O "bm"],
      single [1,"bieżący","adj:sg:$C:m3:pos"; 1,"miesiąc","subst:sg:$C:m3"];
    [O "bm"; S "."],
      with_dot [1,"bieżący","adj:sg:$C:m3:pos"; 1,"miesiąc","subst:sg:$C:m3"];
    [O "bp"; S "."],
      with_dot [1,"błogosławiony","adj:sg:gen:f:pos"; 1,"pamięć","subst:sg:gen:f"];
    [O "br"],
      single [1,"bieżący","adj:sg:$C:m3:pos"; 1,"rok","subst:sg:$C:m3"];
    [O "br"; S "."],
      with_dot [1,"bieżący","adj:sg:$C:m3:pos"; 1,"rok","subst:sg:$C:m3"];
    [O "c"; S "."; O "d"; S "."; O "n"; S "."],
      dotted_triple ("ciąg", "subst:sg:nom:m3") ("daleki", "adj:sg:nom:m3:com") ("nastąpić", "fin:sg:ter:perf");
    [O "ccm"],
      single [1,"sześcienny","adj:_:$C:m3:pos"; 2,"centymetr","subst:_:$C:m3"];
    [O "cd"; S "."],
      with_dot [1,"ciąg","subst:sg:nom:m3"; 1,"daleki","adj:sg:nom:m3:com"];
    [O "cdn"; S "."],
      with_dot [1,"ciąg","subst:sg:nom:m3"; 1,"daleki","adj:sg:nom:m3:com"; 1,"nastąpić","fin:sg:ter:perf"];
    [O "cm"; O "3"],
      one_each ("centymetr", "subst:_:$C:m3") ("sześcienny", "adj:_:$C:m3:pos");
    [O "dcn"; S "."],
      with_dot [1,"daleki","adj:sg:nom:m3:com"; 1,"ciąg","subst:sg:nom:m3"; 1,"nastąpić","fin:sg:ter:perf"];
    [O "dm"; O "3"],
      one_each ("decymetr", "subst:_:$C:m3") ("sześcienny", "adj:_:$C:m3:pos");
    [O "ds"; S "."],
      with_dot [1,"do","prep:gen"; 1,"sprawa","subst:pl:gen:f"];
    [O "d"; O "/"; O "s"],
      two_then_one ("do", "prep:gen") ("sprawa", "subst:pl:gen:f");
    [O "itd"; S "."],
      with_dot [1,"i","conj"; 1,"tak","adv:pos"; 1,"daleko","adv:com"];
    [O "itede"; S "."],
      with_dot [1,"i","conj"; 2,"tak","adv:pos"; 2,"daleko","adv:com"];
    [O "itp"; S "."],
      with_dot [1,"i","conj"; 1,"tym","adv"; 1,"podobny","adj:pl:nom:_:pos"];
    [O "jw"; S "."],
      with_dot [1,"jak","adv:pos"; 1,"wysoko","adv:com"];
    [O "JWP"],
      single [1,"jaśnie","adv:pos"; 1,"wielmożny","adj:_:$C:m1:pos"; 1,"pan","subst:_:$C:m1"];
    [O "JWP"],
      single [1,"jaśnie","adv:pos"; 1,"wielmożny","adj:_:$C:f:pos"; 1,"pani","subst:_:$C:f"];
    [O "km"; S "."; O "2"],
      two_then_one ("kilometr", "subst:_:$C:m3") ("kwadratowy", "adj:_:$C:m3:pos");
    [O "km"; O "2"],
      one_each ("kilometr", "subst:_:$C:m3") ("kwadratowy", "adj:_:$C:m3:pos");
    [O "km"; O "²"],
      one_each ("kilometr", "subst:_:$C:m3") ("kwadratowy", "adj:_:$C:m3:pos");
    [O "lm"; S "."],
      with_dot [1,"liczba","subst:sg:$C:f"; 1,"mnogi","adj:sg:$C:f:pos"];
    [O "lp"; S "."],
      with_dot [1,"liczba","subst:sg:$C:f"; 1,"pojedynczy","adj:sg:$C:f:pos"];
    [O "m"; S "."; O "in"; S "."],
      dotted_pair ("między", "prep:inst") ("inny", "adj:pl:inst:_:pos");
    [O "m"; S "."; O "in"],
      two_then_one ("między", "prep:inst") ("inny", "adj:pl:inst:_:pos");
    [O "m"; S "."; O "inn"; S "."],
      dotted_pair ("między", "prep:inst") ("inny", "adj:pl:inst:_:pos");
    [O "m"; S "."; O "st"; S "."],
      dotted_pair ("miasto", "subst:_:$C:n2") ("stołeczny", "adj:_:$C:n2:pos");
    [O "m"; O "^"; O "2"],
      one_then_two ("metr", "subst:_:$C:m3") ("kwadratowy", "adj:_:$C:m3:pos");
    [O "m"; O "2"],
      one_each ("metr", "subst:_:$C:m3") ("kwadratowy", "adj:_:$C:m3:pos");
    [O "m"; O "3"],
      one_each ("metr", "subst:_:$C:m3") ("sześcienny", "adj:_:$C:m3:pos");
    [O "min"; S "."],
      with_dot [1,"między","prep:inst"; 2,"inny","adj:pl:inst:_:pos"];
    [O "mkw"; S "."],
      with_dot [1,"metr","subst:_:$C:m3"; 2,"kwadratowy","adj:_:$C:m3:pos"];
    [O "n"; S "."; O "e"; S "."],
      dotted_pair ("nasz", "adj:sg:gen:f:pos") ("era", "subst:sg:gen:f");
    [O "n"; S "."; O "p"; S "."; O "m"; S "."],
      dotted_triple ("nad", "prep:inst") ("poziom", "subst:sg:inst:m3") ("morze", "subst:sg:gen:n2");
    [O "np"; S "."],
      with_dot [1,"na","prep:acc"; 1,"przykład","subst:sg:acc:m3"];
    [O "nt"; S "."],
      with_dot [1,"na","prep:acc"; 1,"temat","subst:sg:acc:m3"];
    [O "NTG"],
      single [1,"nie","qub"; 1,"ta","adj:sg:nom:f:pos"; 1,"grupa","subst:sg:nom:f"];
    [O "o"; S "."; O "o"; S "."],
      dotted_pair ("ograniczony", "adj:sg:$C:f:pos") ("odpowiedzialność", "subst:sg:$C:f");
    [O "p"; S "."; O "n"; S "."; O "e"; S "."],
      dotted_triple ("przed", "prep:inst") ("nasz", "adj:sg:inst:f:pos") ("era", "subst:sg:inst:f");
    [O "p"; S "."; O "o"; S "."],
      dotted_pair ("pełniący", "pact:_:_:m1.m2.m3:imperf:aff") ("obowiązek", "subst:pl:acc:m3");
    [O "p"; S "."; O "p"; S "."; O "m"; S "."],
      dotted_triple ("pod", "prep:inst") ("poziom", "subst:sg:inst:m3") ("morze", "subst:sg:gen:n2");
    (* NOTE(review): "nwokc" below differs from the "nwok" used by other
       entries -- possibly a typo; kept verbatim. *)
    [O "p"; S "."; O "t"; S "."],
      dotted_pair ("pod", "prep:inst:nwokc") ("tytuł", "subst:sg:inst:m3");
    [O "pn"; S "."],
      with_dot [1,"pod","prep:inst"; 1,"nazwa","subst:sg:inst:f"];
    [O "pne"; S "."],
      with_dot [1,"przed","prep:inst"; 1,"nasz","adj:sg:inst:f:pos"; 1,"era","subst:sg:inst:f"];
    [O "pt"; S "."],
      with_dot [1,"pod","prep:inst"; 1,"tytuł","subst:sg:inst:m3"];
    [O "PW"],
      single [1,"prywatny","adj:_:$C:f:pos"; 1,"wiadomość","subst:_:$C:f"];
    [O "pw"; S "."],
      with_dot [1,"pod","prep:inst"; 1,"wezwanie","subst:sg:inst:n2"];
    (* Disabled entries:
    [O "S"; S "."; O "A"; S "."],
      dotted_pair ("spółka", "subst:sg:$C:f") ("akcyjny", "adj:sg:$C:f:pos");
    [O "s"; S "."; O "c"; S "."],
      dotted_pair ("spółka", "subst:sg:$C:f") ("cywilny", "adj:sg:$C:f:pos");
    [O "SA"],
      single [1,"spółka","subst:sg:$C:f"; 1,"akcyjny","adj:sg:$C:f:pos"]; *)
    [O "ś"; S "."; O "p"; S "."],
      dotted_pair ("święty", "adj:sg:gen:f:pos") ("pamięć", "subst:sg:gen:f");
    [O "śp"; S "."],
      with_dot [1,"święty","adj:sg:gen:f:pos"; 1,"pamięć","subst:sg:gen:f"];
    [O "tgz"; S "."],
      with_dot [2,"tak","adv"; 1,"zwać","ppas:_:_:_:_:aff"];
    [O "tj"; S "."],
      with_dot [1,"to","subst:sg:nom:n2"; 1,"być","fin:sg:ter:imperf"];
    [O "tzn"; S "."],
      with_dot [1,"to","subst:sg:nom:n2"; 2,"znaczyć","fin:sg:ter:imperf"];
    [O "tzw"; S "."],
      with_dot [1,"tak","adv:pos"; 2,"zwać","ppas:_:_:_:imperf:aff"];
    [O "ub"; S "."; O "r"; S "."],
      dotted_pair ("ubiegły", "adj:sg:$C:m3:pos") ("rok", "subst:sg:$C:m3");
    [O "w"; S "."; O "w"; S "."],
      dotted_pair ("wysoko", "adv:com") ("wymienić", "ppas:_:_:_:perf:aff");
    [O "w"; O "/"; O "m"],
      two_then_one ("w", "prep:loc") ("miejsce", "subst:_:loc:m3");
    [O "w"; O "/"; O "w"],
      two_then_one ("wysoko", "adv:com") ("wymienić", "ppas:_:_:_:perf:aff");
    [O "ws"; S "."],
      with_dot [1,"w","prep:loc:nwok"; 1,"sprawa","subst:sg:loc:f"];
    [O "ww"; S "."],
      with_dot [1,"wysoko","adv:com"; 1,"wymieniony","ppas:_:_:_:perf:aff"];
  ]
tokenizer/eniam-tokenizer-1.0/ENIAMpatterns.ml deleted
1 | -(* | |
2 | - * ENIAMtokenizer, a tokenizer for Polish | |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | - * | |
6 | - * This library is free software: you can redistribute it and/or modify | |
7 | - * it under the terms of the GNU Lesser General Public License as published by | |
8 | - * the Free Software Foundation, either version 3 of the License, or | |
9 | - * (at your option) any later version. | |
10 | - * | |
11 | - * This library is distributed in the hope that it will be useful, | |
12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | - * GNU General Public License for more details. | |
15 | - * | |
16 | - * You should have received a copy of the GNU Lesser General Public License | |
17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | - *) | |
19 | - | |
20 | -open Xstd | |
21 | -open Printf | |
22 | -open ENIAMtokenizerTypes | |
23 | - | |
(** [flatten_tokens rev_variants tokens] expands a list of token trees into
    every alternative path through it.
    [rev_variants] holds the paths collected so far, each stored in reverse
    order. A [Token] is consed onto every path, a [Seq] is spliced into the
    remaining input, and a [Variant] forks: each alternative is expanded
    from the current paths and the results are concatenated.
    Returns the resulting paths, still reversed. *)
let rec flatten_tokens rev_variants tokens =
  match tokens with
  | [] -> rev_variants
  | Token t :: rest ->
      let extended = Xlist.map rev_variants (fun path -> Token t :: path) in
      flatten_tokens extended rest
  | Seq items :: rest -> flatten_tokens rev_variants (items @ rest)
  | Variant alternatives :: rest ->
      let expand alternative = flatten_tokens rev_variants [alternative] in
      let branches = List.concat (Xlist.map alternatives expand) in
      flatten_tokens branches rest
29 | - | |
(** [normalize_tokens rev tokens] rewrites a list of token trees so that
    sequences are spliced into the enclosing list and every [Variant] holds
    fully flattened alternatives.
    [rev] is the accumulator of already normalized tokens in reverse order;
    the result is returned in the original order.
    @raise Failure ["normalize_tokens"] when flattening a [Variant] yields
    no path or an empty path. *)
let rec normalize_tokens rev tokens =
  (* Turn one reversed path produced by [flatten_tokens] back into a token. *)
  let token_of_rev_path rev_path =
    match List.rev rev_path with
    | [] -> failwith "normalize_tokens"
    | [single] -> single
    | path -> Seq path in
  match tokens with
  | [] -> List.rev rev
  | (Token _ as tok) :: rest -> normalize_tokens (tok :: rev) rest
  | Seq items :: rest -> normalize_tokens rev (items @ rest)
  (* A one-element variant is no choice at all: re-process its content. *)
  | Variant [only] :: rest -> normalize_tokens rev (only :: rest)
  | (Variant _ as choice) :: rest ->
      let rev_paths = flatten_tokens [[]] [choice] in
      let merged =
        match Xlist.map rev_paths token_of_rev_path with
        | [] -> failwith "normalize_tokens"
        | [single] -> single
        | alternatives -> Variant alternatives in
      normalize_tokens (merged :: rev) rest
47 | - | |
(** [concat_orths l] joins the [orth] fields of the tokens in [l], in order,
    with no separator. *)
let concat_orths l =
  let orths = Xlist.map l (fun tok -> tok.orth) in
  String.concat "" orths
50 | - | |
(** [concat_orths2 l] joins, in order and with no separator, the strings that
    [ENIAMtokens.get_orth] returns for the [token] field of each element
    of [l]. *)
let concat_orths2 l =
  let orth_of tok = ENIAMtokens.get_orth tok.token in
  String.concat "" (Xlist.map l orth_of)
53 | - | |
(** [concat_intnum tokens] glues together the digit strings of a number
    written as one to four [Dig] groups with a separator token between
    consecutive groups. The separators are not inspected.
    @raise Failure ["concat_intnum"] for any other shape of input. *)
let concat_intnum tokens =
  match tokens with
  | [{token=Dig(g1,_)}] -> g1
  | [{token=Dig(g2,_)}; _; {token=Dig(g1,_)}] -> g2 ^ g1
  | [{token=Dig(g3,_)}; _; {token=Dig(g2,_)}; _; {token=Dig(g1,_)}] ->
      g3 ^ g2 ^ g1
  | [{token=Dig(g4,_)}; _; {token=Dig(g3,_)}; _; {token=Dig(g2,_)}; _;
     {token=Dig(g1,_)}] ->
      g4 ^ g3 ^ g2 ^ g1
  | _ -> failwith "concat_intnum"
60 | - | |
(** [dig_value t] returns the string carried by the [Dig] token of [t].
    @raise Failure ["dig_value"] when [t.token] is not a [Dig]. *)
let dig_value = function
  | {token=Dig(value,_)} -> value
  | _ -> failwith "dig_value"
65 | - | |
(* FIXME: problem with ordnum - it rules out the year reading, which is a problem at the end of a sentence *)
(** First batch of digit patterns: dotted object identifiers, integers
    written in groups of three digits, ordinals, dates, day-month pairs,
    clock times and match results. Each entry pairs a token pattern with the
    function that builds the resulting token from the matched tokens; the
    handlers that destructure their input raise [Failure] with the message
    given in the table when the token list has an unexpected length. *)
let digit_patterns1 =
  (* FIXME: problem with redundant interpretations - the lack of spaces has to
     be taken into account in preprocessing, or in disambiguation *)
  let dot = S "." in
  let space = S " " in
  let obj_id tokens = Proper (concat_orths tokens, "obj-id", [[]], ["obj-id"]) in
  let intnum tokens = Dig (concat_intnum tokens, "intnum") in
  let ordnum = function
    | [number; _] -> Dig (concat_intnum [number], "ordnum")
    | _ -> failwith "digit_patterns1" in
  let date msg = function
    | [day; _; month; _; year] ->
        Compound ("date", [day.token; month.token; year.token])
    | _ -> failwith msg in
  let day_month = function
    | [day; _; month; _] -> Compound ("day-month", [day.token; month.token])
    | _ -> failwith "digit_patterns4" in
  let hour_minute msg = function
    | [hour; _; minute] -> Compound ("hour-minute", [hour.token; minute.token])
    | _ -> failwith msg in
  let match_result = function
    | [home; _; away] -> Compound ("match-result", [home.token; away.token])
    | _ -> failwith "digit_patterns7" in
  [
    [D "dig"; dot; D "dig"; dot; D "dig"; dot; D "dig"; dot; D "dig"], obj_id;
    [D "dig"; dot; D "dig"; dot; D "dig"; dot; D "dig"], obj_id;
    [D "dig"; dot; D "dig"; dot; D "dig"], obj_id;
    [D "dig"; dot; D "dig"], obj_id;
    (* [D "dig"], "obj-id"; *)
    [D "pref3dig"; dot; D "3dig"; dot; D "3dig"; dot; D "3dig"], intnum;
    [D "pref3dig"; dot; D "3dig"; dot; D "3dig"], intnum;
    [D "pref3dig"; dot; D "3dig"], intnum;
    [D "pref3dig"; space; D "3dig"; space; D "3dig"; space; D "3dig"], intnum;
    [D "pref3dig"; space; D "3dig"; space; D "3dig"], intnum;
    [D "pref3dig"; space; D "3dig"], intnum;
    (* FIXME: this should not rule out other interpretations *)
    [D "intnum"; dot], ordnum;
    [D "day"; dot; D "month"; dot; D "year"], date "digit_patterns2";
    [D "day"; dot; RD "month"; dot; D "year"], date "digit_patterns3";
    [D "day"; space; RD "month"; space; D "year"], date "digit_patterns3";
    [D "day"; dot; D "month"; dot; D "2dig"], date "digit_patterns2";
    [D "day"; dot; RD "month"; dot; D "2dig"], date "digit_patterns3";
    [D "day"; dot; D "month"; dot], day_month;
    [D "hour"; dot; D "minute"], hour_minute "digit_patterns5";
    [D "hour"; S ":"; D "minute"], hour_minute "digit_patterns6";
    [D "intnum"; S ":"; D "intnum"], match_result;
  ] (* without 1 and *2 *3 *4 we have rec *) (* in Morfeusz always num:pl? *)
90 | - | |
(** Second batch of digit patterns: numbers with a decimal comma and
    two-digit years introduced by a typographic apostrophe. Each handler
    raises [Failure] with its own message when the matched token list has an
    unexpected length; [dig_value] fails if a matched token is not a [Dig]. *)
let digit_patterns2 =
  let realnum = function
    | [whole; _; fraction] ->
        Dig (String.concat "," [dig_value whole; dig_value fraction], "realnum")
    | _ -> failwith "digit_patterns8" in
  let apostrophe_year = function
    | [_; digits] -> Dig ("’" ^ dig_value digits, "year")
    | _ -> failwith "digit_patterns12" in
  [
    [D "intnum"; S ","; D "dig"], realnum;
(*  [S "-"; D "intnum"; S ","; D "dig"], (function [_;x;_;y] -> Dig("-" ^ dig_value x ^ "," ^ dig_value y,"realnum") | _ -> failwith "digit_patterns9");
    [S "-"; D "intnum"], (function [_;x] -> Dig("-" ^ dig_value x,"realnum") | _ -> failwith "digit_patterns10");*)
    [S "’"; D "2dig"], apostrophe_year;
(*  [D "intnum"], "realnum"; *)
  ]
98 | - | |
(** [compose_latek_lemma t interp] passes to [ENIAMtokens.make_lemma] the
    orth of [t.token] followed by "-latek", paired with [interp]. *)
let compose_latek_lemma t interp =
  let lemma = ENIAMtokens.get_orth t.token ^ "-latek" in
  ENIAMtokens.make_lemma (lemma, interp)
101 | - | |
(** [compose_latka_lemma t interp] passes to [ENIAMtokens.make_lemma] the
    orth of [t.token] followed by "-latka", paired with [interp]. *)
let compose_latka_lemma t interp =
  let lemma = ENIAMtokens.get_orth t.token ^ "-latka" in
  ENIAMtokens.make_lemma (lemma, interp)
104 | - | |
(** [compose_latek_int_lemma t t2 interp] passes to [ENIAMtokens.make_lemma]
    the orths of [t.token] and [t2.token] and the word "latek", joined with
    hyphens, paired with [interp]. *)
let compose_latek_int_lemma t t2 interp =
  let lemma =
    String.concat "-"
      [ENIAMtokens.get_orth t.token; ENIAMtokens.get_orth t2.token; "latek"] in
  ENIAMtokens.make_lemma (lemma, interp)
107 | - | |
(** [compose_latka_int_lemma t t2 interp] passes to [ENIAMtokens.make_lemma]
    the orths of [t.token] and [t2.token] and the word "latka", joined with
    hyphens, paired with [interp]. *)
let compose_latka_int_lemma t t2 interp =
  let lemma =
    String.concat "-"
      [ENIAMtokens.get_orth t.token; ENIAMtokens.get_orth t2.token; "latka"] in
  ENIAMtokens.make_lemma (lemma, interp)
110 | - | |
111 | -let digit_patterns3 = [ | |
112 | - [S "-"; D "intnum"], (function [_;x] -> Dig("-" ^ dig_value x,"intnum") | _ -> failwith "digit_patterns10"); | |
113 | - [S "-"; D "realnum"], (function [_;x] -> Dig("-" ^ dig_value x,"realnum") | _ -> failwith "digit_patterns10"); | |
114 | - [D "intnum"; S "-"; D "intnum"], (function [x;_;y] -> Compound("intnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns11"); | |
115 | - [D "realnum"; S "-"; D "realnum"], (function [x;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *) | |
116 | - [D "intnum"; S "-"; D "realnum"], (function [x;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *) | |
117 | - [D "realnum"; S "-"; D "intnum"], (function [x;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *) | |
118 | - [C "date"; S "-"; C "date"], (function [x;_;y] -> Compound("date-interval",[x.token;y.token]) | _ -> failwith "digit_patterns13"); | |
119 | - [C "day-month"; S "-"; C "day-month"], (function [x;_;y] -> Compound("day-month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns14"); | |
120 | - [D "day"; S "-"; D "day"], (function [x;_;y] -> Compound("day-interval",[x.token;y.token]) | _ -> failwith "digit_patterns15"); | |
121 | - [D "month"; S "-"; D "month"], (function [x;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16"); | |
122 | - [RD "month"; S "-"; RD "month"], (function [x;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns17"); | |
123 | - [D "year"; S "-"; D "year"], (function [x;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16"); | |
124 | - [D "year"; S "-"; D "2dig"], (function [x;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16"); | |
125 | - [C "hour-minute"; S "-"; C "hour-minute"], (function [x;_;y] -> Compound("hour-minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns18"); | |
126 | - [D "hour"; S "-"; D "hour"], (function [x;_;y] -> Compound("hour-interval",[x.token;y.token]) | _ -> failwith "digit_patterns19"); | |
127 | - [D "minute"; S "-"; D "minute"], (function [x;_;y] -> Compound("minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns20"); | |
128 | - [RD "roman"; S "-"; RD "roman"], (function [x;_;y] -> Compound("roman-interval",[x.token;y.token]) | _ -> failwith "digit_patterns21"); | |
129 | - [D "intnum"; S " "; S "-"; S " "; D "intnum"], (function [x;_;_;_;y] -> Compound("intnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns11"); | |
130 | - [D "realnum"; S " "; S "-"; S " "; D "realnum"], (function [x;_;_;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *) | |
131 | - [D "intnum"; S " "; S "-"; S " "; D "realnum"], (function [x;_;_;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *) | |
132 | - [D "realnum"; S " "; S "-"; S " "; D "intnum"], (function [x;_;_;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *) | |
133 | - [C "date"; S " "; S "-"; S " "; C "date"], (function [x;_;_;_;y] -> Compound("date-interval",[x.token;y.token]) | _ -> failwith "digit_patterns13"); | |
134 | - [C "day-month"; S " "; S "-"; S " "; C "day-month"], (function [x;_;_;_;y] -> Compound("day-month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns14"); | |
135 | - [D "day"; S " "; S "-"; S " "; D "day"], (function [x;_;_;_;y] -> Compound("day-interval",[x.token;y.token]) | _ -> failwith "digit_patterns15"); | |
136 | - [D "month"; S " "; S "-"; S " "; D "month"], (function [x;_;_;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16"); | |
137 | - [RD "month"; S " "; S "-"; S " "; RD "month"], (function [x;_;_;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns17"); | |
138 | - [D "year"; S " "; S "-"; S " "; D "year"], (function [x;_;_;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16"); | |
139 | - [D "year"; S " "; S "-"; S " "; D "2dig"], (function [x;_;_;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16"); | |
140 | - [C "hour-minute"; S " "; S "-"; S " "; C "hour-minute"], (function [x;_;_;_;y] -> Compound("hour-minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns18"); | |
141 | - [D "hour"; S " "; S "-"; S " "; D "hour"], (function [x;_;_;_;y] -> Compound("hour-interval",[x.token;y.token]) | _ -> failwith "digit_patterns19"); | |
142 | - [D "minute"; S " "; S "-"; S " "; D "minute"], (function [x;_;_;_;y] -> Compound("minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns20"); | |
143 | - [RD "roman"; S " "; S "-"; S " "; RD "roman"], (function [x;_;_;_;y] -> Compound("roman-interval",[x.token;y.token]) | _ -> failwith "digit_patterns21"); | |
144 | - [D "intnum"; S "-"; O "latek"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:nom:m1" | _ -> failwith "digit_patterns22"); | |
145 | - [D "intnum"; S "-"; O "latka"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:gen.acc:m1" | _ -> failwith "digit_patterns22"); | |
146 | - [D "intnum"; S "-"; O "latkowi"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:dat:m1" | _ -> failwith "digit_patterns22"); | |
147 | - [D "intnum"; S "-"; O "latkiem"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:inst:m1" | _ -> failwith "digit_patterns22"); | |
148 | - [D "intnum"; S "-"; O "latku"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:loc.voc:m1" | _ -> failwith "digit_patterns22"); | |
149 | - [D "intnum"; S "-"; O "latkowie"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:nom.voc:m1" | _ -> failwith "digit_patterns22"); | |
150 | - [D "intnum"; S "-"; O "latków"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:gen.acc:m1" | _ -> failwith "digit_patterns22"); | |
151 | - [D "intnum"; S "-"; O "latkom"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:dat:m1" | _ -> failwith "digit_patterns22"); | |
152 | - [D "intnum"; S "-"; O "latkami"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:inst:m1" | _ -> failwith "digit_patterns22"); | |
153 | - [D "intnum"; S "-"; O "latkach"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:loc:m1" | _ -> failwith "digit_patterns22"); | |
154 | - [D "intnum"; S "-"; O "latka"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:nom:f" | _ -> failwith "digit_patterns22"); | |
155 | - [D "intnum"; S "-"; O "latki"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:gen:f" | _ -> failwith "digit_patterns22"); | |
156 | - [D "intnum"; S "-"; O "latce"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:dat.loc:f" | _ -> failwith "digit_patterns22"); | |
157 | - [D "intnum"; S "-"; O "latkę"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:acc:f" | _ -> failwith "digit_patterns22"); | |
158 | - [D "intnum"; S "-"; O "latką"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:inst:f" | _ -> failwith "digit_patterns22"); | |
159 | - [D "intnum"; S "-"; O "latko"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:voc:f" | _ -> failwith "digit_patterns22"); | |
160 | - [D "intnum"; S "-"; O "latki"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:nom.acc.voc:f" | _ -> failwith "digit_patterns22"); | |
161 | - [D "intnum"; S "-"; O "latek"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:gen:f" | _ -> failwith "digit_patterns22"); | |
162 | - [D "intnum"; S "-"; O "latkom"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:dat:f" | _ -> failwith "digit_patterns22"); | |
163 | - [D "intnum"; S "-"; O "latkami"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:inst:f" | _ -> failwith "digit_patterns22"); | |
164 | - [D "intnum"; S "-"; O "latkach"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:loc:f" | _ -> failwith "digit_patterns22"); | |
165 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latek"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:nom:m1" | _ -> failwith "digit_patterns22"); | |
166 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latka"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:gen.acc:m1" | _ -> failwith "digit_patterns22"); | |
167 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkowi"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:dat:m1" | _ -> failwith "digit_patterns22"); | |
168 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkiem"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:inst:m1" | _ -> failwith "digit_patterns22"); | |
169 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latku"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:loc.voc:m1" | _ -> failwith "digit_patterns22"); | |
170 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkowie"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:nom.voc:m1" | _ -> failwith "digit_patterns22"); | |
171 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latków"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:gen.acc:m1" | _ -> failwith "digit_patterns22"); | |
172 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkom"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:dat:m1" | _ -> failwith "digit_patterns22"); | |
173 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkami"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:inst:m1" | _ -> failwith "digit_patterns22"); | |
174 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkach"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:loc:m1" | _ -> failwith "digit_patterns22"); | |
175 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latka"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:nom:f" | _ -> failwith "digit_patterns22"); | |
176 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latki"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:gen:f" | _ -> failwith "digit_patterns22"); | |
177 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latce"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:dat.loc:f" | _ -> failwith "digit_patterns22"); | |
178 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkę"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:acc:f" | _ -> failwith "digit_patterns22"); | |
179 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latką"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:inst:f" | _ -> failwith "digit_patterns22"); | |
180 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latko"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:voc:f" | _ -> failwith "digit_patterns22"); | |
181 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latki"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:nom.acc.voc:f" | _ -> failwith "digit_patterns22"); | |
182 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latek"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:gen:f" | _ -> failwith "digit_patterns22"); | |
183 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkom"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:dat:f" | _ -> failwith "digit_patterns22"); | |
184 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkami"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:inst:f" | _ -> failwith "digit_patterns22"); | |
185 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkach"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:loc:f" | _ -> failwith "digit_patterns22"); | |
186 | - ] | |
187 | - | |
(* Patterns for bare host names ending in one of a fixed set of top-level
   domains.  For every domain the list holds, in this order, hosts made of one
   to five dot-terminated labels, followed by one host whose second and third
   labels are joined by a hyphen.  Every match becomes a [Dig] token of
   category "url" whose value is [concat_orths2] of the matched records. *)
let url_patterns1 =
  let as_url = (function l -> Dig(concat_orths2 l,"url")) in
  (* [labels n] is n repetitions of "letter sequence followed by a dot" *)
  let rec labels n = if n = 0 then [] else L :: S "." :: labels (n-1) in
  let hosts = [
    labels 1;
    labels 2;
    labels 3;
    labels 4;
    labels 5;
    [L; S "."; L; S "-"; L; S "."];
    ] in
  let domains = ["pl"; "uk"; "cz"; "eu"; "org"; "com"; "net"; "gov"] in
  List.flatten (List.map (fun domain ->
    List.map (fun host -> (host @ [O domain], as_url)) hosts) domains)
238 | - | |
(* Patterns built on top of already recognised "url" tokens:
   - letters, "@" and a url form an e-mail address (category "email");
   - "http://" followed by a url is still a url.  It used to be tagged
     "email", which looks like a copy-paste slip and prevented url_patterns3
     (which extends [D "url"] with a path) from applying to http addresses. *)
let url_patterns2 = [
  [L; S "@"; D "url"], (function l -> Dig(concat_orths2 l,"email"));
  [O "http"; S ":"; S "/"; S "/"; D "url"], (function l -> Dig(concat_orths2 l,"url"));
  ]
243 | - | |
(* Patterns extending a recognised "url" token with a trailing slash,
   a slash and one letter sequence, or a slash and two letter sequences
   separated by a dot.  The result is again a [Dig] of category "url". *)
let url_patterns3 =
  let as_url l = Dig(concat_orths2 l,"url") in
  let tails = [[]; [L]; [L; S "."; L]] in
  List.map (fun tail -> (D "url" :: S "/" :: tail, as_url)) tails
249 | - | |
(* State of one pattern while it is being matched against the token list.
   prefix      - elements of a Seq that precede the first matched token, stored
                 in reverse order (execute_command reverses them back);
   matched     - token records matched so far, most recently matched first;
   suffix      - elements of a Seq that follow the last matched token;
   pattern     - pattern elements that still have to be matched;
   command     - builds the replacement token from the matched records;
   command_abr - builds the replacement token list; used by
                 execute_abr_command instead of command. *)
250 | -type matching = { | |
251 | - prefix: tokens list; | |
252 | - matched: token_record list; | |
253 | - suffix: tokens list; | |
254 | - pattern: pat list; | |
255 | - command: token_record list -> token; | |
256 | - command_abr: token_record list -> tokens list; | |
257 | - } | |
258 | - | |
(* Builds the result of a completed match: a Seq made of the stored prefix,
   one new token covering all matched records, and the stored suffix.
   The new token starts where the first matched record starts, its length is
   the sum of the matched lengths and its content is produced by
   [matching.command].  Expects at least one matched record. *)
let execute_command matching =
  let records = List.rev matching.matched in
  let start = (List.hd records).beg in
  let total_len = Xlist.fold records 0 (fun acc t -> t.len + acc) in
  let replacement = {empty_token with
    orth=concat_orths records;
    beg=start;
    len=total_len;
    next=start+total_len;
    token=matching.command records;
    (*weight=0.;*) (* FIXME: add weights to individual rules and take the weights of the matched tokens into account *)
    attrs=ENIAMtokens.merge_attrs records} in
  (* prefix is kept reversed, so rev_append restores its order *)
  Seq(List.rev_append matching.prefix (Token replacement :: matching.suffix))
270 | - | |
(* Builds the result of a completed abbreviation match: the stored prefix,
   the token list produced by [matching.command_abr] from the matched
   records (in text order), and the stored suffix, wrapped in a Seq. *)
let execute_abr_command matching =
  let expansion = matching.command_abr (List.rev matching.matched) in
  Seq(List.rev_append matching.prefix (expansion @ matching.suffix))
274 | - | |
(* [match_token (pat, token)] tells whether a single pattern element accepts
   a single token:
   - D cat / RD cat : Dig / RomanDig of that category;
   - C sense        : Compound with that sense;
   - S s            : Symbol with exactly that text;
   - O orth         : a "dig" Dig, a Symbol or any letter token whose
                      orth/value equals orth;
   - L              : any letter token;
   - CL             : CapLetter, AllCap or SomeCap.
   Every other combination is rejected. *)
let match_token (pat, token) =
  match pat with
    D cat -> (match token with Dig(_,cat2) -> cat = cat2 | _ -> false)
  | C sense -> (match token with Compound(sense2,_) -> sense = sense2 | _ -> false)
  | S s -> (match token with Symbol s2 -> s = s2 | _ -> false)
  | RD cat -> (match token with RomanDig(_,cat2) -> cat = cat2 | _ -> false)
  | O orth ->
      (match token with
        Dig(v,"dig") -> orth = v
      | Symbol o | SmallLetter o | AllSmall o | SomeCap o
      | CapLetter(o,_) | AllCap(o,_,_) | FirstCap(o,_,_,_) -> orth = o
      | _ -> false)
  | L ->
      (match token with
        SmallLetter _ | CapLetter _ | AllSmall _
      | AllCap _ | FirstCap _ | SomeCap _ -> true
      | _ -> false)
  | CL ->
      (match token with
        CapLetter _ | AllCap _ | SomeCap _ -> true
      | _ -> false)
298 | - | |
(* Tries to match the first pattern element [pat] against [tokens] and returns
   the list of extended matchings (empty when nothing matches).
   For a Seq only its last element may match; the remaining elements are
   appended to [prefix] in reverse order.  A Variant is tried alternative by
   alternative. *)
let rec find_first_token matching pat tokens =
  match tokens with
    Token t ->
      if match_token (pat,t.token)
      then [{matching with matched = t :: matching.matched}]
      else []
  | Seq l ->
      let reversed = List.rev l in
      let candidates = find_first_token matching pat (List.hd reversed) in
      let before = List.tl reversed in
      Xlist.map candidates (fun m -> {m with prefix = m.prefix @ before})
  | Variant l -> List.flatten (Xlist.map l (find_first_token matching pat))
303 | - | |
(* Tries to match an inner pattern element [pat] against [tokens].
   Only plain tokens (possibly inside Variants) can match in the middle of a
   pattern; a Seq never does. *)
let rec find_middle_token matching pat tokens =
  match tokens with
    Seq _ -> []
  | Variant l -> List.flatten (Xlist.map l (find_middle_token matching pat))
  | Token t ->
      if match_token (pat,t.token)
      then [{matching with matched = t :: matching.matched}]
      else []
308 | - | |
(* Tries to match the final pattern element [pat] against [tokens].
   For a Seq only its first element may match; the remaining elements are
   appended to [suffix].  A Variant is tried alternative by alternative. *)
let rec find_last_token matching pat tokens =
  match tokens with
    Token t ->
      if match_token (pat,t.token)
      then [{matching with matched = t :: matching.matched}]
      else []
  | Seq l ->
      let candidates = find_last_token matching pat (List.hd l) in
      Xlist.map candidates (fun m -> {m with suffix = m.suffix @ (List.tl l)})
  | Variant l -> List.flatten (Xlist.map l (find_last_token matching pat))
313 | - | |
(* Continues matching after the first pattern element has been consumed.
   [matchings] are the partial matches; the argument list holds the tokens
   that follow.  Returns the replacement (a Variant of all successful
   commands) together with the tokens left after the match.
   Longer matches win: matches completed on the current token are used only
   when no partial match can be completed further on.
   Raises Not_found when nothing matches; fails on a matching whose pattern
   is already empty. *)
let rec find_pattern_tail matchings = function
    [] -> raise Not_found
  | token :: l ->
      let found,finished = Xlist.fold matchings ([],[]) (fun (found,finished) matching ->
        match matching.pattern with
          [pat] -> found, (find_last_token {matching with pattern=[]} pat token) @ finished
        | pat :: pattern -> (find_middle_token {matching with pattern=pattern} pat token) @ found, finished
        | _ -> failwith "find_pattern: ni") in
      (* result built from the matches that end on the current token *)
      let finish_here () =
        let variants = List.flatten (Xlist.map finished (fun m ->
          try [execute_command m] with Not_found -> [])) in
        match variants with
          [] -> raise Not_found
        | _ -> Variant variants,l in
      (match found with
        [] -> finish_here ()
      | _ -> (try find_pattern_tail found l with Not_found -> finish_here ()))
328 | - | |
(* Scans the token list from left to right and replaces every sequence that
   matches one of [matchings] by the result of its command.
   [rev] accumulates the already processed tokens in reverse order; the
   result is the processed list in text order.
   Patterns must not have length 1; an empty pattern is a failure.

   The continuation is placed outside the exception handler (match ... with
   exception), so the recursive calls are tail calls and a Not_found is
   caught only when it comes from find_pattern_tail. *)
let rec find_pattern matchings rev = function
    token :: l ->
      let found = Xlist.fold matchings [] (fun found matching ->
        match matching.pattern with
          pat :: pattern -> (find_first_token {matching with pattern=pattern} pat token) @ found
        | [] -> failwith "find_pattern: empty pattern") in
      (match found with
        [] -> find_pattern matchings (token :: rev) l
      | _ ->
          (match find_pattern_tail found l with
            replacement,rest -> find_pattern matchings (replacement :: rev) rest
          | exception Not_found -> find_pattern matchings (token :: rev) l))
  | [] -> List.rev rev
342 | - | |
(* Applies a list of (pattern, command) pairs to [tokens] with find_pattern.
   Every pair becomes a fresh matching; command_abr is a stub returning []. *)
let find_patterns patterns tokens =
  let initial (pattern,command) = {
    prefix=[];
    matched=[];
    suffix=[];
    pattern=pattern;
    command=command;
    command_abr=(fun _ -> [])} in
  find_pattern (Xlist.map patterns initial) [] tokens
346 | - | |
(* Continues matching of abbreviation patterns after the first pattern
   element has been consumed.  Returns the replacement (a Variant of all
   successful command_abr results) and the tokens left after the match;
   raises Not_found when nothing matches.

   Matchings whose pattern is already empty (one-element patterns) are
   complete without consuming anything more.  They are kept apart from the
   matchings that still need tokens:
   - a longer match is preferred, as before;
   - when only such complete matchings succeed, the untouched token list is
     returned as the remainder.  Previously the current token was dropped
     from the output in that case, and at the end of the input the match
     was rejected altogether. *)
let rec find_abr_pattern_tail matchings tokens =
  let execute_all ms =
    List.flatten (Xlist.map ms (fun m ->
      try [execute_abr_command m] with Not_found -> [])) in
  let complete,pending = List.partition (fun m -> m.pattern = []) matchings in
  (* fall-back: use the matchings that needed no further token *)
  let finish_complete rest =
    match execute_all complete with
      [] -> raise Not_found
    | variants -> Variant variants,rest in
  match tokens with
    [] -> finish_complete []
  | token :: l ->
      let found,finished = Xlist.fold pending ([],[]) (fun (found,finished) matching ->
        match matching.pattern with
          [pat] -> found, (find_last_token {matching with pattern=[]} pat token) @ finished
        | pat :: pattern -> (find_middle_token {matching with pattern=pattern} pat token) @ found, finished
        | [] -> found, finished (* cannot happen: pending holds non-empty patterns only *)) in
      (* result built from the matches that end on the current token *)
      let finish_here () =
        match execute_all finished with
          [] -> finish_complete tokens
        | variants -> Variant variants,l in
      (match found with
        [] -> finish_here ()
      | _ -> (try find_abr_pattern_tail found l with Not_found -> finish_here ()))
361 | - | |
(* Scans the token list from left to right and replaces every sequence that
   matches one of the abbreviation [matchings] by the result of its
   command_abr.  [rev] accumulates the processed tokens in reverse order; the
   result is the processed list in text order.  An empty pattern is a failure.

   The continuation is placed outside the exception handler (match ... with
   exception), so the recursive calls are tail calls and a Not_found is
   caught only when it comes from find_abr_pattern_tail. *)
let rec find_abr_pattern matchings rev = function
    token :: l ->
      let found = Xlist.fold matchings [] (fun found matching ->
        match matching.pattern with
          pat :: pattern -> (find_first_token {matching with pattern=pattern} pat token) @ found
        | [] -> failwith "find_abr_pattern: empty pattern") in
      (match found with
        [] -> find_abr_pattern matchings (token :: rev) l
      | _ ->
          (match find_abr_pattern_tail found l with
            replacement,rest -> find_abr_pattern matchings (replacement :: rev) rest
          | exception Not_found -> find_abr_pattern matchings (token :: rev) l))
  | [] -> List.rev rev
374 | - | |
(* Applies a list of (pattern, command_abr) pairs to [tokens] with
   find_abr_pattern.  Every pair becomes a fresh matching; the plain command
   is a stub returning an empty Symbol. *)
let find_abr_patterns patterns tokens =
  let initial (pattern,command) = {
    prefix=[];
    matched=[];
    suffix=[];
    pattern=pattern;
    command=(fun _ -> Symbol "");
    command_abr=command} in
  find_abr_pattern (Xlist.map patterns initial) [] tokens
378 | - | |
(* Runs all replacement pattern sets over [tokens], one after another, and
   normalises the token structure after each pass.  The order of the passes
   matters: later sets match tokens produced by earlier ones. *)
let find_replacement_patterns tokens =
  let passes = [
    digit_patterns1;
    digit_patterns2;
    digit_patterns3;
    ENIAMacronyms.acronym_patterns;
    ENIAMacronyms.mte_patterns;
    ENIAMacronyms.name_patterns;
    url_patterns1;
    url_patterns2;
    url_patterns3;
    ] in
  (* for debugging, print a pass result with:
     Xlist.iter tokens (fun t -> print_endline (ENIAMtokens.string_of_tokens 0 t)) *)
  List.fold_left (fun tokens patterns ->
    normalize_tokens [] (find_patterns patterns tokens)) tokens passes
402 | - | |
(* Sets the [next] field to [n] in the token(s) that end the given structure:
   the token itself, the last element of a Seq, every alternative of a
   Variant.  Fails on an empty Seq. *)
let rec set_next_id n tokens =
  match tokens with
    Token t -> Token{t with next=n}
  | Variant l -> Variant(Xlist.map l (set_next_id n))
  | Seq l ->
      (match List.rev l with
        [] -> failwith "set_next_id n"
      | last :: before -> Seq(List.rev_append before [set_next_id n last]))
410 | - | |
(* Removes space symbols from the token list.  An element directly followed
   by a space takes over the space's [next] position; a space that is not
   preceded by such an element is simply dropped.  [rev] accumulates the
   result in reverse order. *)
let rec remove_spaces rev tokens =
  match tokens with
    [] -> List.rev rev
  | x :: Token{token=Symbol " "; next=n; _} :: rest ->
      let x = set_next_id n x in
      remove_spaces (x :: rev) rest
  | Token{token=Symbol " "; _} :: rest -> remove_spaces rev rest
  | x :: rest -> remove_spaces (x :: rev) rest
tokenizer/eniam-tokenizer-1.0/ENIAMtokenizer.ml deleted
1 | -(* | |
2 | - * ENIAMtokenizer, a tokenizer for Polish | |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | - * | |
6 | - * This library is free software: you can redistribute it and/or modify | |
7 | - * it under the terms of the GNU Lesser General Public License as published by | |
8 | - * the Free Software Foundation, either version 3 of the License, or | |
9 | - * (at your option) any later version. | |
10 | - * | |
11 | - * This library is distributed in the hope that it will be useful, | |
12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | - * GNU General Public License for more details. | |
15 | - * | |
16 | - * You should have received a copy of the GNU Lesser General Public License | |
17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | - *) | |
19 | - | |
20 | -open Xstd | |
21 | -open ENIAMtokenizerTypes | |
22 | - | |
(* Textual form of a token structure; [i] is the indentation depth.
   Delegates to ENIAMtokens.string_of_tokens. *)
let string_of i tokens =
  ENIAMtokens.string_of_tokens i tokens
25 | - | |
(* Tokenizes a UTF-8 encoded [query]: classifies characters, builds the raw
   tokens, normalises them, applies the replacement patterns, removes spaces,
   applies the abbreviation patterns and normalises once more. *)
let parse query =
  query
  |> Xunicode.classified_chars_of_utf8_string
  |> ENIAMtokens.tokenize
  |> ENIAMpatterns.normalize_tokens []
  |> ENIAMpatterns.find_replacement_patterns
  |> ENIAMpatterns.remove_spaces []
  |> ENIAMpatterns.find_abr_patterns ENIAMacronyms.abr_patterns
  |> ENIAMpatterns.normalize_tokens []
tokenizer/eniam-tokenizer-1.0/ENIAMtokenizerTypes.ml deleted
1 | -(* | |
2 | - * ENIAMtokenizer, a tokenizer for Polish | |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | - * | |
6 | - * This library is free software: you can redistribute it and/or modify | |
7 | - * it under the terms of the GNU Lesser General Public License as published by | |
8 | - * the Free Software Foundation, either version 3 of the License, or | |
9 | - * (at your option) any later version. | |
10 | - * | |
11 | - * This library is distributed in the hope that it will be useful, | |
12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | - * GNU General Public License for more details. | |
15 | - * | |
16 | - * You should have received a copy of the GNU Lesser General Public License | |
17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | - *) | |
19 | - | |
20 | -open Xstd | |
21 | - | |
22 | -(* Length of a single character in the text; token positions and lengths are multiplied by it (see dig_token in ENIAMtokens) *) | |
23 | -let factor = 100 | |
24 | - | |
(* Content of a single token.  The comment next to each constructor names its
   arguments in order. *)
25 | -type token = | |
26 | - SmallLetter of string (* orth *) | |
27 | - | CapLetter of string * string (* orth * lowercase *) | |
28 | - | AllSmall of string (* orth *) | |
29 | - | AllCap of string * string * string (* orth * lowercase * all lowercase *) | |
30 | - | FirstCap of string * string * string * string (* orth * all lowercase * first letter uppercase * first letter lowercase *) | |
31 | - | SomeCap of string (* orth *) | |
32 | - | RomanDig of string * string (* value * cat *) | |
33 | - | Interp of string (* orth *) | |
34 | - | Symbol of string (* orth *) | |
35 | - | Dig of string * string (* value * cat *) | |
36 | - | Other of string (* orth *) | |
37 | - | Lemma of string * string * string list list list (* lemma * cat * interp *) | |
38 | - | Proper of string * string * string list list list * string list (* lemma * cat * interp * senses *) | |
39 | -(* | Sense of string * string * string list list list * (string * string * string list) list (* lemma * cat * interp * senses *) *) | |
40 | - | Compound of string * token list (* sense * components *) | |
41 | - (* | Tokens of string * int list (*cat * token id list *) *) | |
42 | - | |
43 | -(* The text is represented as a set of token_record objects holding | |
44 | - information about the individual tokens *) | |
45 | -and token_record = { | |
46 | - orth: string; (* sequence of characters of the original text that makes up the token *) | |
47 | - corr_orth: string; (* sequence of characters of the original text that makes up the token, with errors corrected *) | |
48 | - beg: int; (* start position of the token relative to the beginning of the paragraph *) | |
49 | - len: int; (* length of the token *) | |
50 | - next: int; (* start position of the next token relative to the beginning of the paragraph *) | |
51 | - token: token; (* content of the token *) | |
52 | - attrs: string list; (* additional attributes *) | |
53 | - } | |
54 | - | |
55 | -(* Tokens are placed in a data structure that allows efficient searching for their sequences; | |
56 | - the data structure by itself carries no information *) | |
57 | -type tokens = | |
58 | - | Token of token_record | |
59 | - | Variant of tokens list | |
60 | - | Seq of tokens list | |
61 | - | |
(* Element of a token pattern.  As interpreted by match_token in ENIAMpatterns:
   L - any letter token, CL - CapLetter/AllCap/SomeCap, D cat - Dig of the
   given category, C sense - Compound with the given sense, S s - Symbol with
   the given text, RD cat - RomanDig of the given category, O orth - token
   with the given orth. *)
62 | -type pat = L | CL | D of string | C of string | S of string | RD of string | O of string | |
63 | - | |
(* Neutral token record used as the base for [{empty_token with ...}]:
   empty strings, zero positions, an empty Symbol and no attributes. *)
let empty_token = {
  orth="";
  corr_orth="";
  beg=0;
  len=0;
  next=0;
  token=Symbol "";
  attrs=[];
  }
66 | - | |
(* Attribute-value pairs read from the "config-tokenizer" file.
   Any failure while loading is reported on standard output and yields an
   empty configuration. *)
let config =
  match File.load_attr_val_pairs "config-tokenizer" with
    pairs -> pairs
  | exception _ ->
      print_endline "ENIAMtokenizer config file not found";
      []
70 | - | |
(* Value of the MTE_FILENAME configuration variable.  When the variable is
   missing, a message is printed on standard output and "" is used. *)
let mte_filename =
  match Xlist.assoc config "MTE_FILENAME" with
    filename -> filename
  | exception Not_found ->
      print_endline "ENIAMtokenizer MTE_FILENAME config variable undefined";
      ""
tokenizer/eniam-tokenizer-1.0/ENIAMtokens.ml deleted
1 | -(* | |
2 | - * ENIAMtokenizer, a tokenizer for Polish | |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | - * | |
6 | - * This library is free software: you can redistribute it and/or modify | |
7 | - * it under the terms of the GNU Lesser General Public License as published by | |
8 | - * the Free Software Foundation, either version 3 of the License, or | |
9 | - * (at your option) any later version. | |
10 | - * | |
11 | - * This library is distributed in the hope that it will be useful, | |
12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | - * GNU General Public License for more details. | |
15 | - * | |
16 | - * You should have received a copy of the GNU Lesser General Public License | |
17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | - *) | |
19 | - | |
20 | -open Printf | |
21 | -open ENIAMtokenizerTypes | |
22 | -open Xstd | |
23 | -open Xunicode | |
24 | - | |
(* Textual form of a list of interpretations: values of one tag are joined
   with ".", tags of one interpretation with ":", interpretations with "|". *)
let string_of_interps interps =
  let string_of_tag values = String.concat "." values in
  let string_of_interp tags = String.concat ":" (List.map string_of_tag tags) in
  String.concat "|" (List.map string_of_interp interps)
29 | - | |
(* Full textual form of a token: the constructor name followed by its
   arguments in parentheses, separated by commas.  Interpretations are
   rendered with string_of_interps, senses are joined with "|", components of
   a Compound are rendered recursively, joined with ";" and put in brackets. *)
let rec string_of_token token =
  let node name args = name ^ "(" ^ String.concat "," args ^ ")" in
  match token with
    SmallLetter orth -> node "SmallLetter" [orth]
  | CapLetter(orth,lc) -> node "CapLetter" [orth; lc]
  | AllSmall orth -> node "AllSmall" [orth]
  | AllCap(orth,lc,lc2) -> node "AllCap" [orth; lc; lc2]
  | FirstCap(orth,lc,cl,ll) -> node "FirstCap" [orth; lc; cl; ll]
  | SomeCap orth -> node "SomeCap" [orth]
  | RomanDig(v,t) -> node "RomanDig" [v; t]
  | Interp orth -> node "Interp" [orth]
  | Symbol orth -> node "Symbol" [orth]
  | Dig(v,t) -> node "Dig" [v; t]
  | Other orth -> node "Other" [orth]
  | Lemma(lemma,cat,interps) ->
      node "Lemma" [lemma; cat; string_of_interps interps]
  | Proper(lemma,cat,interps,senses) ->
      node "Proper" [lemma; cat; string_of_interps interps; String.concat "|" senses]
  | Compound(sense,l) ->
      let components = String.concat ";" (Xlist.map l string_of_token) in
      node "Compound" [sense; "[" ^ components ^ "]"]
48 | - | |
(* [spaces i] is the indentation string for depth [i]: the indentation unit
   repeated [i] times.  A depth of zero or less gives the empty string
   (a negative depth used to recurse without end).  The string is built in a
   buffer instead of by repeated concatenation. *)
let spaces i =
  let unit = " " in
  let buffer = Buffer.create (max 0 i * String.length unit) in
  for _k = 1 to i do
    Buffer.add_string buffer unit
  done;
  Buffer.contents buffer
51 | - | |
(* Full textual form of a token structure, indented by [spaces i].
   A Token is printed as a record on one line; Variant and Seq print their
   elements one per line, one level deeper, separated by ";". *)
let rec string_of_tokens i tokens =
  let indent = spaces i in
  let group name l =
    let children = Xlist.map l (string_of_tokens (i+1)) in
    indent ^ name ^ "[\n" ^ String.concat ";\n" children ^ "]" in
  match tokens with
    Token t ->
      String.concat "" [
        indent;
        "{orth="; t.orth;
        ";beg="; string_of_int t.beg;
        ";len="; string_of_int t.len;
        ";next="; string_of_int t.next;
        ";token="; string_of_token t.token;
        ";attrs=["; String.concat ";" t.attrs; "]}"]
  | Variant l -> group "Variant" l
  | Seq l -> group "Seq" l
57 | - | |
(* Short textual form of a token: the constructor name only, except for
   Interp, Symbol and Other, which also show their orth. *)
let string_of_token_simple token =
  let with_orth name orth = name ^ "(" ^ orth ^ ")" in
  match token with
    SmallLetter _ -> "SmallLetter"
  | CapLetter _ -> "CapLetter"
  | AllSmall _ -> "AllSmall"
  | AllCap _ -> "AllCap"
  | FirstCap _ -> "FirstCap"
  | SomeCap _ -> "SomeCap"
  | RomanDig _ -> "RomanDig"
  | Interp orth -> with_orth "Interp" orth
  | Symbol orth -> with_orth "Symbol" orth
  | Dig _ -> "Dig"
  | Other orth -> with_orth "Other" orth
  | Lemma _ -> "Lemma"
  | Proper _ -> "Proper"
  | Compound _ -> "Compound"
75 | - | |
76 | -let rec string_of_tokens_simple = function | |
77 | - Token t -> string_of_token_simple t.token | |
78 | - | Variant l -> sprintf "Variant[%s]" (String.concat ";" (Xlist.map l string_of_tokens_simple)) | |
79 | - | Seq l -> sprintf "Seq[%s]" (String.concat ";" (Xlist.map l string_of_tokens_simple)) | |
80 | - | |
81 | -let get_orth = function | |
82 | - SmallLetter orth -> orth | |
83 | - | CapLetter(orth,lc) -> orth | |
84 | - | AllSmall orth -> orth | |
85 | - | AllCap(orth,lc,lc2) -> orth | |
86 | - | FirstCap(orth,lc,_,_) -> orth | |
87 | - | SomeCap orth -> orth | |
88 | - | Symbol orth -> orth | |
89 | - | Dig(v,_) -> v | |
90 | - | Other orth -> orth | |
91 | - | _ -> ""(*failwith "get_orth"*) | |
92 | - | |
93 | - | |
94 | -let months = StringSet.of_list ["1"; "2"; "3"; "4"; "5"; "6"; "7"; "8"; "9"; "01"; "02"; "03"; "04"; "05"; "06"; "07"; "08"; "09"; "10"; "11"; "12"] | |
95 | -let hours = StringSet.of_list ["0"; "1"; "2"; "3"; "4"; "5"; "6"; "7"; "8"; "9"; "00"; "01"; "02"; "03"; "04"; "05"; "06"; "07"; "08"; "09"; | |
96 | - "10"; "11"; "12"; "13"; "14"; "15"; "16"; "17"; "18"; "19"; "20"; "21"; "22"; "23"; "24"] | |
97 | -let days = StringSet.of_list ["1"; "2"; "3"; "4"; "5"; "6"; "7"; "8"; "9"; "01"; "02"; "03"; "04"; "05"; "06"; "07"; "08"; "09"; | |
98 | - "10"; "11"; "12"; "13"; "14"; "15"; "16"; "17"; "18"; "19"; "20"; "21"; "22"; "23"; "24"; "25"; "26"; "27"; "28"; "29"; "30"; "31"] | |
99 | -let romanmonths = StringSet.of_list ["I"; "II"; "III"; "IV"; "V"; "VI"; "VII"; "VIII"; "IX"; "X"; "XI"; "XII"] | |
100 | - | |
101 | - | |
102 | -let s_beg i = Token{empty_token with beg=i;len=1;next=i+1; token=Interp "<sentence>"} | |
103 | -let c_beg i = Token{empty_token with beg=i;len=1;next=i+1; token=Interp "<clause>"} | |
104 | - | |
105 | -let dig_token orth i digs token = | |
106 | - Token{empty_token with orth=orth;beg=i;len=Xlist.size digs * factor;next=i+Xlist.size digs * factor; token=token; attrs=["maybe cs"]} | |
107 | - | |
108 | -let sc_dig_token orth i digs token = | |
109 | - Seq[s_beg i;c_beg (i+1);Token{empty_token with orth=orth;beg=i+2;len=Xlist.size digs * factor - 2;next=i+Xlist.size digs * factor; token=token; attrs=["maybe cs"]}] | |
110 | - | |
111 | -let dig_tokens orth poss_s_beg i digs v cat = | |
112 | - if poss_s_beg then | |
113 | - [dig_token orth i digs (Dig(v,cat)); | |
114 | - sc_dig_token orth i digs (Dig(v,cat))] | |
115 | - else | |
116 | - [dig_token orth i digs (Dig(v,cat))] | |
117 | - | |
118 | -let merge_digits poss_s_beg i digs = | |
119 | - let orth = String.concat "" digs in | |
120 | - let t = dig_tokens orth poss_s_beg i digs in | |
121 | - let v = try string_of_int (int_of_string orth) with _ -> failwith "merge_digits" in | |
122 | - let variants = | |
123 | - (t orth "dig") @ | |
124 | - [Token{empty_token with orth=orth;beg=i;len=Xlist.size digs * factor;next=i+Xlist.size digs * factor; token=Proper(orth,"obj-id",[[]],["obj-id"]); attrs=["maybe cs"]}] @ | |
125 | - (if digs = ["0"] || List.hd digs <> "0" then (t orth "intnum")(* @ (t orth "realnum")*) else []) @ | |
126 | - (if List.hd digs <> "0" then (t v "year") else []) @ | |
127 | - (if StringSet.mem months orth then (t v "month") else []) @ | |
128 | - (if StringSet.mem hours orth then (t v "hour") else []) @ | |
129 | - (if StringSet.mem days orth then (t v "day") else []) @ | |
130 | - (if Xlist.size digs = 2 && List.hd digs < "6" then (t v "minute") else []) @ | |
131 | - (if Xlist.size digs = 3 then (t orth "3dig") else []) @ | |
132 | - (if Xlist.size digs = 2 then (t orth "2dig") else []) @ | |
133 | - (if Xlist.size digs <= 3 && List.hd digs <> "0" then (t orth "pref3dig") else []) in | |
134 | -(* let t = dig_token orth i digs in | |
135 | - let sc_t = sc_dig_token orth i digs in | |
136 | - let v = try int_of_string orth with _ -> failwith "merge_digits" in | |
137 | - let variants = | |
138 | - [t (Dig(v,"dig"));sc_t (Dig(v,"dig"))] @ | |
139 | - (if digs = ["0"] || List.hd digs <> "0" then [t (Dig(v,"intnum"));sc_t (Dig(v,"intnum"))] else []) @ | |
140 | - (if List.hd digs <> "0" then [t (Dig(v,"year"));sc_t (Dig(v,"year"))] else []) @ | |
141 | - (if StringSet.mem months orth then [t (Dig(v,"month"));sc_t (Dig(v,"month"))] else []) @ | |
142 | - (if StringSet.mem hours orth then [t (Dig(v,"hour"));sc_t (Dig(v,"hour"))] else []) @ | |
143 | - (if StringSet.mem days orth then [t (Dig(v,"day"));sc_t (Dig(v,"day"))] else []) @ | |
144 | - (if Xlist.size digs = 2 && List.hd digs < "6" then [t (Dig(v,"minute"));sc_t (Dig(v,"minute"))] else []) @ | |
145 | - (if Xlist.size digs = 3 then [t (Dig(v,"3dig"));sc_t (Dig(v,"3dig"))] else []) @ | |
146 | - (if Xlist.size digs <= 3 && List.hd digs <> "0" then [t (Dig(v,"pref3dig"));sc_t (Dig(v,"pref3dig"))] else []) in*) | |
147 | - Variant variants | |
148 | - | |
149 | -let recognize_roman_I v = function | |
150 | - Capital("I",_) :: Capital("I",_) :: Capital("I",_) :: [] -> v+3,false | |
151 | - | Capital("I",_) :: Capital("I",_) :: [] -> v+2,false | |
152 | - | Capital("I",_) :: [] -> v+1,false | |
153 | - | [] -> v,false | |
154 | - | Capital("I",_) :: Capital("I",_) :: Capital("I",_) :: Small("w") :: [] -> v+3,true | |
155 | - | Capital("I",_) :: Capital("I",_) :: Small("w") :: [] -> v+2,true | |
156 | - | Capital("I",_) :: Small("w") :: [] -> v+1,true | |
157 | - | Small("w") :: [] -> v,true | |
158 | - | _ -> 0,false | |
159 | - | |
160 | -let recognize_roman_V v = function | |
161 | - Capital("I",_) :: ForeignCapital("V",_) :: [] -> v+4,false | |
162 | - | ForeignCapital("V",_) :: l -> recognize_roman_I (v+5) l | |
163 | - | Capital("I",_) :: ForeignCapital("X",_) :: [] -> v+9,false | |
164 | - | Capital("I",_) :: ForeignCapital("V",_) :: Small("w") :: [] -> v+4,true | |
165 | - | Capital("I",_) :: ForeignCapital("X",_) :: Small("w") :: [] -> v+9,true | |
166 | - | l -> recognize_roman_I v l | |
167 | - | |
168 | -let recognize_roman_X v = function | |
169 | - | ForeignCapital("X",_) :: ForeignCapital("X",_) :: ForeignCapital("X",_) :: l -> recognize_roman_V (v+30) l | |
170 | - | ForeignCapital("X",_) :: ForeignCapital("X",_) :: l -> recognize_roman_V (v+20) l | |
171 | - | ForeignCapital("X",_) :: l -> recognize_roman_V (v+10) l | |
172 | - | l -> recognize_roman_V v l | |
173 | - | |
174 | -let recognize_roman_L v = function | |
175 | - ForeignCapital("X",_) :: Capital("L",_) :: l -> recognize_roman_V (v+40) l | |
176 | - | Capital("L",_) :: l -> recognize_roman_X (v+50) l | |
177 | - | ForeignCapital("X",_) :: Capital("C",_) :: l -> recognize_roman_V (v+90) l | |
178 | - | l -> recognize_roman_X v l | |
179 | - | |
180 | -let recognize_roman_C v = function | |
181 | - | Capital("C",_) :: Capital("C",_) :: Capital("C",_) :: l -> recognize_roman_L (v+300) l | |
182 | - | Capital("C",_) :: Capital("C",_) :: l -> recognize_roman_L (v+200) l | |
183 | - | Capital("C",_) :: l -> recognize_roman_L (v+100) l | |
184 | - | l -> recognize_roman_L v l | |
185 | - | |
186 | -let recognize_roman_D v = function | |
187 | - Capital("C",_) :: Capital("D",_) :: l -> recognize_roman_L (v+400) l | |
188 | - | Capital("D",_) :: l -> recognize_roman_C (v+500) l | |
189 | - | Capital("C",_) :: Capital("M",_) :: l -> recognize_roman_L (v+900) l | |
190 | - | l -> recognize_roman_C v l | |
191 | - | |
192 | -let recognize_roman_M v = function | |
193 | - | Capital("M",_) :: Capital("M",_) :: Capital("M",_) :: l -> recognize_roman_D (v+3000) l | |
194 | - | Capital("M",_) :: Capital("M",_) :: l -> recognize_roman_D (v+2000) l | |
195 | - | Capital("M",_) :: l -> recognize_roman_D (v+1000) l | |
196 | - | l -> recognize_roman_D v l | |
197 | - | |
198 | -let rec merge l = | |
199 | - String.concat "" (Xlist.map l (function | |
200 | - Capital(s,t) -> s | |
201 | - | ForeignCapital(s,t) -> s | |
202 | - | Small s -> s | |
203 | - | ForeignSmall s -> s | |
204 | - | _ -> failwith "merge")) | |
205 | - | |
206 | -let lowercase_first = function | |
207 | - [] -> [] | |
208 | - | Capital(s,t) :: l -> Small t :: l | |
209 | - | ForeignCapital(s,t) :: l -> ForeignSmall t :: l | |
210 | - | Small s :: l -> Small s :: l | |
211 | - | ForeignSmall s :: l -> ForeignSmall s :: l | |
212 | - | _ -> failwith "lowercase_first" | |
213 | - | |
214 | -let rec lowercase_all = function | |
215 | - [] -> [] | |
216 | - | Capital(s,t) :: l -> Small t :: lowercase_all l | |
217 | - | ForeignCapital(s,t) :: l -> ForeignSmall t :: lowercase_all l | |
218 | - | Small s :: l -> Small s :: lowercase_all l | |
219 | - | ForeignSmall s :: l -> ForeignSmall s :: lowercase_all l | |
220 | - | _ -> failwith "lowercase_all" | |
221 | - | |
222 | -let lowercase_rest = function | |
223 | - [] -> [] | |
224 | - | x :: l -> x :: lowercase_all l | |
225 | - | |
226 | -let first_capital = function | |
227 | - Capital _ :: _ -> true | |
228 | - | ForeignCapital _ :: _ -> true | |
229 | - | Small _ :: _ -> false | |
230 | - | ForeignSmall _ :: _ -> false | |
231 | - | _ -> failwith "first_capital" | |
232 | - | |
233 | -let rec all_capital = function | |
234 | - Capital _ :: l -> all_capital l | |
235 | - | ForeignCapital _ :: l -> all_capital l | |
236 | - | Small _ :: l -> false | |
237 | - | ForeignSmall _ :: l -> false | |
238 | - | [] -> true | |
239 | - | _ -> failwith "first_capital" | |
240 | - | |
241 | -let rec all_small = function | |
242 | - Capital _ :: l -> false | |
243 | - | ForeignCapital _ :: l -> false | |
244 | - | Small _ :: l -> all_small l | |
245 | - | ForeignSmall _ :: l -> all_small l | |
246 | - | [] -> true | |
247 | - | _ -> failwith "first_capital" | |
248 | - | |
249 | -let rest_capital = function | |
250 | - [] -> failwith "rest_capital" | |
251 | - | _ :: l -> all_capital l | |
252 | - | |
253 | -let rest_small = function | |
254 | - [] -> failwith "rest_small" | |
255 | - | _ :: l -> all_small l | |
256 | - | |
257 | -let get_first_cap = function | |
258 | - | Capital(s,t) :: l -> s | |
259 | - | ForeignCapital(s,t) :: l -> s | |
260 | - | _ -> failwith "get_first_cap" | |
261 | - | |
262 | -let get_first_lower = function | |
263 | - | Capital(s,t) :: l -> t | |
264 | - | ForeignCapital(s,t) :: l -> t | |
265 | - | _ -> failwith "get_first_lower" | |
266 | - | |
267 | -(*let cs_weight = -1. | |
268 | -let sc_cap_weight = -0.3*) | |
269 | - | |
270 | -let is_add_attr_token = function | |
271 | - SmallLetter _ -> true | |
272 | - | CapLetter _ -> true | |
273 | - | AllSmall _ -> true | |
274 | - | AllCap _ -> true | |
275 | - | FirstCap _ -> true | |
276 | - | SomeCap _ -> true | |
277 | - | _ -> false | |
278 | - | |
279 | -let rec add_attr s = function | |
280 | - Token t -> if is_add_attr_token t.token then Token{t with attrs=s :: t.attrs} else Token t | |
281 | - | Variant l -> Variant(Xlist.map l (add_attr s)) | |
282 | - | Seq l -> Seq(Xlist.map l (add_attr s)) | |
283 | - | |
284 | -let recognize_stem poss_s_beg has_sufix i letters = | |
285 | - let orth = merge letters in | |
286 | - let t = {empty_token with orth=orth;beg=i;len=Xlist.size letters * factor;next=i+Xlist.size letters * factor} in | |
287 | - let t = if poss_s_beg then | |
288 | - if Xlist.size letters = 1 then | |
289 | - if first_capital letters then Variant[ | |
290 | - Token{t with token=SmallLetter(merge (lowercase_first letters)); (*weight=cs_weight;*) attrs="cs" :: t.attrs}; | |
291 | - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=SmallLetter(merge (lowercase_first letters)); attrs="maybe cs" :: t.attrs}]; | |
292 | - Token{t with token=CapLetter(orth,merge (lowercase_first letters)); attrs="maybe cs" :: t.attrs}; | |
293 | - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=CapLetter(orth,merge (lowercase_first letters)); (*weight=sc_cap_weight;*) attrs="maybe cs" :: t.attrs}]] | |
294 | - else Token{t with token=SmallLetter orth} | |
295 | - else | |
296 | - if first_capital letters then | |
297 | - if rest_small letters then Variant[ | |
298 | - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=AllSmall(merge (lowercase_first letters))}]; | |
299 | - Token{t with token=FirstCap(orth,merge (lowercase_first letters),get_first_cap letters,get_first_lower letters)}; | |
300 | - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=FirstCap(orth,merge (lowercase_first letters),get_first_cap letters,get_first_lower letters); (*weight=sc_cap_weight*)}]] | |
301 | - else if rest_capital letters then Variant([ | |
302 | - Token{t with token=AllSmall(merge (lowercase_all letters)); (*weight=cs_weight;*) attrs="cs" :: t.attrs}; | |
303 | - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=AllSmall(merge (lowercase_all letters)); (*weight=cs_weight;*) attrs="cs" :: t.attrs}]; | |
304 | - Token{t with token=FirstCap(merge (lowercase_rest letters),merge (lowercase_all letters),get_first_cap letters,get_first_lower letters); (*weight=cs_weight;*) attrs="cs" :: t.attrs}; | |
305 | - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=FirstCap(merge (lowercase_rest letters),merge (lowercase_all letters),get_first_cap letters,get_first_lower letters); (*weight=cs_weight+.sc_cap_weight;*) attrs="cs" :: t.attrs}]] @ | |
306 | - (if has_sufix then [] else [ | |
307 | - Token{t with token=AllCap(orth,merge (lowercase_rest letters),merge (lowercase_all letters)); attrs="maybe cs" :: t.attrs}; | |
308 | - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=AllCap(orth,merge (lowercase_rest letters),merge (lowercase_all letters)); attrs="maybe cs" :: t.attrs}]])) | |
309 | - else Token{t with token=SomeCap orth} | |
310 | - else | |
311 | - if rest_small letters then Token{t with token=AllSmall orth} | |
312 | - else Token{t with token=SomeCap orth} | |
313 | - else | |
314 | - if Xlist.size letters = 1 then | |
315 | - if first_capital letters then Variant[ | |
316 | - Token{t with token=SmallLetter orth; (*weight=cs_weight;*) attrs="cs" :: t.attrs}; | |
317 | - Token{t with token=CapLetter(orth,merge (lowercase_first letters)); attrs="maybe cs" :: t.attrs}] | |
318 | - else Token{t with token=SmallLetter orth} | |
319 | - else | |
320 | - if first_capital letters then | |
321 | - if rest_small letters then | |
322 | - Token{t with token=FirstCap(orth,merge (lowercase_first letters),get_first_cap letters,get_first_lower letters)} | |
323 | - else if rest_capital letters then Variant([ | |
324 | - Token{t with token=AllSmall(merge (lowercase_all letters)); (*weight=cs_weight;*) attrs="cs" :: t.attrs}; | |
325 | - Token{t with token=FirstCap(merge (lowercase_rest letters),merge (lowercase_all letters),get_first_cap letters,get_first_lower letters); (*weight=cs_weight;*) attrs="cs" :: t.attrs}] @ | |
326 | - (if has_sufix then [] else [ | |
327 | - Token{t with token=AllCap(orth,merge (lowercase_rest letters),merge (lowercase_all letters)); attrs="maybe cs" :: t.attrs}])) | |
328 | - else Token{t with token=SomeCap orth} | |
329 | - else | |
330 | - if rest_small letters then Token{t with token=AllSmall orth} | |
331 | - else Token{t with token=SomeCap orth} in | |
332 | - if has_sufix then add_attr "required validated lemmatization" t else t | |
333 | - | |
334 | -let parse_postags s = | |
335 | - List.map (fun s -> | |
336 | - match List.map (fun t -> Str.split (Str.regexp "\\.") t) (Str.split (Str.regexp ":") s) with | |
337 | - [pos] :: tags -> pos, tags | |
338 | - | _ -> failwith ("parse_postags: " ^ s)) (Str.split (Str.regexp "|") s) | |
339 | - | |
340 | -let make_lemma (lemma,interp) = | |
341 | - match parse_postags interp with | |
342 | - [pos,tags] -> Lemma(lemma,pos,[tags]) | |
343 | - | _ -> failwith "make_lemma" | |
344 | - | |
345 | -let merge_attrs l = | |
346 | -(* print_endline (String.concat " " (Xlist.map l (fun token -> "[" ^ token.orth ^ " " ^ String.concat ";" token.attrs ^ "]"))); *) | |
347 | - let len = Xlist.size l in | |
348 | - let attrs = Xlist.fold l StringQMap.empty (fun attrs token -> | |
349 | - Xlist.fold token.attrs attrs StringQMap.add) in | |
350 | - let n_cs = try StringQMap.find attrs "cs" with Not_found -> 0 in | |
351 | - let n_maybe_cs = try StringQMap.find attrs "maybe cs" with Not_found -> 0 in | |
352 | - let new_attrs = | |
353 | - (if n_cs > 0 then | |
354 | - if n_cs + n_maybe_cs = len then ["cs"] else raise Not_found | |
355 | - else | |
356 | - if n_maybe_cs = len then ["maybe cs"] else []) @ | |
357 | - (StringQMap.fold attrs [] (fun attrs attr _ -> if attr = "cs" || attr = "maybe cs" then attrs else attr :: attrs)) in | |
358 | -(* print_endline (String.concat " " new_attrs); *) | |
359 | - new_attrs | |
360 | - | |
361 | -let suffix_lemmata = Xlist.fold [ | |
362 | - "em",make_lemma ("być","aglt:sg:pri:imperf:wok"); | |
363 | - "eś",make_lemma ("być","aglt:sg:sec:imperf:wok"); | |
364 | - "eście",make_lemma ("być","aglt:pl:sec:imperf:wok"); | |
365 | - "eśmy",make_lemma ("być","aglt:pl:pri:imperf:wok"); | |
366 | - "m",make_lemma ("być","aglt:sg:pri:imperf:nwok"); | |
367 | - "ś",make_lemma ("być","aglt:sg:sec:imperf:nwok"); | |
368 | - "ście",make_lemma ("być","aglt:pl:sec:imperf:nwok"); | |
369 | - "śmy",make_lemma ("być","aglt:pl:pri:imperf:nwok"); | |
370 | - "by",make_lemma ("by","qub"); | |
371 | - ] StringMap.empty (fun map (suf,lemma) -> StringMap.add map suf lemma) | |
372 | - | |
373 | -let recognize_suffix i letters = | |
374 | - let orth = merge letters in | |
375 | - let t = {empty_token with orth=orth;beg=i;len=Xlist.size letters * factor;next=i+Xlist.size letters * factor} in | |
376 | - if all_capital letters then Token{t with token=StringMap.find suffix_lemmata (merge (lowercase_all letters)); (*weight=cs_weight;*) attrs="cs" :: t.attrs} | |
377 | - else if all_small letters then Token{t with token=StringMap.find suffix_lemmata orth} | |
378 | - else raise Not_found | |
379 | - | |
380 | -let recognize_romandig poss_s_beg i letters = | |
381 | - let roman,w = recognize_roman_M 0 letters in | |
382 | - if roman > 0 then | |
383 | - let letters,w = if w then let l = List.rev letters in List.rev (List.tl l), [List.hd l] else letters,[] in | |
384 | - let orth = merge letters in | |
385 | - let roman = string_of_int roman in | |
386 | - let t = {empty_token with orth=orth;beg=i;len=Xlist.size letters * factor;next=i+Xlist.size letters * factor} in | |
387 | - let w = if w = [] then [] else | |
388 | - let beg = i + Xlist.size letters * factor in | |
389 | - [Variant[Token{empty_token with orth=merge w; beg=beg; len=factor; next=beg+factor; token=SmallLetter(merge w)}; | |
390 | - Token{empty_token with orth=merge w; beg=beg; len=factor; next=beg+factor; token=make_lemma ("wiek","subst:sg:_:m3")}]] in | |
391 | - if StringSet.mem romanmonths orth then [ | |
392 | - Seq(Token{t with token=RomanDig(roman,"roman"); attrs="maybe cs" :: t.attrs}::w); | |
393 | - Seq(Token{t with token=RomanDig(roman,"month"); attrs="maybe cs" :: t.attrs}::w)] @ | |
394 | - (if poss_s_beg then [ | |
395 | - Seq([s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=RomanDig(roman,"roman"); attrs="maybe cs" :: t.attrs}]@w); | |
396 | - Seq([s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=RomanDig(roman,"month"); attrs="maybe cs" :: t.attrs}]@w); | |
397 | - ] else []) | |
398 | - else [ | |
399 | - Seq(Token{t with token=RomanDig(roman,"roman"); attrs="maybe cs" :: t.attrs}::w)] @ | |
400 | - (if poss_s_beg then [ | |
401 | - Seq([s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=RomanDig(roman,"roman"); attrs="maybe cs" :: t.attrs}]@w); | |
402 | - ] else []) | |
403 | - else [] | |
404 | - | |
405 | -let sufixes1 = Xlist.map [ | |
406 | - ["m"]; | |
407 | - ["e"; "m"]; | |
408 | - ["ś"]; | |
409 | - ["e"; "ś"]; | |
410 | - ["ś"; "m"; "y"]; | |
411 | - ["e"; "ś"; "m"; "y"]; | |
412 | - ["ś"; "c"; "i"; "e"]; | |
413 | - ["e"; "ś"; "c"; "i"; "e"]; | |
414 | - ] List.rev | |
415 | - | |
416 | -let sufixes2 = Xlist.map [ | |
417 | - ["b"; "y"]; | |
418 | - ] List.rev | |
419 | - | |
420 | -let rec find_suffix rev = function | |
421 | - _, [] -> raise Not_found | |
422 | - | [], l -> rev, l | |
423 | - | s :: pat, Capital(c,t) :: l -> if s = t then find_suffix (Capital(c,t) :: rev) (pat,l) else raise Not_found | |
424 | - | s :: pat, Small t :: l -> if s = t then find_suffix (Small t :: rev) (pat,l) else raise Not_found | |
425 | - | _,_ -> raise Not_found | |
426 | - | |
427 | -let find_suffixes2 sufixes letters sufs = | |
428 | - Xlist.fold sufixes [] (fun l suf -> | |
429 | - try | |
430 | - let suf,rev_stem = find_suffix [] (suf,letters) in | |
431 | - (rev_stem,suf :: sufs) :: l | |
432 | - with Not_found -> l) | |
433 | - | |
434 | -let find_suffixes i letters = | |
435 | - let letters = List.rev letters in | |
436 | - let l = (letters,[]) :: find_suffixes2 sufixes1 letters [] in | |
437 | - let l = Xlist.fold l l (fun l (letters,sufs) -> | |
438 | - (find_suffixes2 sufixes2 letters sufs) @ l) in | |
439 | - Xlist.map l (fun (rev_stem, sufs) -> | |
440 | - List.rev (fst (Xlist.fold (List.rev rev_stem :: sufs) ([],i) (fun (seq,i) letters -> | |
441 | - (letters,i) :: seq, i + factor * Xlist.size letters)))) | |
442 | - | |
443 | -let merge_letters poss_s_beg i letters = | |
444 | - let l = find_suffixes i letters in | |
445 | - let roman = recognize_romandig poss_s_beg i letters in | |
446 | - let variants = Xlist.fold l roman (fun variants -> function | |
447 | - [] -> failwith "merge_letters" | |
448 | - | [stem,i] -> (recognize_stem poss_s_beg false i stem) :: variants | |
449 | - | (stem,i) :: suffixes -> | |
450 | - (try (Seq((recognize_stem poss_s_beg true i stem) :: Xlist.map suffixes (fun (suf,i) -> recognize_suffix i suf))) :: variants | |
451 | - with Not_found -> variants)) in | |
452 | - Variant variants | |
453 | - | |
454 | -let rec group_digits rev = function | |
455 | - [] -> List.rev rev, [] | |
456 | - | Digit s :: l -> group_digits (s :: rev) l | |
457 | - | x :: l -> List.rev rev, x :: l | |
458 | - | |
459 | -let rec group_letters rev = function | |
460 | - [] -> List.rev rev, [] | |
461 | - | Capital(s,t) :: l -> group_letters ((Capital(s,t)) :: rev) l | |
462 | - | ForeignCapital(s,t) :: l -> group_letters ((ForeignCapital(s,t)) :: rev) l | |
463 | - | Small s :: l -> group_letters ((Small s) :: rev) l | |
464 | - | ForeignSmall s :: l -> group_letters ((ForeignSmall s) :: rev) l | |
465 | - | x :: l -> List.rev rev, x :: l | |
466 | - | |
467 | -let rec group_others rev = function | |
468 | - [] -> List.rev rev, [] | |
469 | - | Other(s,_) :: l -> group_others (s :: rev) l | |
470 | - | x :: l -> List.rev rev, x :: l | |
471 | - | |
472 | -let create_sign_token poss_s_beg i signs l token = | |
473 | - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in | |
474 | - let len = Xlist.size signs * factor in | |
475 | - Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=token; attrs=["maybe cs"]},i+len,l,poss_s_beg | |
476 | - | |
477 | -let create_empty_sign_token i signs = | |
478 | - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in | |
479 | - let len = Xlist.size signs * factor in | |
480 | - {empty_token with orth=orth;beg=i;len=len;next=i+len; attrs=["maybe cs"]},i+len | |
481 | - | |
482 | -let create_sentence_seq i signs l lemma = | |
483 | - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in | |
484 | - let len = Xlist.size signs * factor in | |
485 | - Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"}; | |
486 | - Token{empty_token with orth=orth;beg=i+20;len=len-30;next=i+len-10;token=make_lemma (lemma,"sinterj")}; | |
487 | - Token{empty_token with beg=i+len-10;len=10;next=i+len;token=Interp "</sentence>"}] | |
488 | - | |
489 | -let create_sentence_seq_hapl i signs l lemma = | |
490 | - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in | |
491 | - let len = Xlist.size signs * factor in | |
492 | - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Symbol "."; attrs=["maybe cs"]}; | |
493 | - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</clause>"}; | |
494 | - Token{empty_token with orth=orth;beg=i+20;len=len-30;next=i+len-10;token=make_lemma (lemma,"sinterj")}; | |
495 | - Token{empty_token with beg=i+len-10;len=10;next=i+len;token=Interp "</sentence>"}] | |
496 | - | |
497 | -let create_sentence_seq_q i signs l lemma = | |
498 | - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in | |
499 | - let len = Xlist.size signs * factor in | |
500 | - Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "?"}; | |
501 | - Token{empty_token with beg=i+20;len=10;next=i+30;token=Interp "</clause>"}; | |
502 | - Token{empty_token with orth=orth;beg=i+30;len=len-40;next=i+len-10;token=make_lemma (lemma,"sinterj")}; | |
503 | - Token{empty_token with beg=i+len-10;len=10;next=i+len;token=Interp "</sentence>"}] | |
504 | - | |
505 | -let create_sentence_seq_hapl_q i signs l lemma = | |
506 | - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in | |
507 | - let len = Xlist.size signs * factor in | |
508 | - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Symbol "."; attrs=["maybe cs"]}; | |
509 | - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "?"}; | |
510 | - Token{empty_token with beg=i+20;len=10;next=i+30;token=Interp "</clause>"}; | |
511 | - Token{empty_token with orth=orth;beg=i+30;len=len-40;next=i+len-10;token=make_lemma (lemma,"sinterj")}; | |
512 | - Token{empty_token with beg=i+len-10;len=10;next=i+len;token=Interp "</sentence>"}] | |
513 | - | |
514 | -let create_or_beg i signs l poss_s_beg = | |
515 | - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in | |
516 | - let len = Xlist.size signs * factor in | |
517 | - Variant[ | |
518 | - Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=Symbol "-"; attrs=["maybe cs"]}; | |
519 | - Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=Interp "-"; attrs=["maybe cs"]}; (* hyphen *) | |
520 | - Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=Interp "<or>"}; | |
521 | - (* Seq[Token{empty_token with beg=i; len=20; next=i+20; token=Interp "<sentence>"}; | |
522 | - Token{empty_token with orth=orth;beg=i+20; len=len-20;next=i+len; token=Interp "<or>"}]; *) | |
523 | - Seq[Token{empty_token with beg=i; len=21; next=i+21; token=Interp "</clause>"}; | |
524 | - Token{empty_token with beg=i+21; len=20; next=i+41; token=Interp "</sentence>"}; | |
525 | - Token{empty_token with orth=orth;beg=i+41; len=len-59;next=i+len-20;token=Interp "</or>"}; | |
526 | - Token{empty_token with beg=i+len-20;len=20; next=i+len; token=Interp "<clause>"}]; | |
527 | - Seq[Token{empty_token with orth=orth;beg=i; len=len-22;next=i+len-22;token=Interp "</or>"}; | |
528 | - Token{empty_token with beg=i+len-22;len=22; next=i+len; token=Interp "<clause>"}]; | |
529 | - ],i+len,l,poss_s_beg | |
530 | - | |
531 | -let create_or_beg2 i signs l poss_s_beg = | |
532 | - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in | |
533 | - let len = Xlist.size signs * factor in | |
534 | - Variant[ | |
535 | - Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=Interp "<or>"}; | |
536 | - (* Seq[Token{empty_token with beg=i; len=20; next=i+20; token=Interp "<sentence>"}; | |
537 | - Token{empty_token with orth=orth;beg=i+20; len=len-20;next=i+len; token=Interp "<or>"}]; *) | |
538 | - Seq[Token{empty_token with beg=i; len=21; next=i+21; token=Interp "</clause>"}; | |
539 | - Token{empty_token with beg=i+21; len=20; next=i+41; token=Interp "</sentence>"}; | |
540 | - Token{empty_token with orth=orth;beg=i+41; len=len-59;next=i+len-20;token=Interp "</or>"}; | |
541 | - Token{empty_token with beg=i+len-20;len=20; next=i+len; token=Interp "<clause>"}]; | |
542 | - Seq[Token{empty_token with orth=orth;beg=i; len=len-22;next=i+len-22;token=Interp "</or>"}; | |
543 | - Token{empty_token with beg=i+len-22;len=22; next=i+len; token=Interp "<clause>"}]; | |
544 | - ],i+len,l,poss_s_beg | |
545 | - | |
546 | -let is_dot_sentence_end_marker = function | |
547 | - [] -> true | |
548 | - | [Sign " "] -> true | |
549 | - | [Sign ""] -> true | |
550 | - | [Sign " "] -> true | |
551 | - | [Sign "\""] -> true | |
552 | - | [Sign "»"] -> true | |
553 | - | [Sign "”"] -> true | |
554 | - | _ -> false | |
555 | - | |
556 | -let not_dot_sentence_end_marker = function | |
557 | - Sign " " :: Small _ :: _ -> true | |
558 | - | Sign "" :: Small _ :: _ -> true | |
559 | - | Sign " " :: Small _ :: _ -> true | |
560 | - | Sign "," :: _ -> true | |
561 | - | Sign ":" :: _ -> true | |
562 | - | Sign "?" :: _ -> true | |
563 | - | Sign "!" :: _ -> true | |
564 | - | Small _ :: _ -> true | |
565 | - | ForeignSmall _ :: _ -> true | |
566 | - | Capital _ :: _ -> true | |
567 | - | ForeignCapital _ :: _ -> true | |
568 | - | Digit _ :: _ -> true | |
569 | - | _ -> false | |
570 | - | |
571 | -let is_comma_digit_marker = function | |
572 | - Digit _ :: l -> true | |
573 | - | _ -> false | |
574 | - | |
575 | -let is_colon_sentence_end_marker = function | |
576 | - [] -> true | |
577 | - | [Sign " "] -> true | |
578 | - | [Sign ""] -> true | |
579 | - | [Sign " "] -> true | |
580 | - | _ -> false | |
581 | - | |
582 | -let is_colon_symbol = function | |
583 | - Digit _ :: _ -> true | |
584 | - | Sign "/" :: _ -> true | |
585 | - | _ -> false | |
586 | - | |
587 | -let is_multidot_sentence_end_marker = function | |
588 | - [] -> true | |
589 | - | [Sign " "] -> true | |
590 | - | [Sign ""] -> true | |
591 | - | [Sign " "] -> true | |
592 | - | [Sign "\""] -> true | |
593 | - | [Sign "»"] -> true | |
594 | - | [Sign "”"] -> true | |
595 | -(* | "\"" :: l -> true | |
596 | - | "»" :: l -> true | |
597 | - | "”" :: l -> true | |
598 | - | "“" :: l -> true | |
599 | - | " " :: "-" :: l -> true | |
600 | - | " " :: "–" :: l -> true | |
601 | - | " " :: "—" :: l -> true | |
602 | - | ")" :: l -> true | |
603 | - | "]" :: l -> true*) | |
604 | - | _ -> false | |
605 | - | |
606 | -let create_quot_digit_token i signs l = | |
607 | - let t,i2 = create_empty_sign_token i signs in | |
608 | - Variant[ | |
609 | - Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"}; | |
610 | - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}; | |
611 | - Token{t with beg=t.beg+factor; next=t.next+factor;token=Interp "”s"}]; | |
612 | - Seq[Token{t with token=Interp "”"}; | |
613 | - Token{empty_token with beg=i2;len=20;next=i2+20;token=Interp "</clause>"}; | |
614 | - Token{empty_token with orth=".";beg=i2+20;len=factor-20;next=i2+factor;token=Interp "</sentence>"}]; | |
615 | - ],i2+factor,l,true | |
616 | - | |
617 | -let rec recognize_sign_group poss_s_beg i = function | |
618 | - | (Sign " ") :: l -> create_sign_token poss_s_beg i [Sign " "] l (Symbol " ") | |
619 | - | (Sign "") :: l -> create_sign_token poss_s_beg i [Sign ""] l (Symbol " ") | |
620 | - | (Sign " ") :: l -> create_sign_token poss_s_beg i [Sign " "] l (Symbol " ") | |
621 | - | (Sign "\"") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "\""] l | |
622 | - | (Sign "\"") :: l -> | |
623 | - let t,i = create_empty_sign_token i [Sign "\""] in | |
624 | - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"};Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg | |
625 | - | (Sign "˝") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "˝"] l | |
626 | - | (Sign "˝") :: l -> | |
627 | - let t,i = create_empty_sign_token i [Sign "˝"] in | |
628 | - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"};Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg | |
629 | - | (Sign "„") :: l -> | |
630 | - let t,i = create_empty_sign_token i [Sign "„"] in | |
631 | - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"}],i,l,poss_s_beg | |
632 | - | (Sign "”") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "”"] l | |
633 | - | (Sign "”") :: l -> | |
634 | - let t,i = create_empty_sign_token i [Sign "”"] in | |
635 | - Variant[Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg | |
636 | - | (Sign "“") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "“"] l | |
637 | - | (Sign "“") :: l -> | |
638 | - let t,i = create_empty_sign_token i [Sign "“"] in | |
639 | - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"};Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg | |
640 | - | (Sign ",") :: (Sign ",") :: l -> | |
641 | - let t,i = create_empty_sign_token i [Sign ",";Sign ","] in | |
642 | - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"}],i,l,poss_s_beg | |
643 | - | (Sign ",") :: l -> | |
644 | - let t,i2 = create_empty_sign_token i [Sign ","] in | |
645 | - if is_comma_digit_marker l then | |
646 | - Token{t with token=Symbol ","},i2,l,false | |
647 | - else | |
648 | - Variant[Token{t with token=Interp ","}; | |
649 | - Seq[Token{empty_token with orth=",";beg=i;len=factor/2;next=i+factor/2;token=Interp "</clause>"}; | |
650 | - Token{empty_token with beg=i+factor/2;len=factor-(factor/2);next=i+factor;token=Interp "<clause>"}]],i2,l,false | |
651 | - | (Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: []) l (make_lemma ("(…)","sinterj")) | |
652 | - | (Sign "(") :: (Sign "?") :: (Sign "!") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "?") :: (Sign "!") :: (Sign ")") :: []) l (make_lemma ("(?!)","sinterj")) | |
653 | - | (Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: []) l (make_lemma ("(…)","sinterj")) | |
654 | - | (Sign "(") :: (Sign "+") :: (Sign "+") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "+") :: (Sign "+") :: (Sign ")") :: []) l (make_lemma ("(++)","sinterj")) | |
655 | - | (Sign "(") :: (Sign "-") :: (Sign "-") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "-") :: (Sign "-") :: (Sign ")") :: []) l (make_lemma ("(--)","symbol")) | |
656 | - | (Sign "(") :: (Sign "…") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "…") :: (Sign ")") :: []) l (make_lemma ("(…)","sinterj")) | |
657 | - | (Sign "(") :: (Sign "?") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "?") :: (Sign ")") :: []) l (make_lemma ("(?)","sinterj")) | |
658 | - | (Sign "(") :: (Sign "+") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "+") :: (Sign ")") :: []) l (make_lemma ("(+)","symbol")) | |
659 | - | (Sign "(") :: (Sign "!") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "!") :: (Sign ")") :: []) l (make_lemma ("(!)","sinterj")) | |
660 | - | (Sign "(") :: (Sign "-") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "-") :: (Sign ")") :: []) l (make_lemma ("(-)","symbol")) | |
661 | - | (Sign "(") :: (Sign "*") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "*") :: (Sign ")") :: []) l (make_lemma ("(*)","symbol")) | |
662 | - | (Sign "(") :: l -> create_sign_token poss_s_beg i [Sign "("] l (Interp "(") | |
663 | - | (Sign ":") :: (Sign "(") :: (Sign "(") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "(") :: (Sign "(") :: (Sign "(") :: []) l (make_lemma (":(((","sinterj")) | |
664 | - | (Sign ":") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "(") :: []) l (make_lemma (":(","sinterj")) | |
665 | - | (Sign ":") :: (Sign "-") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign "(") :: []) l (make_lemma (":-(","sinterj")) | |
666 | - | (Sign ";") :: (Sign "(") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "(") :: (Sign "(") :: []) l (make_lemma (";((","sinterj")) | |
667 | - | (Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";-))))","sinterj")) | |
668 | - | (Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":-))))","sinterj")) | |
669 | - | (Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":-)))","sinterj")) | |
670 | - | (Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";-)))","sinterj")) | |
671 | - | (Sign ";") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";)))","sinterj")) | |
672 | - | (Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":-))","sinterj")) | |
673 | - | (Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";-))","sinterj")) | |
674 | - | (Sign ":") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":)))","sinterj")) | |
675 | - | (Sign ":") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":))","sinterj")) | |
676 | - | (Sign ";") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";))","sinterj")) | |
677 | - | (Sign ";") :: (Sign "-") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: []) l (make_lemma (";-)","sinterj")) | |
678 | - | (Sign ":") :: (Sign "|") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "|") :: []) l (make_lemma (":|","sinterj")) | |
679 | - | (Sign ":") :: (Sign "\\") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "\\") :: []) l (make_lemma (":\\","sinterj")) | |
680 | - | (Sign ":") :: (Sign "-") :: (Sign "/") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign "/") :: []) l (make_lemma (":-/","sinterj")) | |
681 | - | (Sign ":") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign ")") :: []) l (make_lemma (":)","sinterj")) | |
682 | - | (Sign ";") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign ")") :: []) l (make_lemma (";)","sinterj")) | |
683 | - | (Sign ")") :: l -> create_sign_token poss_s_beg i [Sign ")"] l (Interp ")") | |
684 | - | (Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: []) l (make_lemma ("(…)","sinterj")) | |
685 | - | (Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: []) l (make_lemma ("(…)","sinterj")) | |
686 | - | (Sign "[") :: (Sign "+") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign "+") :: (Sign "]") :: []) l (make_lemma ("[+]","symbol")) | |
687 | - | (Sign "[") :: (Sign "-") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign "-") :: (Sign "]") :: []) l (make_lemma ("[-]","symbol")) | |
688 | - | (Sign "[") :: (Sign "?") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign "?") :: (Sign "]") :: []) l (make_lemma ("[?]","sinterj")) | |
689 | - | (Sign ":") :: (Sign "]") :: l -> | |
690 | - let t,i2 = create_empty_sign_token i [Sign ":";Sign "]"] in | |
691 | - Variant[Token{t with token=make_lemma (":]","sinterj")}; | |
692 | - Seq[Token{empty_token with orth=":";beg=i;len=factor;next=i+factor;token=Interp ":"; attrs=["maybe cs"]}; | |
693 | - Token{empty_token with orth="]";beg=i+factor;len=factor;next=i+2*factor;token=Interp "]"; attrs=["maybe cs"]}]],i2,l,false | |
694 | - | (Sign ";") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "]") :: []) l (make_lemma (";]","sinterj")) | |
695 | - | (Sign "]") :: l -> create_sign_token poss_s_beg i [Sign "]"] l (Interp "]") | |
696 | - | (Sign "[") :: l -> create_sign_token poss_s_beg i [Sign "["] l (Interp "[") | |
697 | - | (Sign ":") :: l -> | |
698 | - if is_colon_symbol l then | |
699 | - Token{empty_token with orth=":";beg=i;len=factor;next=i+factor;token=Symbol ":"; attrs=["maybe cs"]},i+factor,l,false | |
700 | - else | |
701 | - Variant[ | |
702 | - Seq[Token{empty_token with beg=i;len=11;next=i+11;token=Interp "</clause>"}; (* wyliczenie*) | |
703 | - Token{empty_token with orth=":";beg=i+11;len=factor-11;next=i+factor;token=Interp "<clause>"}]; | |
704 | - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"}; | |
705 | - Token{empty_token with orth=":";beg=i+10;len=factor-30;next=i+factor-20;token=Interp ":"}; (* mowa zależna, koniec zdania *) | |
706 | - Token{empty_token with beg=i+factor-20;len=20;next=i+factor;token=Interp "</sentence>"}]; | |
707 | - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"}; | |
708 | - Token{empty_token with orth=":";beg=i+10;len=factor-40;next=i+factor-30;token=Interp ":"}; (* po ':' zdanie z małej litery *) | |
709 | - Token{empty_token with beg=i+factor-30;len=10;next=i+factor-20;token=Interp "</sentence>"}; | |
710 | - Token{empty_token with beg=i+factor-20;len=10;next=i+factor-10;token=Interp "<sentence>"}; | |
711 | - Token{empty_token with beg=i+factor-10;len=10;next=i+factor;token=Interp "<clause>"}]; | |
712 | - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"}; | |
713 | - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</sentence>"}; | |
714 | - Token{empty_token with orth=":";beg=i+20;len=factor-20;next=i+factor;token=Interp ":s"}]; (* speaker *) | |
715 | - ],i+factor,l,true | |
716 | -(* if is_colon_sentence_end_marker l then | |
717 | - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"}; | |
718 | - Token{empty_token with orth=":";beg=i+10;len=10;next=i+20;token=Interp ":"}; | |
719 | - Token{empty_token with beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}],i+factor,l,true | |
720 | - else | |
721 | - else | |
722 | - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"}; | |
723 | - Token{empty_token with orth=":";beg=i+10;len=10;next=i+20;token=Interp ""}; | |
724 | - Token{empty_token with beg=i+20;len=factor-20;next=i+factor;token=Interp "<clause>"}],i+factor,l,false*) | |
725 | - | (Sign "'") :: (Sign "'") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "'";Sign "'"] l | |
726 | - | (Sign "'") :: (Sign "'") :: l -> | |
727 | - let t,i = create_empty_sign_token i [Sign "”"] in | |
728 | - Variant[Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg | |
729 | - | (Sign "'") :: l -> create_sign_token poss_s_beg i [Sign "'"] l (Symbol "’") | |
730 | - | (Sign "’") :: (Sign "’") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "’";Sign "’"] l | |
731 | - | (Sign "’") :: (Sign "’") :: l -> | |
732 | - let t,i = create_empty_sign_token i [Sign "”"] in | |
733 | - Variant[Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg | |
734 | - | (Sign "’") :: l -> create_sign_token poss_s_beg i [Sign "’"] l (Symbol "’") | |
735 | - | (Sign ";") :: (Sign "*") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "*") :: []) l (make_lemma (";*","sinterj")) | |
736 | - | (Sign ";") :: l -> | |
737 | - Variant[Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"}; | |
738 | - Token{empty_token with orth=";";beg=i+20;len=20;next=i+40;token=Interp "</sentence>"}; | |
739 | - Token{empty_token with beg=i+40;len=20;next=i+60;token=Interp "<sentence>"}; | |
740 | - Token{empty_token with beg=i+60;len=factor-60;next=i+factor;token=Interp "<clause>"}]; | |
741 | - Token{empty_token with orth=";";beg=i;len=factor;next=i+factor;token=Interp ";"; attrs=["maybe cs"]}],i+factor,l,false | |
742 | - | (Sign "?") :: (Sign "!") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> | |
743 | - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "?!...",i+5*factor,l,true | |
744 | - | (Sign "?") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> | |
745 | - create_sentence_seq_q i ((Sign "?") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "?...",i+4*factor,l,true | |
746 | - | (Sign "?") :: (Sign "?") :: (Sign "?") :: (Sign "?") :: l -> | |
747 | - create_sentence_seq_q i ((Sign "?") :: (Sign "?") :: (Sign "?") :: (Sign "?") :: []) l "????",i+4*factor,l,true | |
748 | - | (Sign "?") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: l -> | |
749 | - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: []) l "?!!!",i+4*factor,l,true | |
750 | - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "?") :: l -> | |
751 | - Variant[create_sentence_seq_hapl_q i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "?") :: []) l "…?"; | |
752 | - create_sentence_seq_q i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "?") :: []) l "…?"],i+4*factor,l,true | |
753 | - | (Sign "?") :: (Sign "!") :: (Sign "?") :: l -> | |
754 | - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: (Sign "?") :: []) l "?!?",i+3*factor,l,true | |
755 | - | (Sign "?") :: (Sign "?") :: (Sign "?") :: l -> | |
756 | - create_sentence_seq_q i ((Sign "?") :: (Sign "?") :: (Sign "?") :: []) l "???",i+3*factor,l,true | |
757 | - | (Sign "?") :: (Sign "!") :: l -> | |
758 | - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: []) l "?!",i+2*factor,l,true | |
759 | - | (Sign "?") :: (Sign "?") :: l -> | |
760 | - create_sentence_seq_q i ((Sign "?") :: (Sign "?") :: []) l "??",i+2*factor,l,true | |
761 | -(* | (Sign "?") :: (Sign ".") :: l -> *) | |
762 | - | (Sign "!") :: (Sign "?") :: l -> | |
763 | - create_sentence_seq_q i ((Sign "!") :: (Sign "?") :: []) l "!?",i+2*factor,l,true | |
764 | - | (Sign "?") :: (Sign "…") :: l -> | |
765 | - create_sentence_seq_q i ((Sign "?") :: (Sign "…") :: []) l "?…",i+2*factor,l,true | |
766 | - | (Sign "…") :: (Sign "?") :: l -> | |
767 | - Variant[create_sentence_seq_hapl_q i ((Sign "…") :: (Sign "?") :: []) l "…?"; | |
768 | - create_sentence_seq_q i ((Sign "…") :: (Sign "?") :: []) l "…?"],i+2*factor,l,true | |
769 | - | (Sign "?") :: l -> | |
770 | - create_sentence_seq_q i ((Sign "?") :: []) l "?",i+factor,l,true | |
771 | - | (Sign "!") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: l -> | |
772 | - create_sentence_seq i ((Sign "!") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: []) l "!!!!",i+4*factor,l,true | |
773 | - | (Sign "!") :: (Sign "!") :: (Sign "!") :: l -> | |
774 | - create_sentence_seq i ((Sign "!") :: (Sign "!") :: (Sign "!") :: []) l "!!!",i+3*factor,l,true | |
775 | - | (Sign "!") :: (Sign "!") :: l -> | |
776 | - create_sentence_seq i ((Sign "!") :: (Sign "!") :: []) l "!!",i+2*factor,l,true | |
777 | - | (Sign "!") :: l -> | |
778 | - create_sentence_seq i ((Sign "!") :: []) l "!",i+factor,l,true | |
779 | - | (Sign "…") :: l -> | |
780 | - if is_multidot_sentence_end_marker l then | |
781 | - Variant[create_sentence_seq_hapl i ((Sign "…") :: []) l "…"; | |
782 | - create_sentence_seq i ((Sign "…") :: []) l "…"],i+factor,l,true | |
783 | - else | |
784 | - Variant[create_sentence_seq_hapl i ((Sign "…") :: []) l "…"; | |
785 | - create_sentence_seq i ((Sign "…") :: []) l "…"; | |
786 | - Token{empty_token with orth="…";beg=i;len=factor;next=i+factor;token=make_lemma ("…","sinterj"); attrs=["maybe cs"]}],i+factor,l,true | |
787 | - | (Sign "/") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "/") :: l -> create_sign_token poss_s_beg i ((Sign "/") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "/") :: []) l (make_lemma ("(…)","sinterj")) | |
788 | - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> (* Różne natęrzenia wielokropka i wypunktowania *) | |
789 | - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"; | |
790 | - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+8*factor,l,true | |
791 | - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> | |
792 | - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"; | |
793 | - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+7*factor,l,true | |
794 | - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> | |
795 | - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"; | |
796 | - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+6*factor,l,true | |
797 | - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> | |
798 | - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"; | |
799 | - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+5*factor,l,true | |
800 | - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> | |
801 | - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"; | |
802 | - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+4*factor,l,true | |
803 | - | (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> | |
804 | - if is_multidot_sentence_end_marker l then | |
805 | - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"; | |
806 | - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"],i+3*factor,l,true | |
807 | - else | |
808 | - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"; | |
809 | - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"; | |
810 | - Token{empty_token with orth="...";beg=i;len=3*factor;next=i+3*factor;token=make_lemma ("…","sinterj"); attrs=["maybe cs"]}],i+3*factor,l,true | |
811 | - | (Sign ".") :: (Sign ".") :: l -> | |
812 | - if is_multidot_sentence_end_marker l then | |
813 | - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: []) l "…"; | |
814 | - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"],i+2*factor,l,true | |
815 | - else | |
816 | - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: []) l "…"; | |
817 | - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"; | |
818 | - Token{empty_token with orth="..";beg=i;len=2*factor;next=i+2*factor;token=make_lemma ("…","sinterj"); attrs=["maybe cs"]}],i+2*factor,l,true | |
819 | - | (Sign ".") :: l -> | |
820 | - if is_dot_sentence_end_marker l then | |
821 | - Variant[Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Symbol "."; attrs=["maybe cs"]}; | |
822 | - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</clause>"}; | |
823 | - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}]; | |
824 | - Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"}; | |
825 | - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}]],i+factor,l,true | |
826 | - else if not_dot_sentence_end_marker l then | |
827 | - Token{empty_token with orth=".";beg=i;len=factor;next=i+factor;token=Symbol "."; attrs=["maybe cs"]},i+factor,l,false | |
828 | - else | |
829 | - Variant[Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Symbol "."; attrs=["maybe cs"]}; | |
830 | - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</clause>"}; | |
831 | - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}]; | |
832 | - Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"}; | |
833 | - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}]; | |
834 | - Token{empty_token with orth=".";beg=i;len=factor;next=i+factor;token=Symbol "."; attrs=["maybe cs"]}],i+factor,l,true | |
835 | - | (Sign "*") :: (Sign "*") :: (Sign "*") :: (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*";Sign "*";Sign "*";Sign "*"] l (Interp "*****") (* zastępniki liter *) | |
836 | - | (Sign "*") :: (Sign "*") :: (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*";Sign "*";Sign "*"] l (Interp "****") | |
837 | - | (Sign "*") :: (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*";Sign "*"] l (Interp "***") | |
838 | - | (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*"] l (Interp "**") | |
839 | - | (Sign "*") :: l -> (* Interp zastępnik liter i cudzysłów, symbol listy *) | |
840 | - let t,i = create_empty_sign_token i [Sign "*"] in | |
841 | - Variant[Token{t with token=Interp "*"};Token{t with token=Symbol "*"}],i,l,poss_s_beg | |
842 | - | (Sign "+") :: l -> create_sign_token poss_s_beg i [Sign "+"] l (Symbol "+") | |
843 | - | (Sign "«") :: l -> | |
844 | - let t,i = create_empty_sign_token i [Sign "«"] in | |
845 | - Variant[Token{t with token=Interp "«"};Token{t with token=Interp "«s"}],i,l,poss_s_beg | |
846 | - | (Sign "»") :: l -> | |
847 | - let t,i = create_empty_sign_token i [Sign "»"] in | |
848 | - Variant[Token{t with token=Interp "»"};Token{t with token=Interp "»s"}],i,l,poss_s_beg | |
849 | - | (Sign "<") :: (Sign "<") :: l -> create_sign_token poss_s_beg i [Sign "<";Sign "<"] l (Interp "«") (* prawy cudzysłów *) | |
850 | - | (Sign "<") :: l -> (* prawy cudzysłów i element wzoru matematycznego *) | |
851 | - let t,i = create_empty_sign_token i [Sign "<"] in | |
852 | - Variant[Token{t with token=Interp "«"};Token{t with token=Symbol "<"}],i,l,poss_s_beg | |
853 | - | (Sign ">") :: (Sign ">") :: l -> create_sign_token poss_s_beg i [Sign ">";Sign ">"] l (Interp "»") (* lewy cudzysłów *) | |
854 | - | (Sign ">") :: l -> create_sign_token poss_s_beg i [Sign ">"] l (Symbol ">") | |
855 | - | (Sign "-") :: (Sign "-") :: (Sign "-") :: l -> create_or_beg2 i [Sign "-";Sign "-";Sign "-"] l poss_s_beg | |
856 | - | (Sign "-") :: (Sign "-") :: l -> create_or_beg2 i [Sign "-";Sign "-"] l poss_s_beg | |
857 | - | (Sign "-") :: l -> create_or_beg i [Sign "-"] l poss_s_beg | |
858 | - | (Sign "‐") :: l -> create_or_beg i [Sign "‐"] l poss_s_beg | |
859 | - | (Sign "‑") :: l -> create_or_beg i [Sign "‑"] l poss_s_beg | |
860 | - | (Sign "‒") :: l -> create_or_beg i [Sign "‒"] l poss_s_beg | |
861 | - | (Sign "−") :: l -> create_or_beg i [Sign "−"] l poss_s_beg | |
862 | - | (Sign "–") :: l -> create_or_beg i [Sign "–"] l poss_s_beg | |
863 | - | (Sign "—") :: l -> create_or_beg i [Sign "—"] l poss_s_beg | |
864 | - | (Sign "‘") :: l -> create_sign_token poss_s_beg i [Sign "‘"] l (Interp "‘") | |
865 | - | (Sign "´") :: l -> create_sign_token poss_s_beg i [Sign "´"] l (Symbol "’") | |
866 | - | (Sign "`") :: (Sign "`") :: l -> | |
867 | - let t,i = create_empty_sign_token i [Sign "`";Sign "`"] in | |
868 | - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"}],i,l,poss_s_beg | |
869 | - | (Sign "`") :: l -> create_sign_token poss_s_beg i [Sign "`"] l (Symbol "’") | |
870 | - | (Sign "·") :: l -> create_sign_token poss_s_beg i [Sign "·"] l (Interp "·") | |
871 | - | (Sign "•") :: l -> create_sign_token poss_s_beg i [Sign "•"] l (Interp "•") | |
872 | - | (Sign "¨") :: l -> create_sign_token poss_s_beg i [Sign "¨"] l (Interp "¨") | |
873 | - | (Sign "~") :: l -> | |
874 | - let t,i = create_empty_sign_token i [Sign "~"] in | |
875 | - Variant[Token{t with token=Symbol "~"};Token{t with token=make_lemma ("około","prep:gen")}],i,l,false | |
876 | - | (Sign "{") :: l -> | |
877 | - let t,i = create_empty_sign_token i [Sign "{"] in | |
878 | - Variant[Token{t with token=Symbol "{"};Token{t with token=Interp "{"}],i,l,poss_s_beg | |
879 | - | (Sign "}") :: l -> | |
880 | - let t,i = create_empty_sign_token i [Sign "}"] in | |
881 | - Variant[Token{t with token=Symbol "}"};Token{t with token=Interp "}"}],i,l,poss_s_beg | |
882 | - | (Sign "#") :: l -> create_sign_token poss_s_beg i [Sign ""] l (Symbol "") | |
883 | - | (Sign "^") :: (Sign "^") :: l -> create_sign_token poss_s_beg i [Sign "^";Sign "^"] l (make_lemma ("^^","sinterj")) | |
884 | - | (Sign "^") :: l -> create_sign_token poss_s_beg i [Sign "^"] l (Symbol "^") | |
885 | - | (Sign "|") :: l -> create_sign_token poss_s_beg i [Sign "|"] l (Symbol "|") | |
886 | - | (Sign "&") :: l -> create_sign_token poss_s_beg i [Sign "&"] l (Symbol "&") | |
887 | - | (Sign "=") :: l -> create_sign_token poss_s_beg i [Sign "="] l (Symbol "=") | |
888 | - | (Sign "/") :: l -> | |
889 | - let t,i = create_empty_sign_token i [Sign "/"] in | |
890 | - Variant[Token{t with token=Symbol "/"};Token{t with token=make_lemma ("na","prep:acc")}],i,l,false | |
891 | - | (Sign "_") :: l -> create_sign_token poss_s_beg i [Sign "_"] l (Symbol "_") | |
892 | - | (Sign "@") :: l -> create_sign_token poss_s_beg i [Sign "@"] l (Symbol "@") | |
893 | - | (Sign "×") :: l -> create_sign_token poss_s_beg i [Sign "×"] l (Symbol "×") | |
894 | - | (Sign "%") :: l -> | |
895 | - let t,i = create_empty_sign_token i [Sign "%"] in | |
896 | - Variant[Token{t with token=Symbol "%"};Token{t with token=make_lemma ("procent","subst:_:_:m3")}],i,l,false | |
897 | - | (Sign "$") :: l -> | |
898 | - let t,i = create_empty_sign_token i [Sign "$"] in | |
899 | - Variant[Token{t with token=Symbol "$"};Token{t with token=make_lemma ("dolar","subst:_:_:m2")}],i,l,false | |
900 | - | (Sign "€") :: l -> create_sign_token poss_s_beg i [Sign "€"] l (make_lemma ("euro","subst:_:_:n2")) | |
901 | - | (Sign "²") :: l -> create_sign_token poss_s_beg i [Sign "²"] l (Symbol "²") | |
902 | - | (Sign "°") :: l -> create_sign_token poss_s_beg i [Sign "°"] l (make_lemma ("stopień","subst:_:_:m3")) | |
903 | - | (Sign "§") :: l -> create_sign_token false i [Sign "§"] l (make_lemma ("paragraf","subst:_:_:m3")) | |
904 | - | (Sign s) :: l -> print_endline ("recognize_sign_group: " ^ s); create_sign_token poss_s_beg i [Sign s] l (Symbol s) | |
905 | - | l -> failwith "recognize_sign_group" | |
906 | - | |
907 | -(* FIXME: "„Szpak” frunie." trzeba przenieść <sentence> przed „, ale zostawić po „s. *) | |
908 | - | |
909 | -let rec group_chars poss_s_beg i rev = function (* Walks the classified character list and groups it into tokens. [poss_s_beg] is the flag handed to the merge/recognize helpers, [i] is the position of the next token, [rev] accumulates the produced tokens in reverse order. *) | |
910 | - [] -> List.rev ((Token{empty_token with beg=i;len=factor;next=i+factor;token=Interp "</query>"}) :: rev) (* end of input: append the closing </query> marker and restore the original order *) | |
911 | - | (Digit s) :: l -> let x,l = group_digits [] ((Digit s) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_digits poss_s_beg i x) :: rev) l | |
912 | - | (Sign s) :: l -> let x,i,l,poss_s_beg = recognize_sign_group poss_s_beg i ((Sign s) :: l) in group_chars poss_s_beg i (x :: rev) l (* sign groups compute their own end position and the flag for what follows *) | |
913 | - | (Capital(s,t)) :: l -> let x,l = group_letters [] ((Capital(s,t)) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_letters poss_s_beg i x) :: rev) l | |
914 | - | (ForeignCapital(s,t)) :: l -> let x,l = group_letters [] ((ForeignCapital(s,t)) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_letters poss_s_beg i x) :: rev) l | |
915 | - | (Small s) :: l -> let x,l = group_letters [] ((Small s) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_letters poss_s_beg i x) :: rev) l | |
916 | - | (ForeignSmall s) :: l -> let x,l = group_letters [] ((ForeignSmall s) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_letters poss_s_beg i x) :: rev) l | |
917 | - | (Other(s,x)) :: l -> | |
918 | - let x,l = group_others [] ((Other(s,x)) :: l) in | |
919 | - group_chars false (i + Xlist.size x * factor) | |
920 | - ((Token{empty_token with orth=String.concat "" x;beg=i;len=Xlist.size x * factor;next=i+Xlist.size x * factor;token=Other(String.concat "" x)}) :: rev) l (* fix: next is now beg+len, equal to the position passed to the recursive call; it used to be i+factor, which is wrong for runs longer than one character *) | |
921 | - | |
922 | -let tokenize l = (* Entry point: prefixes the tokens produced by group_chars with an opening <query> marker that occupies positions 0..factor; grouping of [l] therefore starts at position [factor] with the sentence-beginning flag set. *) | |
923 | - let opening = Token{empty_token with beg=0;len=factor;next=factor;token=Interp "<query>"} in opening :: group_chars true factor [] l | |
tokenizer/eniam-tokenizer-1.0/README deleted
1 | -ENIAMtokenizer Version 1.0 : | |
2 | ------------------------ | |
3 | - | |
4 | -ENIAMtokenizer is a library that provides a tokenizer for Polish. | |
5 | - | |
6 | -Install | |
7 | -------- | |
8 | - | |
9 | -ENIAMtokenizer requires the OCaml compiler, version 4.02.3, | |
10 | -together with the Xlib library, version 3.1 or later. | |
11 | - | |
12 | -In order to install type: | |
13 | - | |
14 | -make install | |
15 | - | |
16 | -By default, ENIAMtokenizer is installed in the 'ocamlc -where'/eniam directory. | |
17 | -You can change it by editing the Makefile. | |
18 | - | |
19 | -In order to test the library, type: | |
20 | -make test | |
21 | -./test | |
22 | - | |
23 | -Credits | |
24 | -------- | |
25 | -Copyright © 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
26 | -Copyright © 2016 Institute of Computer Science Polish Academy of Sciences | |
27 | - | |
28 | -The parser uses the following licensed resources: | |
29 | - | |
30 | -SGJP: Grammatical Dictionary of Polish, version 20151020 | |
31 | -Copyright © 2007–2015 Zygmunt Saloni, Włodzimierz Gruszczyński, Marcin | |
32 | -Woliński, Robert Wołosz, Danuta Skowrońska | |
33 | -http://sgjp.pl | |
34 | - | |
35 | -Licence | |
36 | -------- | |
37 | - | |
38 | -This library is free software: you can redistribute it and/or modify | |
39 | -it under the terms of the GNU Lesser General Public License as published by | |
40 | -the Free Software Foundation, either version 3 of the License, or | |
41 | -(at your option) any later version. | |
42 | - | |
43 | -This library is distributed in the hope that it will be useful, | |
44 | -but WITHOUT ANY WARRANTY; without even the implied warranty of | |
45 | -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
46 | -GNU General Public License for more details. | |
47 | - | |
48 | -You should have received a copy of the GNU Lesser General Public License | |
49 | -along with this program. If not, see <http://www.gnu.org/licenses/>. | |
50 | - |
tokenizer/eniam-tokenizer-1.0/config-tokenizer deleted
tokenizer/eniam-tokenizer-1.0/lgpl-3.0.txt deleted
1 | - GNU LESSER GENERAL PUBLIC LICENSE | |
2 | - Version 3, 29 June 2007 | |
3 | - | |
4 | - Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> | |
5 | - Everyone is permitted to copy and distribute verbatim copies | |
6 | - of this license document, but changing it is not allowed. | |
7 | - | |
8 | - | |
9 | - This version of the GNU Lesser General Public License incorporates | |
10 | -the terms and conditions of version 3 of the GNU General Public | |
11 | -License, supplemented by the additional permissions listed below. | |
12 | - | |
13 | - 0. Additional Definitions. | |
14 | - | |
15 | - As used herein, "this License" refers to version 3 of the GNU Lesser | |
16 | -General Public License, and the "GNU GPL" refers to version 3 of the GNU | |
17 | -General Public License. | |
18 | - | |
19 | - "The Library" refers to a covered work governed by this License, | |
20 | -other than an Application or a Combined Work as defined below. | |
21 | - | |
22 | - An "Application" is any work that makes use of an interface provided | |
23 | -by the Library, but which is not otherwise based on the Library. | |
24 | -Defining a subclass of a class defined by the Library is deemed a mode | |
25 | -of using an interface provided by the Library. | |
26 | - | |
27 | - A "Combined Work" is a work produced by combining or linking an | |
28 | -Application with the Library. The particular version of the Library | |
29 | -with which the Combined Work was made is also called the "Linked | |
30 | -Version". | |
31 | - | |
32 | - The "Minimal Corresponding Source" for a Combined Work means the | |
33 | -Corresponding Source for the Combined Work, excluding any source code | |
34 | -for portions of the Combined Work that, considered in isolation, are | |
35 | -based on the Application, and not on the Linked Version. | |
36 | - | |
37 | - The "Corresponding Application Code" for a Combined Work means the | |
38 | -object code and/or source code for the Application, including any data | |
39 | -and utility programs needed for reproducing the Combined Work from the | |
40 | -Application, but excluding the System Libraries of the Combined Work. | |
41 | - | |
42 | - 1. Exception to Section 3 of the GNU GPL. | |
43 | - | |
44 | - You may convey a covered work under sections 3 and 4 of this License | |
45 | -without being bound by section 3 of the GNU GPL. | |
46 | - | |
47 | - 2. Conveying Modified Versions. | |
48 | - | |
49 | - If you modify a copy of the Library, and, in your modifications, a | |
50 | -facility refers to a function or data to be supplied by an Application | |
51 | -that uses the facility (other than as an argument passed when the | |
52 | -facility is invoked), then you may convey a copy of the modified | |
53 | -version: | |
54 | - | |
55 | - a) under this License, provided that you make a good faith effort to | |
56 | - ensure that, in the event an Application does not supply the | |
57 | - function or data, the facility still operates, and performs | |
58 | - whatever part of its purpose remains meaningful, or | |
59 | - | |
60 | - b) under the GNU GPL, with none of the additional permissions of | |
61 | - this License applicable to that copy. | |
62 | - | |
63 | - 3. Object Code Incorporating Material from Library Header Files. | |
64 | - | |
65 | - The object code form of an Application may incorporate material from | |
66 | -a header file that is part of the Library. You may convey such object | |
67 | -code under terms of your choice, provided that, if the incorporated | |
68 | -material is not limited to numerical parameters, data structure | |
69 | -layouts and accessors, or small macros, inline functions and templates | |
70 | -(ten or fewer lines in length), you do both of the following: | |
71 | - | |
72 | - a) Give prominent notice with each copy of the object code that the | |
73 | - Library is used in it and that the Library and its use are | |
74 | - covered by this License. | |
75 | - | |
76 | - b) Accompany the object code with a copy of the GNU GPL and this license | |
77 | - document. | |
78 | - | |
79 | - 4. Combined Works. | |
80 | - | |
81 | - You may convey a Combined Work under terms of your choice that, | |
82 | -taken together, effectively do not restrict modification of the | |
83 | -portions of the Library contained in the Combined Work and reverse | |
84 | -engineering for debugging such modifications, if you also do each of | |
85 | -the following: | |
86 | - | |
87 | - a) Give prominent notice with each copy of the Combined Work that | |
88 | - the Library is used in it and that the Library and its use are | |
89 | - covered by this License. | |
90 | - | |
91 | - b) Accompany the Combined Work with a copy of the GNU GPL and this license | |
92 | - document. | |
93 | - | |
94 | - c) For a Combined Work that displays copyright notices during | |
95 | - execution, include the copyright notice for the Library among | |
96 | - these notices, as well as a reference directing the user to the | |
97 | - copies of the GNU GPL and this license document. | |
98 | - | |
99 | - d) Do one of the following: | |
100 | - | |
101 | - 0) Convey the Minimal Corresponding Source under the terms of this | |
102 | - License, and the Corresponding Application Code in a form | |
103 | - suitable for, and under terms that permit, the user to | |
104 | - recombine or relink the Application with a modified version of | |
105 | - the Linked Version to produce a modified Combined Work, in the | |
106 | - manner specified by section 6 of the GNU GPL for conveying | |
107 | - Corresponding Source. | |
108 | - | |
109 | - 1) Use a suitable shared library mechanism for linking with the | |
110 | - Library. A suitable mechanism is one that (a) uses at run time | |
111 | - a copy of the Library already present on the user's computer | |
112 | - system, and (b) will operate properly with a modified version | |
113 | - of the Library that is interface-compatible with the Linked | |
114 | - Version. | |
115 | - | |
116 | - e) Provide Installation Information, but only if you would otherwise | |
117 | - be required to provide such information under section 6 of the | |
118 | - GNU GPL, and only to the extent that such information is | |
119 | - necessary to install and execute a modified version of the | |
120 | - Combined Work produced by recombining or relinking the | |
121 | - Application with a modified version of the Linked Version. (If | |
122 | - you use option 4d0, the Installation Information must accompany | |
123 | - the Minimal Corresponding Source and Corresponding Application | |
124 | - Code. If you use option 4d1, you must provide the Installation | |
125 | - Information in the manner specified by section 6 of the GNU GPL | |
126 | - for conveying Corresponding Source.) | |
127 | - | |
128 | - 5. Combined Libraries. | |
129 | - | |
130 | - You may place library facilities that are a work based on the | |
131 | -Library side by side in a single library together with other library | |
132 | -facilities that are not Applications and are not covered by this | |
133 | -License, and convey such a combined library under terms of your | |
134 | -choice, if you do both of the following: | |
135 | - | |
136 | - a) Accompany the combined library with a copy of the same work based | |
137 | - on the Library, uncombined with any other library facilities, | |
138 | - conveyed under the terms of this License. | |
139 | - | |
140 | - b) Give prominent notice with the combined library that part of it | |
141 | - is a work based on the Library, and explaining where to find the | |
142 | - accompanying uncombined form of the same work. | |
143 | - | |
144 | - 6. Revised Versions of the GNU Lesser General Public License. | |
145 | - | |
146 | - The Free Software Foundation may publish revised and/or new versions | |
147 | -of the GNU Lesser General Public License from time to time. Such new | |
148 | -versions will be similar in spirit to the present version, but may | |
149 | -differ in detail to address new problems or concerns. | |
150 | - | |
151 | - Each version is given a distinguishing version number. If the | |
152 | -Library as you received it specifies that a certain numbered version | |
153 | -of the GNU Lesser General Public License "or any later version" | |
154 | -applies to it, you have the option of following the terms and | |
155 | -conditions either of that published version or of any later version | |
156 | -published by the Free Software Foundation. If the Library as you | |
157 | -received it does not specify a version number of the GNU Lesser | |
158 | -General Public License, you may choose any version of the GNU Lesser | |
159 | -General Public License ever published by the Free Software Foundation. | |
160 | - | |
161 | - If the Library as you received it specifies that a proxy can decide | |
162 | -whether future versions of the GNU Lesser General Public License shall | |
163 | -apply, that proxy's public statement of acceptance of any version is | |
164 | -permanent authorization for you to choose that version for the | |
165 | -Library. |
tokenizer/eniam-tokenizer-1.0/makefile deleted
1 | -OCAMLC=ocamlc | |
2 | -OCAMLOPT=ocamlopt | |
3 | -OCAMLDEP=ocamldep | |
4 | -INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam | |
5 | -OCAMLFLAGS=$(INCLUDES) -g | |
6 | -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa | |
7 | -INSTALLDIR=`ocamlc -where`/eniam | |
8 | - | |
9 | -SOURCES= ENIAMtokenizerTypes.ml ENIAMtokens.ml ENIAMacronyms.ml ENIAMpatterns.ml ENIAMtokenizer.ml | |
10 | - | |
11 | -all: eniam-tokenizer.cma eniam-tokenizer.cmxa | |
12 | - | |
13 | -install: all | |
14 | - mkdir -p $(INSTALLDIR) | |
15 | - cp eniam-tokenizer.cmxa eniam-tokenizer.a eniam-tokenizer.cma config-tokenizer $(INSTALLDIR) | |
16 | - cp ENIAMtokenizerTypes.cmi ENIAMtokens.cmi ENIAMacronyms.cmi ENIAMpatterns.cmi ENIAMtokenizer.cmi $(INSTALLDIR) | |
17 | - cp ENIAMtokenizerTypes.cmx ENIAMtokens.cmx ENIAMacronyms.cmx ENIAMpatterns.cmx ENIAMtokenizer.cmx $(INSTALLDIR) | |
18 | - mkdir -p /usr/share/eniam/resources/SGJP | |
19 | - cp resources/SGJP/* /usr/share/eniam/resources/SGJP | |
20 | - | |
21 | -eniam-tokenizer.cma: $(SOURCES) | |
22 | - ocamlc -linkall -a -o eniam-tokenizer.cma $(OCAMLFLAGS) $^ | |
23 | - | |
24 | -eniam-tokenizer.cmxa: $(SOURCES) | |
25 | - ocamlopt -linkall -a -o eniam-tokenizer.cmxa $(INCLUDES) $^ | |
26 | - | |
27 | -test: test.ml | |
28 | - $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml | |
29 | - | |
30 | -.SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx | |
31 | - | |
32 | -.mll.ml: | |
33 | - ocamllex $< | |
34 | - | |
35 | -.mly.mli: | |
36 | - ocamlyacc $< | |
37 | - | |
38 | -.mly.ml: | |
39 | - ocamlyacc $< | |
40 | - | |
41 | -.ml.cmo: | |
42 | - $(OCAMLC) $(OCAMLFLAGS) -c $< | |
43 | - | |
44 | -.mli.cmi: | |
45 | - $(OCAMLC) $(OCAMLFALGS) -c $< | |
46 | - | |
47 | -.ml.cmx: | |
48 | - $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $< | |
49 | - | |
50 | -clean: | |
51 | - rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a test |
tokenizer/eniam-tokenizer-1.0/test.ml deleted
1 | -(* | |
2 | - * ENIAMtokenizer, a tokenizer for Polish | |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | - * | |
6 | - * This library is free software: you can redistribute it and/or modify | |
7 | - * it under the terms of the GNU Lesser General Public License as published by | |
8 | - * the Free Software Foundation, either version 3 of the License, or | |
9 | - * (at your option) any later version. | |
10 | - * | |
11 | - * This library is distributed in the hope that it will be useful, | |
12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | - * GNU Lesser General Public License for more details. | |
15 | - * | |
16 | - * You should have received a copy of the GNU Lesser General Public License | |
17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | - *) | |
19 | - | |
20 | - | |
21 | -let test_strings = [ | |
22 | -(* "a gdybym miałem"; | |
23 | - "A Gdy Miałem"; | |
24 | - "GDY MIAŁEM"; | |
25 | - "I II III IV V VI VII VIII IX X MCXIV MXC"; | |
26 | - "Kiedy Piotr Prabucki, przewodniczący Komisji Budżetu PeKaO"; | |
27 | - "25 idzie 20."; | |
28 | - "Kot. Kot. kot."; | |
29 | - "25."; | |
30 | - "25.888.231"; | |
31 | - "Ala 25.888.231.111 ma."; | |
32 | - "Ala 25.888.031,011."; | |
33 | - "Ala -25.888.031,011."; | |
34 | - "Ala -25 ."; | |
35 | - "Ala -1° C 3° ciepła 20—30°C od 180° do 260°C około 6° poniżej horyzontu."; | |
36 | - "Ala 22-25 ."; | |
37 | - "Ala 22.5.2000-25.5.2001 ."; | |
38 | - "Szpak frunie.";*) | |
39 | - "Kot miauczy."; | |
40 | -(* "Np. Ala.";*) | |
41 | - "w. dom."; | |
42 | - "tzn."; | |
43 | - "c.d.n."; | |
44 | -(* "Arabia Saudyjska biegnie."; | |
45 | - "Cauchy'ego ONZ-owska biegnie.";*) | |
46 | - "TE-cie E-e."; | |
47 | - "MS-DOS-owska CI-cie KRRi-cie UJ-ocie UJ-OCIE."; | |
48 | - "rock'n'rollowy d’Alembertowi staro-cerkiewno-słowiańskimi"; | |
49 | -(* "Tom idzie.";*) | |
50 | - "Miałem miał."; | |
51 | -(* "Szpak śpiewa."; | |
52 | - "Ala ma kota."; | |
53 | - "Ale mają kota:"*) | |
54 | - ] | |
55 | - | |
56 | -let _ = | |
57 | - print_endline "Testy wbudowane"; | |
58 | - Xlist.iter test_strings (fun s -> | |
59 | - print_endline ("\nTEST: " ^ s); | |
60 | - let tokens = ENIAMtokenizer.parse s in | |
61 | - (* print_endline (ENIAMtokenizer.xml_of tokens); *) | |
62 | - Xlist.iter tokens (fun token -> print_endline (ENIAMtokenizer.string_of 0 token))); | |
63 | - print_endline "Testy użytkownika."; | |
64 | - print_endline "Wpisz tekst i naciśnij ENTER, pusty tekst kończy."; | |
65 | - let s = ref (read_line ()) in | |
66 | - while !s <> "" do | |
67 | - let tokens = ENIAMtokenizer.parse !s in | |
68 | - (* print_endline (ENIAMtokenizer.xml_of tokens); *) | |
69 | - Xlist.iter tokens (fun token -> print_endline (ENIAMtokenizer.string_of 0 token)); | |
70 | - print_endline "Wpisz tekst i naciśnij ENTER, pusty tekst kończy."; | |
71 | - s := read_line () | |
72 | - done; | |
73 | - () |
tokenizer/makefile
... | ... | @@ -12,11 +12,13 @@ all: eniam-tokenizer.cma eniam-tokenizer.cmxa |
12 | 12 | |
13 | 13 | install: all |
14 | 14 | mkdir -p $(INSTALLDIR) |
15 | - cp eniam-tokenizer.cmxa eniam-tokenizer.a eniam-tokenizer.cma config-tokenizer $(INSTALLDIR) | |
15 | + cp eniam-tokenizer.cmxa eniam-tokenizer.a eniam-tokenizer.cma $(INSTALLDIR) | |
16 | 16 | cp ENIAMtokenizerTypes.cmi ENIAMtokens.cmi ENIAMacronyms.cmi ENIAMpatterns.cmi ENIAMtokenizer.cmi $(INSTALLDIR) |
17 | 17 | cp ENIAMtokenizerTypes.cmx ENIAMtokens.cmx ENIAMacronyms.cmx ENIAMpatterns.cmx ENIAMtokenizer.cmx $(INSTALLDIR) |
18 | - mkdir -p /usr/share/eniam/resources/SGJP | |
19 | - cp resources/SGJP/* /usr/share/eniam/resources/SGJP | |
18 | + mkdir -p /usr/share/eniam/tokenizer | |
19 | + cp resources/mte_20151215.tab /usr/share/eniam/tokenizer/mte_20151215.tab | |
20 | + cp resources/README /usr/share/eniam/tokenizer/README | |
21 | + ln -s /usr/share/eniam/tokenizer/mte_20151215.tab /usr/share/eniam/tokenizer/mte.tab | |
20 | 22 | |
21 | 23 | eniam-tokenizer.cma: $(SOURCES) |
22 | 24 | ocamlc -linkall -a -o eniam-tokenizer.cma $(OCAMLFLAGS) $^ |
... | ... |
tokenizer/resources/SGJP/README renamed to tokenizer/resources/README
tokenizer/resources/SGJP/mte_20151215.tab renamed to tokenizer/resources/mte_20151215.tab