Commit e0700d7b6ee4a00d085da7acc3256bc12e80eb29
1 parent
9f38bc0d
biblioteka eniam-tokenizer-1.0 z poprawioną konfiguracją
Showing
17 changed files
with
14 additions
and
2665 deletions
tokenizer/ENIAMtokenizerTypes.ml
| @@ -64,10 +64,8 @@ type pat = L | CL | D of string | C of string | S of string | RD of string | O o | @@ -64,10 +64,8 @@ type pat = L | CL | D of string | C of string | S of string | RD of string | O o | ||
| 64 | let empty_token = { | 64 | let empty_token = { |
| 65 | orth="";corr_orth="";beg=0;len=0;next=0; token=Symbol ""; attrs=[]} | 65 | orth="";corr_orth="";beg=0;len=0;next=0; token=Symbol ""; attrs=[]} |
| 66 | 66 | ||
| 67 | -let config = | ||
| 68 | - try File.load_attr_val_pairs "config-tokenizer" | ||
| 69 | - with _ -> (print_endline "ENIAMtokenizer config file not found"; []) | 67 | +let resource_path = |
| 68 | + try Sys.getenv "ENIAM_RESOURCE_PATH" | ||
| 69 | + with Not_found -> "/usr/share/eniam" | ||
| 70 | 70 | ||
| 71 | -let mte_filename = | ||
| 72 | - try Xlist.assoc config "MTE_FILENAME" | ||
| 73 | - with Not_found -> (print_endline "ENIAMtokenizer MTE_FILENAME config variable undefined"; "") | 71 | +let mte_filename = resource_path ^ "/tokenizer/mte.tab" |
tokenizer/README
| @@ -6,7 +6,7 @@ ENIAMtokenizer is a library that provides a tokenizer for Polish. | @@ -6,7 +6,7 @@ ENIAMtokenizer is a library that provides a tokenizer for Polish. | ||
| 6 | Install | 6 | Install |
| 7 | ------- | 7 | ------- |
| 8 | 8 | ||
| 9 | -ENIAMtokenizer requires OCaml version 4.02.3 compiler | 9 | +ENIAMtokenizer requires OCaml version 4.02.3 compiler |
| 10 | together with Xlib library version 3.1 or later. | 10 | together with Xlib library version 3.1 or later. |
| 11 | 11 | ||
| 12 | In order to install type: | 12 | In order to install type: |
| @@ -20,6 +20,10 @@ In order to test library type: | @@ -20,6 +20,10 @@ In order to test library type: | ||
| 20 | make test | 20 | make test |
| 21 | ./test | 21 | ./test |
| 22 | 22 | ||
| 23 | +By default, ENIAMtokenizer looks for resources in the /usr/share/eniam directory. | ||
| 24 | +However, this behaviour may be changed by setting and exporting the ENIAM_RESOURCE_PATH | ||
| 25 | +environment variable. | ||
| 26 | + | ||
| 23 | Credits | 27 | Credits |
| 24 | ------- | 28 | ------- |
| 25 | Copyright © 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | 29 | Copyright © 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> |
| @@ -47,4 +51,3 @@ GNU General Public License for more details. | @@ -47,4 +51,3 @@ GNU General Public License for more details. | ||
| 47 | 51 | ||
| 48 | You should have received a copy of the GNU Lesser General Public License | 52 | You should have received a copy of the GNU Lesser General Public License |
| 49 | along with this program. If not, see <http://www.gnu.org/licenses/>. | 53 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 50 | - |
tokenizer/config-tokenizer deleted
tokenizer/eniam-tokenizer-1.0.tar.bz2
No preview for this file type
tokenizer/eniam-tokenizer-1.0/ENIAMacronyms.ml deleted
| 1 | -(* | ||
| 2 | - * ENIAMtokenizer, a tokenizer for Polish | ||
| 3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | ||
| 4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | ||
| 5 | - * | ||
| 6 | - * This library is free software: you can redistribute it and/or modify | ||
| 7 | - * it under the terms of the GNU Lesser General Public License as published by | ||
| 8 | - * the Free Software Foundation, either version 3 of the License, or | ||
| 9 | - * (at your option) any later version. | ||
| 10 | - * | ||
| 11 | - * This library is distributed in the hope that it will be useful, | ||
| 12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 14 | - * GNU General Public License for more details. | ||
| 15 | - * | ||
| 16 | - * You should have received a copy of the GNU Lesser General Public License | ||
| 17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 18 | - *) | ||
| 19 | - | ||
| 20 | -open ENIAMtokenizerTypes | ||
| 21 | - | ||
| 22 | -let mte_patterns = | ||
| 23 | - let lines = try File.load_lines mte_filename | ||
| 24 | - with _ -> (print_endline ("ENIAMtokenizer mte file " ^ mte_filename ^ " not found"); []) in | ||
| 25 | - let l = List.rev (Xlist.rev_map lines (fun line -> | ||
| 26 | - match Str.split (Str.regexp "\t") line with | ||
| 27 | - [orths; lemma; interp] -> Str.split (Str.regexp " ") orths, lemma, interp | ||
| 28 | - | _ -> failwith ("mte_patterns: " ^ line))) in | ||
| 29 | - List.rev (Xlist.rev_map l (fun (orths,lemma,interp) -> | ||
| 30 | - Xlist.map orths (fun orth -> O orth), (fun (_:token_record list) -> ENIAMtokens.make_lemma (lemma,interp)))) | ||
| 31 | - | ||
| 32 | - | ||
| 33 | -let compose_lemma t lemma_suf interp = | ||
| 34 | - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ lemma_suf, interp) | ||
| 35 | - | ||
| 36 | -let compose_lemma3 t1 t2 t3 lemma_suf interp = | ||
| 37 | - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t1.token ^ ENIAMtokens.get_orth t2.token ^ ENIAMtokens.get_orth t3.token ^ lemma_suf, interp) | ||
| 38 | - | ||
| 39 | -let concat_orths l = | ||
| 40 | - String.concat "" (Xlist.map l (fun t -> t.orth)) | ||
| 41 | - | ||
| 42 | -let ct l lemma interp = | ||
| 43 | - let beg = (List.hd l).beg in | ||
| 44 | - let t = List.hd (List.rev l) in | ||
| 45 | - let len = t.beg + t.len - beg in | ||
| 46 | - Token{empty_token with | ||
| 47 | - orth=concat_orths l; | ||
| 48 | - beg=beg; | ||
| 49 | - len=len; | ||
| 50 | - next=t.next; | ||
| 51 | - token=ENIAMtokens.make_lemma (lemma,interp); | ||
| 52 | - attrs=ENIAMtokens.merge_attrs l} | ||
| 53 | - | ||
| 54 | -let rec get_orth_prefix i l = | ||
| 55 | - if i = 0 then "",l else | ||
| 56 | - match l with | ||
| 57 | - c :: l -> let s,l = get_orth_prefix (i-1) l in c ^ s, l | ||
| 58 | - | [] -> failwith "get_orth_prefix" | ||
| 59 | - | ||
| 60 | -let make_sub_tokens t l = | ||
| 61 | - let n = Xlist.fold l 0 (fun n (i,_,_) -> n + i) in | ||
| 62 | - let orth = Xunicode.utf8_chars_of_utf8_string t.orth in | ||
| 63 | - if Xlist.size orth <> n then failwith "make_sub_tokens: invalid orth length" else | ||
| 64 | - let l,_,_,_ = Xlist.fold l ([],t.beg,t.len,orth) (fun (l,beg,remaining_len,orth) (i,lemma,interp) -> | ||
| 65 | - let orth,remaining_orth = get_orth_prefix i orth in | ||
| 66 | - let len = if beg mod factor = 0 then i * factor else ((i-1) * factor) + (beg mod factor) in | ||
| 67 | - if remaining_len = 0 then failwith "make_sub_tokens: invalid remaining_len" else | ||
| 68 | - let len = if len > remaining_len then remaining_len else len in | ||
| 69 | - Token{empty_token with | ||
| 70 | - orth=orth; | ||
| 71 | - beg=beg; | ||
| 72 | - len=len; | ||
| 73 | - next=beg+len; | ||
| 74 | - token=ENIAMtokens.make_lemma (lemma,interp); | ||
| 75 | - attrs=t.attrs} :: l, | ||
| 76 | - beg+len, remaining_len-len, remaining_orth) in | ||
| 77 | - l | ||
| 78 | - | ||
| 79 | -let st t l = | ||
| 80 | - let l = make_sub_tokens t l in | ||
| 81 | - match l with | ||
| 82 | - Token s :: l -> List.rev (Token{s with next=t.next} :: l) | ||
| 83 | - | _ -> failwith "st" | ||
| 84 | - | ||
| 85 | -let std t d l = | ||
| 86 | - let l = make_sub_tokens t l in | ||
| 87 | - match l with | ||
| 88 | - Token s :: l -> List.rev (Token{s with orth=s.orth^d.orth; len=d.beg+d.len-s.beg; next=d.next} :: l) | ||
| 89 | - | _ -> failwith "std" | ||
| 90 | - | ||
| 91 | -let acronym_patterns = [ | ||
| 92 | - [L; S "-"; O "owscy"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | ||
| 93 | - [L; S "-"; O "owska"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 94 | - [L; S "-"; O "owski"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns"); | ||
| 95 | - [L; S "-"; O "owski"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns"); | ||
| 96 | - [L; S "-"; O "owskich"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | ||
| 97 | - [L; S "-"; O "owskich"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 98 | - [L; S "-"; O "owskich"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 99 | - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 100 | - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 101 | - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 102 | - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 103 | - [L; S "-"; O "owskiego"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns"); | ||
| 104 | - [L; S "-"; O "owskiego"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 105 | - [L; S "-"; O "owskiej"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 106 | - [L; S "-"; O "owskiej"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 107 | - [L; S "-"; O "owskiej"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 108 | - [L; S "-"; O "owskiemu"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 109 | - [L; S "-"; O "owskim"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 110 | - [L; S "-"; O "owskim"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 111 | - [L; S "-"; O "owskim"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 112 | - [L; S "-"; O "owskimi"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 113 | - [L; S "-"; O "owsko"], (function [x;_;_] -> compose_lemma x "-owski" "adja" | _ -> failwith "acronym_patterns"); | ||
| 114 | - [L; S "-"; O "owsko"], (function [x;_;_] -> compose_lemma x "-owsko" "adv:pos" | _ -> failwith "acronym_patterns"); | ||
| 115 | - [L; S "-"; O "owsku"], (function [x;_;_] -> compose_lemma x "-owski" "adjp" | _ -> failwith "acronym_patterns"); | ||
| 116 | - [L; S "-"; O "owską"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 117 | - [L; S "-"; O "owską"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 118 | - [L; S "-"; O "wscy"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | ||
| 119 | - [L; S "-"; O "wska"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 120 | - [L; S "-"; O "wski"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns"); | ||
| 121 | - [L; S "-"; O "wski"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns"); | ||
| 122 | - [L; S "-"; O "wskich"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | ||
| 123 | - [L; S "-"; O "wskich"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 124 | - [L; S "-"; O "wskich"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 125 | - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 126 | - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 127 | - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 128 | - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 129 | - [L; S "-"; O "wskiego"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns"); | ||
| 130 | - [L; S "-"; O "wskiego"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 131 | - [L; S "-"; O "wskiej"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 132 | - [L; S "-"; O "wskiej"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 133 | - [L; S "-"; O "wskiej"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 134 | - [L; S "-"; O "wskiemu"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 135 | - [L; S "-"; O "wskim"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 136 | - [L; S "-"; O "wskim"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 137 | - [L; S "-"; O "wskim"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 138 | - [L; S "-"; O "wskimi"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 139 | - [L; S "-"; O "wsko"], (function [x;_;_] -> compose_lemma x "-wski" "adja" | _ -> failwith "acronym_patterns"); | ||
| 140 | - [L; S "-"; O "wsko"], (function [x;_;_] -> compose_lemma x "-wsko" "adv:pos" | _ -> failwith "acronym_patterns"); | ||
| 141 | - [L; S "-"; O "wsku"], (function [x;_;_] -> compose_lemma x "-wski" "adjp" | _ -> failwith "acronym_patterns"); | ||
| 142 | - [L; S "-"; O "wską"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 143 | - [L; S "-"; O "wską"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 144 | - [L; S "’"; O "owa"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 145 | - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 146 | - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 147 | - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 148 | - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 149 | - [L; S "’"; O "owego"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns"); | ||
| 150 | - [L; S "’"; O "owego"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 151 | - [L; S "’"; O "owej"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 152 | - [L; S "’"; O "owej"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 153 | - [L; S "’"; O "owej"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 154 | - [L; S "’"; O "owemu"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 155 | - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | ||
| 156 | - [L; S "’"; O "owo"], (function [x;_;_] -> compose_lemma x "’owo" "adv:pos" | _ -> failwith "acronym_patterns"); | ||
| 157 | - [L; S "’"; O "owo"], (function [x;_;_] -> compose_lemma x "’owy" "adja" | _ -> failwith "acronym_patterns"); | ||
| 158 | - [L; S "’"; O "owy"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns"); | ||
| 159 | - [L; S "’"; O "owy"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns"); | ||
| 160 | - [L; S "’"; O "owych"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | ||
| 161 | - [L; S "’"; O "owych"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 162 | - [L; S "’"; O "owych"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 163 | - [L; S "’"; O "owym"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 164 | - [L; S "’"; O "owym"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 165 | - [L; S "’"; O "owym"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 166 | - [L; S "’"; O "owymi"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 167 | - [L; S "’"; O "ową"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 168 | - [L; S "’"; O "ową"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 169 | - [L; S "’"; O "owscy"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | ||
| 170 | - [L; S "’"; O "owska"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 171 | - [L; S "’"; O "owski"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns"); | ||
| 172 | - [L; S "’"; O "owski"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns"); | ||
| 173 | - [L; S "’"; O "owskich"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | ||
| 174 | - [L; S "’"; O "owskich"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 175 | - [L; S "’"; O "owskich"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 176 | - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 177 | - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 178 | - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 179 | - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 180 | - [L; S "’"; O "owskiego"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns"); | ||
| 181 | - [L; S "’"; O "owskiego"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 182 | - [L; S "’"; O "owskiej"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 183 | - [L; S "’"; O "owskiej"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 184 | - [L; S "’"; O "owskiej"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 185 | - [L; S "’"; O "owskiemu"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 186 | - [L; S "’"; O "owskim"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 187 | - [L; S "’"; O "owskim"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 188 | - [L; S "’"; O "owskim"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 189 | - [L; S "’"; O "owskimi"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 190 | - [L; S "’"; O "owsko"], (function [x;_;_] -> compose_lemma x "’owski" "adja" | _ -> failwith "acronym_patterns"); | ||
| 191 | - [L; S "’"; O "owsko"], (function [x;_;_] -> compose_lemma x "’owsko" "adv:pos" | _ -> failwith "acronym_patterns"); | ||
| 192 | - [L; S "’"; O "owsku"], (function [x;_;_] -> compose_lemma x "’owski" "adjp" | _ -> failwith "acronym_patterns"); | ||
| 193 | - [L; S "’"; O "owską"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 194 | - [L; S "’"; O "owską"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 195 | - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 196 | - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns"); | ||
| 197 | - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 198 | - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns"); | ||
| 199 | - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 200 | - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 201 | - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns"); | ||
| 202 | - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 203 | - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:n2" | _ -> failwith "acronym_patterns"); | ||
| 204 | - [CL; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:loc:f" | _ -> failwith "acronym_patterns"); | ||
| 205 | - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 206 | - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns"); | ||
| 207 | - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); | ||
| 208 | - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:n2" | _ -> failwith "acronym_patterns"); | ||
| 209 | - [CL; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:inst:f" | _ -> failwith "acronym_patterns"); | ||
| 210 | - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "T" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 211 | - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "T" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 212 | - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "TA" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); | ||
| 213 | - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "TA" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); | ||
| 214 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 215 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 216 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | ||
| 217 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | ||
| 218 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | ||
| 219 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 220 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | ||
| 221 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 222 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 223 | - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 224 | - [CL; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); | ||
| 225 | - [CL; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); | ||
| 226 | - [CL; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); | ||
| 227 | - [L; S "-"; O "ecie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 228 | - [L; S "-"; O "ecie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 229 | - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 230 | - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns"); | ||
| 231 | - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | ||
| 232 | - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:n2" | _ -> failwith "acronym_patterns"); | ||
| 233 | - [L; S "-"; O "etach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 234 | - [L; S "-"; O "etami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); | ||
| 235 | - [L; S "-"; O "etem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | ||
| 236 | - [L; S "-"; O "etom"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); | ||
| 237 | - [L; S "-"; O "etowi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); | ||
| 238 | - [L; S "-"; O "etu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 239 | - [L; S "-"; O "ety"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | ||
| 240 | - [L; S "-"; O "ety"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | ||
| 241 | - [L; S "-"; O "ety"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 242 | - [L; S "-"; O "etów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 243 | - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 244 | - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | ||
| 245 | - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | ||
| 246 | - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 247 | - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 248 | - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 249 | - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | ||
| 250 | - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 251 | - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 252 | - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:gen:f" | _ -> failwith "acronym_patterns"); | ||
| 253 | - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); | ||
| 254 | - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); | ||
| 255 | - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); | ||
| 256 | - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 257 | - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); | ||
| 258 | - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 259 | - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 260 | - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 261 | - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 262 | - [CL; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); | ||
| 263 | - [CL; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); | ||
| 264 | - [L; S "-"; O "iem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns"); | ||
| 265 | - [L; S "-"; O "iem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | ||
| 266 | - [CL; S "-"; O "o"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:voc:f" | _ -> failwith "acronym_patterns"); | ||
| 267 | - [L; S "-"; O "ocie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 268 | - [L; S "-"; O "ocie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 269 | - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 270 | - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns"); | ||
| 271 | - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); | ||
| 272 | - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:n2" | _ -> failwith "acronym_patterns"); | ||
| 273 | - [CL; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:dat:f" | _ -> failwith "acronym_patterns"); | ||
| 274 | - [L; S "-"; O "otach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 275 | - [L; S "-"; O "otami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); | ||
| 276 | - [L; S "-"; O "otem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | ||
| 277 | - [L; S "-"; O "otom"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); | ||
| 278 | - [L; S "-"; O "otowi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); | ||
| 279 | - [L; S "-"; O "otu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 280 | - [L; S "-"; O "oty"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | ||
| 281 | - [L; S "-"; O "oty"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | ||
| 282 | - [L; S "-"; O "oty"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 283 | - [L; S "-"; O "otów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 284 | - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 285 | - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns"); | ||
| 286 | - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); | ||
| 287 | - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:n2" | _ -> failwith "acronym_patterns"); | ||
| 288 | - [L; S "-"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | ||
| 289 | - [L; S "-"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 290 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns"); | ||
| 291 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns"); | ||
| 292 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 293 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:n2" | _ -> failwith "acronym_patterns"); | ||
| 294 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 295 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); | ||
| 296 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 297 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 298 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 299 | - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 300 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 301 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 302 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | ||
| 303 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | ||
| 304 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:n2" | _ -> failwith "acronym_patterns"); | ||
| 305 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 306 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | ||
| 307 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:n2" | _ -> failwith "acronym_patterns"); | ||
| 308 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 309 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 310 | - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:n2" | _ -> failwith "acronym_patterns"); | ||
| 311 | - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); | ||
| 312 | - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); | ||
| 313 | - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); | ||
| 314 | - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); | ||
| 315 | - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); | ||
| 316 | - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 317 | - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:n2" | _ -> failwith "acronym_patterns"); | ||
| 318 | - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 319 | - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 320 | - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:n2" | _ -> failwith "acronym_patterns"); | ||
| 321 | - [L; S "-"; O "zie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 322 | - [L; S "-"; O "zie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 323 | - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 324 | - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 325 | - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns"); | ||
| 326 | - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 327 | - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:n2" | _ -> failwith "acronym_patterns"); | ||
| 328 | - [CL; S "-"; O "ą"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:inst:f" | _ -> failwith "acronym_patterns"); | ||
| 329 | - [CL; S "-"; O "ę"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:acc:f" | _ -> failwith "acronym_patterns"); | ||
| 330 | - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 331 | - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns"); | ||
| 332 | - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 333 | - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns"); | ||
| 334 | - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 335 | - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 336 | - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns"); | ||
| 337 | - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 338 | - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:loc:p3" | _ -> failwith "acronym_patterns"); | ||
| 339 | - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 340 | - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns"); | ||
| 341 | - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); | ||
| 342 | - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:inst:p3" | _ -> failwith "acronym_patterns"); | ||
| 343 | - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 344 | - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 345 | - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | ||
| 346 | - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | ||
| 347 | - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 348 | - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | ||
| 349 | - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 350 | - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 351 | - [L; S "’"; O "ego"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 352 | - [L; S "’"; O "ego"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 353 | - [L; S "’"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 354 | - [L; S "’"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns"); | ||
| 355 | - [L; S "’"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | ||
| 356 | - [L; S "’"; O "emu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 357 | - [L; S "’"; O "go"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 358 | - [L; S "’"; O "go"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 359 | - [L; S "’"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns"); | ||
| 360 | - [L; S "’"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 361 | - [L; S "’"; O "m"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 362 | - [L; S "’"; O "m"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 363 | - [L; S "’"; O "mu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 364 | - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 365 | - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns"); | ||
| 366 | - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); | ||
| 367 | - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:dat:p3" | _ -> failwith "acronym_patterns"); | ||
| 368 | - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 369 | - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns"); | ||
| 370 | - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); | ||
| 371 | - [L; S "’"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | ||
| 372 | - [L; S "’"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 373 | - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 374 | - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 375 | - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); | ||
| 376 | - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 377 | - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 378 | - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 379 | - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 380 | - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 381 | - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 382 | - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | ||
| 383 | - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | ||
| 384 | - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 385 | - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 386 | - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | ||
| 387 | - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 388 | - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 389 | - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 390 | - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 391 | - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns"); | ||
| 392 | - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 393 | - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:gen:p3" | _ -> failwith "acronym_patterns"); | ||
| 394 | - [L; S "-"; O "ista"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns"); | ||
| 395 | - [L; S "-"; O "istach"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 396 | - [L; S "-"; O "istami"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 397 | - [L; S "-"; O "isto"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 398 | - [L; S "-"; O "istom"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 399 | - [L; S "-"; O "isty"], (function [x;_;_] -> compose_lemma x "-ista" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 400 | - [L; S "-"; O "isty"], (function [x;_;_] -> compose_lemma x "-ista" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 401 | - [L; S "-"; O "isty"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 402 | - [L; S "-"; O "istów"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 403 | - [L; S "-"; O "istów"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 404 | - [L; S "-"; O "istą"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 405 | - [L; S "-"; O "istę"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 406 | - [L; S "-"; O "iści"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | ||
| 407 | - [L; S "-"; O "iści"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 408 | - [L; S "-"; O "iście"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 409 | - [L; S "-"; O "iście"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 410 | - [L; S "-"; O "owca"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 411 | - [L; S "-"; O "owca"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 412 | - [L; S "-"; O "owcach"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 413 | - [L; S "-"; O "owcami"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 414 | - [L; S "-"; O "owce"], (function [x;_;_] -> compose_lemma x "-owiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 415 | - [L; S "-"; O "owce"], (function [x;_;_] -> compose_lemma x "-owiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 416 | - [L; S "-"; O "owcem"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 417 | - [L; S "-"; O "owcom"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 418 | - [L; S "-"; O "owcowi"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 419 | - [L; S "-"; O "owcu"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 420 | - [L; S "-"; O "owcu"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 421 | - [L; S "-"; O "owcy"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | ||
| 422 | - [L; S "-"; O "owcy"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 423 | - [L; S "-"; O "owcze"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 424 | - [L; S "-"; O "owców"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 425 | - [L; S "-"; O "owców"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 426 | - [L; S "-"; O "owiec"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns"); | ||
| 427 | - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); | ||
| 428 | - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:gen:f" | _ -> failwith "acronym_patterns"); | ||
| 429 | - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); | ||
| 430 | - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); | ||
| 431 | - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); | ||
| 432 | - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); | ||
| 433 | - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); | ||
| 434 | - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:voc:f" | _ -> failwith "acronym_patterns"); | ||
| 435 | - [L; S "-"; O "owskościach"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:loc:f" | _ -> failwith "acronym_patterns"); | ||
| 436 | - [L; S "-"; O "owskościami"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:inst:f" | _ -> failwith "acronym_patterns"); | ||
| 437 | - [L; S "-"; O "owskościom"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:dat:f" | _ -> failwith "acronym_patterns"); | ||
| 438 | - [L; S "-"; O "owskością"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:inst:f" | _ -> failwith "acronym_patterns"); | ||
| 439 | - [L; S "-"; O "owskość"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:acc:f" | _ -> failwith "acronym_patterns"); | ||
| 440 | - [L; S "-"; O "owskość"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:nom:f" | _ -> failwith "acronym_patterns"); | ||
| 441 | - [L; S "-"; O "wca"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 442 | - [L; S "-"; O "wca"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 443 | - [L; S "-"; O "wcach"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 444 | - [L; S "-"; O "wcami"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 445 | - [L; S "-"; O "wce"], (function [x;_;_] -> compose_lemma x "-wiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 446 | - [L; S "-"; O "wce"], (function [x;_;_] -> compose_lemma x "-wiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 447 | - [L; S "-"; O "wcem"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 448 | - [L; S "-"; O "wcom"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 449 | - [L; S "-"; O "wcowi"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 450 | - [L; S "-"; O "wcu"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 451 | - [L; S "-"; O "wcu"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 452 | - [L; S "-"; O "wcy"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | ||
| 453 | - [L; S "-"; O "wcy"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 454 | - [L; S "-"; O "wców"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 455 | - [L; S "-"; O "wców"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 456 | - [L; S "-"; O "wiec"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns"); | ||
| 457 | - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); | ||
| 458 | - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:gen:f" | _ -> failwith "acronym_patterns"); | ||
| 459 | - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); | ||
| 460 | - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); | ||
| 461 | - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); | ||
| 462 | - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); | ||
| 463 | - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); | ||
| 464 | - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:voc:f" | _ -> failwith "acronym_patterns"); | ||
| 465 | - [L; S "’"; O "owościach"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:loc:f" | _ -> failwith "acronym_patterns"); | ||
| 466 | - [L; S "’"; O "owościami"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:inst:f" | _ -> failwith "acronym_patterns"); | ||
| 467 | - [L; S "’"; O "owościom"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:dat:f" | _ -> failwith "acronym_patterns"); | ||
| 468 | - [L; S "’"; O "owością"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:inst:f" | _ -> failwith "acronym_patterns"); | ||
| 469 | - [L; S "’"; O "owość"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:acc:f" | _ -> failwith "acronym_patterns"); | ||
| 470 | - [L; S "’"; O "owość"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:nom:f" | _ -> failwith "acronym_patterns"); | ||
| 471 | - | ||
| 472 | - [L; S "-"; L; S "-"; O "owscy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | ||
| 473 | - [L; S "-"; L; S "-"; O "owska"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 474 | - [L; S "-"; L; S "-"; O "owski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns"); | ||
| 475 | - [L; S "-"; L; S "-"; O "owski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns"); | ||
| 476 | - [L; S "-"; L; S "-"; O "owskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | ||
| 477 | - [L; S "-"; L; S "-"; O "owskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 478 | - [L; S "-"; L; S "-"; O "owskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 479 | - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 480 | - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 481 | - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 482 | - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 483 | - [L; S "-"; L; S "-"; O "owskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns"); | ||
| 484 | - [L; S "-"; L; S "-"; O "owskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 485 | - [L; S "-"; L; S "-"; O "owskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 486 | - [L; S "-"; L; S "-"; O "owskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 487 | - [L; S "-"; L; S "-"; O "owskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 488 | - [L; S "-"; L; S "-"; O "owskiemu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 489 | - [L; S "-"; L; S "-"; O "owskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 490 | - [L; S "-"; L; S "-"; O "owskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 491 | - [L; S "-"; L; S "-"; O "owskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 492 | - [L; S "-"; L; S "-"; O "owskimi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 493 | - [L; S "-"; L; S "-"; O "owsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adja" | _ -> failwith "acronym_patterns"); | ||
| 494 | - [L; S "-"; L; S "-"; O "owsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owsko" "adv:pos" | _ -> failwith "acronym_patterns"); | ||
| 495 | - [L; S "-"; L; S "-"; O "owsku"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adjp" | _ -> failwith "acronym_patterns"); | ||
| 496 | - [L; S "-"; L; S "-"; O "owską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 497 | - [L; S "-"; L; S "-"; O "owską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 498 | - [L; S "-"; L; S "-"; O "wscy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | ||
| 499 | - [L; S "-"; L; S "-"; O "wska"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 500 | - [L; S "-"; L; S "-"; O "wski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns"); | ||
| 501 | - [L; S "-"; L; S "-"; O "wski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns"); | ||
| 502 | - [L; S "-"; L; S "-"; O "wskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns"); | ||
| 503 | - [L; S "-"; L; S "-"; O "wskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 504 | - [L; S "-"; L; S "-"; O "wskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 505 | - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 506 | - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 507 | - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 508 | - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 509 | - [L; S "-"; L; S "-"; O "wskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns"); | ||
| 510 | - [L; S "-"; L; S "-"; O "wskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 511 | - [L; S "-"; L; S "-"; O "wskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 512 | - [L; S "-"; L; S "-"; O "wskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 513 | - [L; S "-"; L; S "-"; O "wskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 514 | - [L; S "-"; L; S "-"; O "wskiemu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 515 | - [L; S "-"; L; S "-"; O "wskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 516 | - [L; S "-"; L; S "-"; O "wskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 517 | - [L; S "-"; L; S "-"; O "wskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); | ||
| 518 | - [L; S "-"; L; S "-"; O "wskimi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); | ||
| 519 | - [L; S "-"; L; S "-"; O "wsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adja" | _ -> failwith "acronym_patterns"); | ||
| 520 | - [L; S "-"; L; S "-"; O "wsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wsko" "adv:pos" | _ -> failwith "acronym_patterns"); | ||
| 521 | - [L; S "-"; L; S "-"; O "wsku"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adjp" | _ -> failwith "acronym_patterns"); | ||
| 522 | - [L; S "-"; L; S "-"; O "wską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 523 | - [L; S "-"; L; S "-"; O "wską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns"); | ||
| 524 | - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 525 | - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns"); | ||
| 526 | - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 527 | - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns"); | ||
| 528 | - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 529 | - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 530 | - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns"); | ||
| 531 | - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 532 | - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:n2" | _ -> failwith "acronym_patterns"); | ||
| 533 | - [CL; S "-"; CL; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:loc:f" | _ -> failwith "acronym_patterns"); | ||
| 534 | - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 535 | - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns"); | ||
| 536 | - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); | ||
| 537 | - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:n2" | _ -> failwith "acronym_patterns"); | ||
| 538 | - [CL; S "-"; CL; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:inst:f" | _ -> failwith "acronym_patterns"); | ||
| 539 | - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "T" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 540 | - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "T" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 541 | - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "TA" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); | ||
| 542 | - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "TA" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); | ||
| 543 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 544 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 545 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | ||
| 546 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | ||
| 547 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | ||
| 548 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 549 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | ||
| 550 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 551 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 552 | - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 553 | - [CL; S "-"; CL; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); | ||
| 554 | - [CL; S "-"; CL; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); | ||
| 555 | - [CL; S "-"; CL; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); | ||
| 556 | - [L; S "-"; L; S "-"; O "ecie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 557 | - [L; S "-"; L; S "-"; O "ecie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 558 | - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 559 | - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns"); | ||
| 560 | - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | ||
| 561 | - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:n2" | _ -> failwith "acronym_patterns"); | ||
| 562 | - [L; S "-"; L; S "-"; O "etach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 563 | - [L; S "-"; L; S "-"; O "etami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); | ||
| 564 | - [L; S "-"; L; S "-"; O "etem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | ||
| 565 | - [L; S "-"; L; S "-"; O "etom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); | ||
| 566 | - [L; S "-"; L; S "-"; O "etowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); | ||
| 567 | - [L; S "-"; L; S "-"; O "etu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 568 | - [L; S "-"; L; S "-"; O "ety"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | ||
| 569 | - [L; S "-"; L; S "-"; O "ety"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | ||
| 570 | - [L; S "-"; L; S "-"; O "ety"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 571 | - [L; S "-"; L; S "-"; O "etów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 572 | - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 573 | - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | ||
| 574 | - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | ||
| 575 | - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 576 | - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 577 | - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 578 | - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | ||
| 579 | - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 580 | - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 581 | - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:gen:f" | _ -> failwith "acronym_patterns"); | ||
| 582 | - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); | ||
| 583 | - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); | ||
| 584 | - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); | ||
| 585 | - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 586 | - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); | ||
| 587 | - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 588 | - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 589 | - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 590 | - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 591 | - [CL; S "-"; CL; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); | ||
| 592 | - [CL; S "-"; CL; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); | ||
| 593 | - [L; S "-"; L; S "-"; O "iem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns"); | ||
| 594 | - [L; S "-"; L; S "-"; O "iem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | ||
| 595 | - [CL; S "-"; CL; S "-"; O "o"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:voc:f" | _ -> failwith "acronym_patterns"); | ||
| 596 | - [L; S "-"; L; S "-"; O "ocie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 597 | - [L; S "-"; L; S "-"; O "ocie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 598 | - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 599 | - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns"); | ||
| 600 | - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); | ||
| 601 | - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:n2" | _ -> failwith "acronym_patterns"); | ||
| 602 | - [CL; S "-"; CL; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:dat:f" | _ -> failwith "acronym_patterns"); | ||
| 603 | - [L; S "-"; L; S "-"; O "otach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 604 | - [L; S "-"; L; S "-"; O "otami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); | ||
| 605 | - [L; S "-"; L; S "-"; O "otem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | ||
| 606 | - [L; S "-"; L; S "-"; O "otom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); | ||
| 607 | - [L; S "-"; L; S "-"; O "otowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); | ||
| 608 | - [L; S "-"; L; S "-"; O "otu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 609 | - [L; S "-"; L; S "-"; O "oty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | ||
| 610 | - [L; S "-"; L; S "-"; O "oty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | ||
| 611 | - [L; S "-"; L; S "-"; O "oty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 612 | - [L; S "-"; L; S "-"; O "otów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 613 | - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 614 | - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns"); | ||
| 615 | - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); | ||
| 616 | - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:n2" | _ -> failwith "acronym_patterns"); | ||
| 617 | - [L; S "-"; L; S "-"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | ||
| 618 | - [L; S "-"; L; S "-"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 619 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns"); | ||
| 620 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns"); | ||
| 621 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 622 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:n2" | _ -> failwith "acronym_patterns"); | ||
| 623 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 624 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); | ||
| 625 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 626 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 627 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 628 | - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 629 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 630 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 631 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | ||
| 632 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | ||
| 633 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:n2" | _ -> failwith "acronym_patterns"); | ||
| 634 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 635 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | ||
| 636 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:n2" | _ -> failwith "acronym_patterns"); | ||
| 637 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 638 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 639 | - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:n2" | _ -> failwith "acronym_patterns"); | ||
| 640 | - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); | ||
| 641 | - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); | ||
| 642 | - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); | ||
| 643 | - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); | ||
| 644 | - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); | ||
| 645 | - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 646 | - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:n2" | _ -> failwith "acronym_patterns"); | ||
| 647 | - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 648 | - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 649 | - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:n2" | _ -> failwith "acronym_patterns"); | ||
| 650 | - [L; S "-"; L; S "-"; O "zie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 651 | - [L; S "-"; L; S "-"; O "zie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 652 | - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 653 | - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 654 | - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns"); | ||
| 655 | - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 656 | - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:n2" | _ -> failwith "acronym_patterns"); | ||
| 657 | - [CL; S "-"; CL; S "-"; O "ą"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:inst:f" | _ -> failwith "acronym_patterns"); | ||
| 658 | - [CL; S "-"; CL; S "-"; O "ę"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:acc:f" | _ -> failwith "acronym_patterns"); | ||
| 659 | - [L; S "-"; L; S "-"; O "ista"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns"); | ||
| 660 | - [L; S "-"; L; S "-"; O "istach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 661 | - [L; S "-"; L; S "-"; O "istami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 662 | - [L; S "-"; L; S "-"; O "isto"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 663 | - [L; S "-"; L; S "-"; O "istom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 664 | - [L; S "-"; L; S "-"; O "isty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 665 | - [L; S "-"; L; S "-"; O "isty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 666 | - [L; S "-"; L; S "-"; O "isty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 667 | - [L; S "-"; L; S "-"; O "istów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 668 | - [L; S "-"; L; S "-"; O "istów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 669 | - [L; S "-"; L; S "-"; O "istą"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 670 | - [L; S "-"; L; S "-"; O "istę"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 671 | - [L; S "-"; L; S "-"; O "iści"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | ||
| 672 | - [L; S "-"; L; S "-"; O "iści"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 673 | - [L; S "-"; L; S "-"; O "iście"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 674 | - [L; S "-"; L; S "-"; O "iście"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 675 | - [L; S "-"; L; S "-"; O "owca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 676 | - [L; S "-"; L; S "-"; O "owca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 677 | - [L; S "-"; L; S "-"; O "owcach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 678 | - [L; S "-"; L; S "-"; O "owcami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 679 | - [L; S "-"; L; S "-"; O "owce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 680 | - [L; S "-"; L; S "-"; O "owce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 681 | - [L; S "-"; L; S "-"; O "owcem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 682 | - [L; S "-"; L; S "-"; O "owcom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 683 | - [L; S "-"; L; S "-"; O "owcowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 684 | - [L; S "-"; L; S "-"; O "owcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 685 | - [L; S "-"; L; S "-"; O "owcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 686 | - [L; S "-"; L; S "-"; O "owcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | ||
| 687 | - [L; S "-"; L; S "-"; O "owcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 688 | - [L; S "-"; L; S "-"; O "owcze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 689 | - [L; S "-"; L; S "-"; O "owców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 690 | - [L; S "-"; L; S "-"; O "owców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 691 | - [L; S "-"; L; S "-"; O "owiec"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns"); | ||
| 692 | - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); | ||
| 693 | - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:gen:f" | _ -> failwith "acronym_patterns"); | ||
| 694 | - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); | ||
| 695 | - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); | ||
| 696 | - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); | ||
| 697 | - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); | ||
| 698 | - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); | ||
| 699 | - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:voc:f" | _ -> failwith "acronym_patterns"); | ||
| 700 | - [L; S "-"; L; S "-"; O "owskościach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:loc:f" | _ -> failwith "acronym_patterns"); | ||
| 701 | - [L; S "-"; L; S "-"; O "owskościami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:inst:f" | _ -> failwith "acronym_patterns"); | ||
| 702 | - [L; S "-"; L; S "-"; O "owskościom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:dat:f" | _ -> failwith "acronym_patterns"); | ||
| 703 | - [L; S "-"; L; S "-"; O "owskością"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:inst:f" | _ -> failwith "acronym_patterns"); | ||
| 704 | - [L; S "-"; L; S "-"; O "owskość"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:acc:f" | _ -> failwith "acronym_patterns"); | ||
| 705 | - [L; S "-"; L; S "-"; O "owskość"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:nom:f" | _ -> failwith "acronym_patterns"); | ||
| 706 | - [L; S "-"; L; S "-"; O "wca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 707 | - [L; S "-"; L; S "-"; O "wca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 708 | - [L; S "-"; L; S "-"; O "wcach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 709 | - [L; S "-"; L; S "-"; O "wcami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 710 | - [L; S "-"; L; S "-"; O "wce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 711 | - [L; S "-"; L; S "-"; O "wce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 712 | - [L; S "-"; L; S "-"; O "wcem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 713 | - [L; S "-"; L; S "-"; O "wcom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 714 | - [L; S "-"; L; S "-"; O "wcowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 715 | - [L; S "-"; L; S "-"; O "wcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 716 | - [L; S "-"; L; S "-"; O "wcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 717 | - [L; S "-"; L; S "-"; O "wcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | ||
| 718 | - [L; S "-"; L; S "-"; O "wcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 719 | - [L; S "-"; L; S "-"; O "wców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 720 | - [L; S "-"; L; S "-"; O "wców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 721 | - [L; S "-"; L; S "-"; O "wiec"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns"); | ||
| 722 | - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 723 | - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns"); | ||
| 724 | - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 725 | - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns"); | ||
| 726 | - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 727 | - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 728 | - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns"); | ||
| 729 | - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 730 | - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:loc:p3" | _ -> failwith "acronym_patterns"); | ||
| 731 | - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 732 | - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns"); | ||
| 733 | - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); | ||
| 734 | - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:inst:p3" | _ -> failwith "acronym_patterns"); | ||
| 735 | - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 736 | - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 737 | - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | ||
| 738 | - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | ||
| 739 | - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 740 | - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | ||
| 741 | - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 742 | - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 743 | - [L; S "-"; L; S "’"; O "ego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 744 | - [L; S "-"; L; S "’"; O "ego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 745 | - [L; S "-"; L; S "’"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 746 | - [L; S "-"; L; S "’"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns"); | ||
| 747 | - [L; S "-"; L; S "’"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); | ||
| 748 | - [L; S "-"; L; S "’"; O "emu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 749 | - [L; S "-"; L; S "’"; O "go"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 750 | - [L; S "-"; L; S "’"; O "go"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 751 | - [L; S "-"; L; S "’"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns"); | ||
| 752 | - [L; S "-"; L; S "’"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 753 | - [L; S "-"; L; S "’"; O "m"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); | ||
| 754 | - [L; S "-"; L; S "’"; O "m"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 755 | - [L; S "-"; L; S "’"; O "mu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 756 | - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 757 | - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns"); | ||
| 758 | - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); | ||
| 759 | - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:dat:p3" | _ -> failwith "acronym_patterns"); | ||
| 760 | - [L; S "-"; L; S "’"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); | ||
| 761 | - [L; S "-"; L; S "’"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns"); | ||
| 762 | - [L; S "-"; L; S "’"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); | ||
| 763 | - [L; S "-"; L; S "’"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); | ||
| 764 | - [L; S "-"; L; S "’"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 765 | - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 766 | - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); | ||
| 767 | - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); | ||
| 768 | - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); | ||
| 769 | - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); | ||
| 770 | - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 771 | - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 772 | - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 773 | - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 774 | - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); | ||
| 775 | - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); | ||
| 776 | - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 777 | - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); | ||
| 778 | - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); | ||
| 779 | - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); | ||
| 780 | - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); | ||
| 781 | - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); | ||
| 782 | - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); | ||
| 783 | - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns"); | ||
| 784 | - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); | ||
| 785 | - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:gen:p3" | _ -> failwith "acronym_patterns"); | ||
| 786 | - ] | ||
| 787 | - | ||
| 788 | -let name_patterns = [ (* Patterns for names joined with an apostrophe. Each entry pairs a token pattern with a function that builds the result from the list of matched tokens through compose_lemma3 or compose_lemma (both defined outside this view); the function fails with name_patterns when the list does not have exactly three elements. *) | ||
| 789 | - [O "O"; S "’"; L], (function [x;y;z] -> compose_lemma3 x y z "" "subst:_:_:_" | _ -> failwith "name_patterns"); | ||
| 790 | - [O "d"; S "’"; L], (function [x;y;z] -> compose_lemma3 x y z "" "subst:_:_:_" | _ -> failwith "name_patterns"); | ||
| 791 | - [O "l"; S "’"; L], (function [x;y;z] -> compose_lemma3 x y z "" "subst:_:_:_" | _ -> failwith "name_patterns"); | ||
| 792 | - [L; S "’"; O "s"], (function [x;y;z] -> compose_lemma3 x y z "" "subst:_:_:_" | _ -> failwith "name_patterns"); | ||
| 793 | - [L; S "’"; O "sa"], (function [x;_;_] -> compose_lemma x "’s" "subst:sg:gen.acc:_" | _ -> failwith "name_patterns"); (* only the first matched token is used here; the apostrophe-s suffix is passed explicitly instead of the matched ending *) | ||
| 794 | - ] | ||
| 795 | - | ||
| 796 | -let abr_patterns = [ (* Abbreviation patterns. Each entry pairs a token pattern with a function that expands the matched tokens into lemma and interpretation data through st, std or ct (helpers defined outside this view); every function fails with abr_patterns when the matched list has an unexpected length. Presumably the integers in the st/std triples give the number of characters of the abbreviation assigned to each word - TODO confirm against st and std. *) | ||
| 797 | - [O "b"; S "."; O "u"; S "."], (function [a;b;c;d] -> [ct [a;b] "bez" "prep:gen:nwok"; ct [c;d] "uwaga" "subst:pl:gen:f"] | _ -> failwith "abr_patterns"); | ||
| 798 | - [O "b"; S "."; O "zm"; S "."], (function [a;b;c;d] -> [ct [a;b] "bez" "prep:gen:nwok"; ct [c;d] "zmiana" "subst:pl:gen:f"] | _ -> failwith "abr_patterns"); | ||
| 799 | - [O "blm"], (function [a] -> st a [1,"bez","prep:gen:nwok";1,"liczba","subst:sg:gen:f";1,"mnogi","adj:sg:gen:f:pos"] | _ -> failwith "abr_patterns"); | ||
| 800 | - [O "blp"], (function [a] -> st a [1,"bez","prep:gen:nwok";1,"liczba","subst:sg:gen:f";1,"pojedynczy","adj:sg:gen:f:pos"] | _ -> failwith "abr_patterns"); | ||
| 801 | - [O "błp"; S "."], (function [a;b] -> std a b [2,"błogosławiony","adj:sg:gen:f:pos";1,"pamięć","subst:sg:gen:f"] | _ -> failwith "abr_patterns"); | ||
| 802 | - [O "bm"], (function [a] -> st a [1,"bieżący","adj:sg:$C:m3:pos";1,"miesiąc","subst:sg:$C:m3"] | _ -> failwith "abr_patterns"); | ||
| 803 | - [O "bm"; S "."], (function [a;b] -> std a b [1,"bieżący","adj:sg:$C:m3:pos";1,"miesiąc","subst:sg:$C:m3"] | _ -> failwith "abr_patterns"); | ||
| 804 | - [O "bp"; S "."], (function [a;b] -> std a b [1,"błogosławiony","adj:sg:gen:f:pos";1,"pamięć","subst:sg:gen:f"] | _ -> failwith "abr_patterns"); | ||
| 805 | - [O "br"], (function [a] -> st a [1,"bieżący","adj:sg:$C:m3:pos";1,"rok","subst:sg:$C:m3"] | _ -> failwith "abr_patterns"); | ||
| 806 | - [O "br"; S "."], (function [a;b] -> std a b [1,"bieżący","adj:sg:$C:m3:pos";1,"rok","subst:sg:$C:m3"] | _ -> failwith "abr_patterns"); | ||
| 807 | - [O "c"; S "."; O "d"; S "."; O "n"; S "."], (function [a;b;c;d;e;f] -> [ct [a;b] "ciąg" "subst:sg:nom:m3"; ct [c;d] "daleki" "adj:sg:nom:m3:com"; ct [e;f] "nastąpić" "fin:sg:ter:perf"] | _ -> failwith "abr_patterns"); | ||
| 808 | - [O "ccm"], (function [a] -> st a [1,"sześcienny","adj:_:$C:m3:pos";2,"centymetr","subst:_:$C:m3"] | _ -> failwith "abr_patterns"); | ||
| 809 | - [O "cd"; S "."], (function [a;b] -> std a b [1,"ciąg","subst:sg:nom:m3";1,"daleki","adj:sg:nom:m3:com"] | _ -> failwith "abr_patterns"); | ||
| 810 | - [O "cdn"; S "."], (function [a;b] -> std a b [1,"ciąg","subst:sg:nom:m3";1,"daleki","adj:sg:nom:m3:com";1,"nastąpić","fin:sg:ter:perf"] | _ -> failwith "abr_patterns"); | ||
| 811 | - [O "cm"; O "3"], (function [a;b] -> [ct [a] "centymetr" "subst:_:$C:m3"; ct [b] "sześcienny" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns"); | ||
| 812 | - [O "dcn"; S "."], (function [a;b] -> std a b [1,"daleki","adj:sg:nom:m3:com";1,"ciąg","subst:sg:nom:m3";1,"nastąpić","fin:sg:ter:perf"] | _ -> failwith "abr_patterns"); | ||
| 813 | - [O "dm"; O "3"], (function [a;b] -> [ct [a] "decymetr" "subst:_:$C:m3"; ct [b] "sześcienny" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns"); | ||
| 814 | - [O "ds"; S "."], (function [a;b] -> std a b [1,"do","prep:gen";1,"sprawa","subst:pl:gen:f"] | _ -> failwith "abr_patterns"); | ||
| 815 | - [O "d"; O "/"; O "s"], (function [a;b;c] -> [ct [a;b] "do" "prep:gen"; ct [c] "sprawa" "subst:pl:gen:f"] | _ -> failwith "abr_patterns"); | ||
| 816 | - [O "itd"; S "."], (function [a;b] -> std a b [1,"i","conj";1,"tak","adv:pos";1,"daleko","adv:com"] | _ -> failwith "abr_patterns"); | ||
| 817 | - [O "itede"; S "."], (function [a;b] -> std a b [1,"i","conj";2,"tak","adv:pos";2,"daleko","adv:com"] | _ -> failwith "abr_patterns"); | ||
| 818 | - [O "itp"; S "."], (function [a;b] -> std a b [1,"i","conj";1,"tym","adv";1,"podobny","adj:pl:nom:_:pos"] | _ -> failwith "abr_patterns"); | ||
| 819 | - [O "jw"; S "."], (function [a;b] -> std a b [1,"jak","adv:pos";1,"wysoko","adv:com"] | _ -> failwith "abr_patterns"); | ||
| 820 | - [O "JWP"], (function [a] -> st a [1,"jaśnie","adv:pos";1,"wielmożny","adj:_:$C:m1:pos";1,"pan","subst:_:$C:m1"] | _ -> failwith "abr_patterns"); | ||
| 821 | - [O "JWP"], (function [a] -> st a [1,"jaśnie","adv:pos";1,"wielmożny","adj:_:$C:f:pos";1,"pani","subst:_:$C:f"] | _ -> failwith "abr_patterns"); | ||
| 822 | - [O "km"; S "."; O "2"], (function [a;b;c] -> [ct [a;b] "kilometr" "subst:_:$C:m3"; ct [c] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns"); | ||
| 823 | - [O "km"; O "2"], (function [a;b] -> [ct [a] "kilometr" "subst:_:$C:m3"; ct [b] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns"); | ||
| 824 | - [O "km"; O "²"], (function [a;b] -> [ct [a] "kilometr" "subst:_:$C:m3"; ct [b] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns"); | ||
| 825 | - [O "lm"; S "."], (function [a;b] -> std a b [1,"liczba","subst:sg:$C:f";1,"mnogi","adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns"); | ||
| 826 | - [O "lp"; S "."], (function [a;b] -> std a b [1,"liczba","subst:sg:$C:f";1,"pojedynczy","adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns"); | ||
| 827 | - [O "m"; S "."; O "in"; S "."], (function [a;b;c;d] -> [ct [a;b] "między" "prep:inst"; ct [c;d] "inny" "adj:pl:inst:_:pos"] | _ -> failwith "abr_patterns"); | ||
| 828 | - [O "m"; S "."; O "in"], (function [a;b;c] -> [ct [a;b] "między" "prep:inst"; ct [c] "inny" "adj:pl:inst:_:pos"] | _ -> failwith "abr_patterns"); | ||
| 829 | - [O "m"; S "."; O "inn"; S "."], (function [a;b;c;d] -> [ct [a;b] "między" "prep:inst"; ct [c;d] "inny" "adj:pl:inst:_:pos"] | _ -> failwith "abr_patterns"); | ||
| 830 | - [O "m"; S "."; O "st"; S "."], (function [a;b;c;d] -> [ct [a;b] "miasto" "subst:_:$C:n2"; ct [c;d] "stołeczny" "adj:_:$C:n2:pos"] | _ -> failwith "abr_patterns"); | ||
| 831 | - [O "m"; O "^"; O "2"], (function [a;b;c] -> [ct [a] "metr" "subst:_:$C:m3"; ct [b;c] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns"); | ||
| 832 | - [O "m"; O "2"], (function [a;b] -> [ct [a] "metr" "subst:_:$C:m3"; ct [b] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns"); | ||
| 833 | - [O "m"; O "3"], (function [a;b] -> [ct [a] "metr" "subst:_:$C:m3"; ct [b] "sześcienny" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns"); | ||
| 834 | - [O "min"; S "."], (function [a;b] -> std a b [1,"między","prep:inst";2,"inny","adj:pl:inst:_:pos"] | _ -> failwith "abr_patterns"); | ||
| 835 | - [O "mkw"; S "."], (function [a;b] -> std a b [1,"metr","subst:_:$C:m3";2,"kwadratowy","adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns"); | ||
| 836 | - [O "n"; S "."; O "e"; S "."], (function [a;b;c;d] -> [ct [a;b] "nasz" "adj:sg:gen:f:pos"; ct [c;d] "era" "subst:sg:gen:f"] | _ -> failwith "abr_patterns"); | ||
| 837 | - [O "n"; S "."; O "p"; S "."; O "m"; S "."], (function [a;b;c;d;e;f] -> [ct [a;b] "nad" "prep:inst"; ct [c;d] "poziom" "subst:sg:inst:m3"; ct [e;f] "morze" "subst:sg:gen:n2"] | _ -> failwith "abr_patterns"); | ||
| 838 | - [O "np"; S "."], (function [a;b] -> std a b [1,"na","prep:acc";1,"przykład","subst:sg:acc:m3"] | _ -> failwith "abr_patterns"); | ||
| 839 | - [O "nt"; S "."], (function [a;b] -> std a b [1,"na","prep:acc";1,"temat","subst:sg:acc:m3"] | _ -> failwith "abr_patterns"); | ||
| 840 | - [O "NTG"], (function [a] -> st a [1,"nie","qub";1,"ta","adj:sg:nom:f:pos";1,"grupa","subst:sg:nom:f"] | _ -> failwith "abr_patterns"); | ||
| 841 | - [O "o"; S "."; O "o"; S "."], (function [a;b;c;d] -> [ct [a;b] "ograniczony" "adj:sg:$C:f:pos"; ct [c;d] "odpowiedzialność" "subst:sg:$C:f"] | _ -> failwith "abr_patterns"); | ||
| 842 | - [O "p"; S "."; O "n"; S "."; O "e"; S "."], (function [a;b;c;d;e;f] -> [ct [a;b] "przed" "prep:inst"; ct [c;d] "nasz" "adj:sg:inst:f:pos"; ct [e;f] "era" "subst:sg:inst:f"] | _ -> failwith "abr_patterns"); | ||
| 843 | - [O "p"; S "."; O "o"; S "."], (function [a;b;c;d] -> [ct [a;b] "pełniący" "pact:_:_:m1.m2.m3:imperf:aff"; ct [c;d] "obowiązek" "subst:pl:acc:m3"] | _ -> failwith "abr_patterns"); | ||
| 844 | - [O "p"; S "."; O "p"; S "."; O "m"; S "."], (function [a;b;c;d;e;f] -> [ct [a;b] "pod" "prep:inst"; ct [c;d] "poziom" "subst:sg:inst:m3"; ct [e;f] "morze" "subst:sg:gen:n2"] | _ -> failwith "abr_patterns"); | ||
| 845 | - [O "p"; S "."; O "t"; S "."], (function [a;b;c;d] -> [ct [a;b] "pod" "prep:inst:nwokc"; ct [c;d] "tytuł" "subst:sg:inst:m3"] | _ -> failwith "abr_patterns"); (* NOTE(review): nwokc - the other preposition entries in this list use nwok; possible typo, confirm *) | ||
| 846 | - [O "pn"; S "."], (function [a;b] -> std a b [1,"pod","prep:inst";1,"nazwa","subst:sg:inst:f"] | _ -> failwith "abr_patterns"); | ||
| 847 | - [O "pne"; S "."], (function [a;b] -> std a b [1,"przed","prep:inst";1,"nasz","adj:sg:inst:f:pos";1,"era","subst:sg:inst:f"] | _ -> failwith "abr_patterns"); | ||
| 848 | - [O "pt"; S "."], (function [a;b] -> std a b [1,"pod","prep:inst";1,"tytuł","subst:sg:inst:m3"] | _ -> failwith "abr_patterns"); | ||
| 849 | - [O "PW"], (function [a] -> st a [1,"prywatny","adj:_:$C:f:pos";1,"wiadomość","subst:_:$C:f"] | _ -> failwith "abr_patterns"); | ||
| 850 | - [O "pw"; S "."], (function [a;b] -> std a b [1,"pod","prep:inst";1,"wezwanie","subst:sg:inst:n2"] | _ -> failwith "abr_patterns"); | ||
| 851 | -(* [O "S"; S "."; O "A"; S "."], (function [a;b;c;d] -> [ct [a;b] "spółka" "subst:sg:$C:f"; ct [c;d] "akcyjny" "adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns"); | ||
| 852 | - [O "s"; S "."; O "c"; S "."], (function [a;b;c;d] -> [ct [a;b] "spółka" "subst:sg:$C:f"; ct [c;d] "cywilny" "adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns");*) | ||
| 853 | -(* [O "SA"], (function [a] -> st a [1,"spółka","subst:sg:$C:f";1,"akcyjny","adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns"); *) | ||
| 854 | - [O "ś"; S "."; O "p"; S "."], (function [a;b;c;d] -> [ct [a;b] "święty" "adj:sg:gen:f:pos"; ct [c;d] "pamięć" "subst:sg:gen:f"] | _ -> failwith "abr_patterns"); | ||
| 855 | - [O "śp"; S "."], (function [a;b] -> std a b [1,"święty","adj:sg:gen:f:pos";1,"pamięć","subst:sg:gen:f"] | _ -> failwith "abr_patterns"); | ||
| 856 | - [O "tgz"; S "."], (function [a;b] -> std a b [2,"tak","adv";1,"zwać","ppas:_:_:_:_:aff"] | _ -> failwith "abr_patterns"); | ||
| 857 | - [O "tj"; S "."], (function [a;b] -> std a b [1,"to","subst:sg:nom:n2";1,"być","fin:sg:ter:imperf"] | _ -> failwith "abr_patterns"); | ||
| 858 | - [O "tzn"; S "."], (function [a;b] -> std a b [1,"to","subst:sg:nom:n2";2,"znaczyć","fin:sg:ter:imperf"] | _ -> failwith "abr_patterns"); | ||
| 859 | - [O "tzw"; S "."], (function [a;b] -> std a b [1,"tak","adv:pos";2,"zwać","ppas:_:_:_:imperf:aff"] | _ -> failwith "abr_patterns"); | ||
| 860 | - [O "ub"; S "."; O "r"; S "."], (function [a;b;c;d] -> [ct [a;b] "ubiegły" "adj:sg:$C:m3:pos"; ct [c;d] "rok" "subst:sg:$C:m3"] | _ -> failwith "abr_patterns"); | ||
| 861 | - [O "w"; S "."; O "w"; S "."], (function [a;b;c;d] -> [ct [a;b] "wysoko" "adv:com"; ct [c;d] "wymienić" "ppas:_:_:_:perf:aff"] | _ -> failwith "abr_patterns"); | ||
| 862 | - [O "w"; O "/"; O "m"], (function [a;b;c] -> [ct [a;b] "w" "prep:loc"; ct [c] "miejsce" "subst:_:loc:m3"] | _ -> failwith "abr_patterns"); (* NOTE(review): miejsce is tagged m3 here - looks like it should carry a neuter gender tag; confirm *) | ||
| 863 | - [O "w"; O "/"; O "w"], (function [a;b;c] -> [ct [a;b] "wysoko" "adv:com"; ct [c] "wymienić" "ppas:_:_:_:perf:aff"] | _ -> failwith "abr_patterns"); | ||
| 864 | - [O "ws"; S "."], (function [a;b] -> std a b [1,"w","prep:loc:nwok";1,"sprawa","subst:sg:loc:f"] | _ -> failwith "abr_patterns"); | ||
| 865 | - [O "ww"; S "."], (function [a;b] -> std a b [1,"wysoko","adv:com";1,"wymieniony","ppas:_:_:_:perf:aff"] | _ -> failwith "abr_patterns"); (* NOTE(review): the lemma is wymieniony here, while the w.w. and w/w entries above use wymienić - confirm which one is intended *) | ||
| 866 | - ] |
tokenizer/eniam-tokenizer-1.0/ENIAMpatterns.ml deleted
| 1 | -(* | ||
| 2 | - * ENIAMtokenizer, a tokenizer for Polish | ||
| 3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | ||
| 4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | ||
| 5 | - * | ||
| 6 | - * This library is free software: you can redistribute it and/or modify | ||
| 7 | - * it under the terms of the GNU Lesser General Public License as published by | ||
| 8 | - * the Free Software Foundation, either version 3 of the License, or | ||
| 9 | - * (at your option) any later version. | ||
| 10 | - * | ||
| 11 | - * This library is distributed in the hope that it will be useful, | ||
| 12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 14 | - * GNU General Public License for more details. | ||
| 15 | - * | ||
| 16 | - * You should have received a copy of the GNU Lesser General Public License | ||
| 17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 18 | - *) | ||
| 19 | - | ||
| 20 | -open Xstd | ||
| 21 | -open Printf | ||
| 22 | -open ENIAMtokenizerTypes | ||
| 23 | - | ||
| 24 | -let rec flatten_tokens rev_variants = function (* Expands a list of Token/Seq/Variant nodes into all alternative flat readings. rev_variants is the accumulator: a list of partial readings, each one a list of Token nodes kept in reverse order; normalize_tokens seeds it with a single empty reading. *) | ||
| 25 | - | [] -> rev_variants | ||
| 26 | - | Token t :: l -> flatten_tokens (Xlist.map rev_variants (fun rev_variant -> Token t :: rev_variant)) l (* a plain token is prepended to every reading collected so far *) | ||
| 27 | - | Seq seq :: l -> flatten_tokens rev_variants (seq @ l) (* a sequence is spliced into the remaining input *) | ||
| 28 | - | Variant variants :: l -> flatten_tokens (List.flatten (Xlist.map variants (fun variant -> flatten_tokens rev_variants [variant]))) l (* every alternative is flattened on top of the current readings and the resulting reading lists are concatenated *) | ||
| 29 | - | ||
| 30 | -let rec normalize_tokens rev = function (* Rewrites a token list so that Seq nodes are spliced in place and every Variant holds only flat alternatives; rev accumulates the output in reverse order and is reversed at the end. Fails with normalize_tokens if a Variant yields an empty alternative or no alternatives at all. *) | ||
| 31 | - [] -> List.rev rev | ||
| 32 | - | Token t :: l -> normalize_tokens (Token t :: rev) l | ||
| 33 | - | Seq seq :: l -> normalize_tokens rev (seq @ l) | ||
| 34 | - | Variant[t] :: l -> normalize_tokens rev (t :: l) (* a Variant with a single alternative is unwrapped and its content is processed again *) | ||
| 35 | - | Variant variants :: l -> | ||
| 36 | - let variants = flatten_tokens [[]] [Variant variants] in (* all flat readings of this Variant, each in reverse order *) | ||
| 37 | - let variants = Xlist.map variants (fun rev_seq -> | ||
| 38 | - match List.rev rev_seq with (* restore the original token order of the reading *) | ||
| 39 | - [] -> failwith "normalize_tokens" | ||
| 40 | - | [t] -> t | ||
| 41 | - | seq -> Seq seq) in (* a reading with several tokens is wrapped in Seq *) | ||
| 42 | - let t = match variants with | ||
| 43 | - [] -> failwith "normalize_tokens" | ||
| 44 | - | [t] -> t | ||
| 45 | - | variants -> Variant variants in (* several readings are kept as one flat Variant *) | ||
| 46 | - normalize_tokens (t :: rev) l | ||
| 47 | - | ||
| 48 | -let concat_orths l = (* Concatenates, without a separator, the orth fields of the token records in l. *) | ||
| 49 | - String.concat "" (Xlist.map l (fun t -> t.orth)) | ||
| 50 | - | ||
| 51 | -let concat_orths2 l = (* Like concat_orths, but the text of each record is obtained with ENIAMtokens.get_orth applied to its token field instead of reading the orth field. *) | ||
| 52 | - String.concat "" (Xlist.map l (fun t -> ENIAMtokens.get_orth t.token)) | ||
| 53 | - | ||
| 54 | -let concat_intnum = function (* Joins the digit strings of one to four Dig tokens that alternate with separator tokens; the separators at the even positions are ignored. Fails with concat_intnum for any other list shape. *) | ||
| 55 | - [{token=Dig(v4,_)};_;{token=Dig(v3,_)};_;{token=Dig(v2,_)};_;{token=Dig(v1,_)}] -> v4^v3^v2^v1 | ||
| 56 | - | [{token=Dig(v3,_)};_;{token=Dig(v2,_)};_;{token=Dig(v1,_)}] -> v3^v2^v1 | ||
| 57 | - | [{token=Dig(v2,_)};_;{token=Dig(v1,_)}] -> v2^v1 | ||
| 58 | - | [{token=Dig(v1,_)}] -> v1 | ||
| 59 | - | _ -> failwith "concat_intnum" | ||
| 60 | - | ||
| 61 | -let dig_value t = (* Returns the first component of the Dig token stored in t; fails with dig_value when t.token is not a Dig. *) | ||
| 62 | - match t.token with | ||
| 63 | - Dig(v,_) -> v | ||
| 64 | - | _ -> failwith "dig_value" | ||
| 65 | - | ||
| 66 | -(* FIXME: problem with ordnum - it rules out year, which is a problem at the end of a sentence *) | ||
| 67 | -let digit_patterns1 = [ (* FIXME: problem with redundant interpretations - the lack of a space has to be taken into account in preprocessing - or in disambiguation *) (* Each entry pairs a token pattern with a function that turns the matched tokens into a new token value: Proper with obj-id for dot-separated digit groups, Dig with intnum for digit groups split by dots or spaces, Dig with ordnum for a number followed by a dot, and Compound values for dates, day-month, hour-minute and match results. *) | ||
| 68 | - [D "dig"; S "."; D "dig"; S "."; D "dig"; S "."; D "dig"; S "."; D "dig"], (fun tokens -> Proper(concat_orths tokens,"obj-id",[[]],["obj-id"])); | ||
| 69 | - [D "dig"; S "."; D "dig"; S "."; D "dig"; S "."; D "dig"], (fun tokens -> Proper(concat_orths tokens,"obj-id",[[]],["obj-id"])); | ||
| 70 | - [D "dig"; S "."; D "dig"; S "."; D "dig"], (fun tokens -> Proper(concat_orths tokens,"obj-id",[[]],["obj-id"])); | ||
| 71 | - [D "dig"; S "."; D "dig"], (fun tokens -> Proper(concat_orths tokens,"obj-id",[[]],["obj-id"])); | ||
| 72 | -(* [D "dig"], "obj-id"; *) | ||
| 73 | - [D "pref3dig"; S "."; D "3dig"; S "."; D "3dig"; S "."; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum")); | ||
| 74 | - [D "pref3dig"; S "."; D "3dig"; S "."; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum")); | ||
| 75 | - [D "pref3dig"; S "."; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum")); | ||
| 76 | - [D "pref3dig"; S " "; D "3dig"; S " "; D "3dig"; S " "; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum")); | ||
| 77 | - [D "pref3dig"; S " "; D "3dig"; S " "; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum")); | ||
| 78 | - [D "pref3dig"; S " "; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum")); | ||
| 79 | - [D "intnum"; S "."], (function [token;_] -> Dig(concat_intnum [token],"ordnum") | _ -> failwith "digit_patterns1"); (* FIXME: this should not rule out other interpretations *) | ||
| 80 | - [D "day"; S "."; D "month"; S "."; D "year"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns2"); | ||
| 81 | - [D "day"; S "."; RD "month"; S "."; D "year"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns3"); | ||
| 82 | - [D "day"; S " "; RD "month"; S " "; D "year"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns3"); | ||
| 83 | - [D "day"; S "."; D "month"; S "."; D "2dig"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns2"); | ||
| 84 | - [D "day"; S "."; RD "month"; S "."; D "2dig"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns3"); | ||
| 85 | - [D "day"; S "."; D "month"; S "."], (function [day;_;month;_] -> Compound("day-month",[day.token;month.token]) | _ -> failwith "digit_patterns4"); | ||
| 86 | - [D "hour"; S "."; D "minute"], (function [hour;_;minute] -> Compound("hour-minute",[hour.token;minute.token]) | _ -> failwith "digit_patterns5"); | ||
| 87 | - [D "hour"; S ":"; D "minute"], (function [hour;_;minute] -> Compound("hour-minute",[hour.token;minute.token]) | _ -> failwith "digit_patterns6"); | ||
| 88 | - [D "intnum"; S ":"; D "intnum"], (function [x;_;y] -> Compound("match-result",[x.token;y.token]) | _ -> failwith "digit_patterns7"); | ||
| 89 | - ] (* without 1 and *2 *3 *4 we have rec *) (* in Morfeusz always num:pl? *) | ||
| 90 | - | ||
| 91 | -let digit_patterns2 = [ (* Each entry pairs a token pattern with a function that builds a Dig token from the matched tokens: a realnum from an integer, a comma and a digit group, and a year from an apostrophe followed by two digits. *) | ||
| 92 | - [D "intnum"; S ","; D "dig"], (function [x;_;y] -> Dig(dig_value x ^ "," ^ dig_value y,"realnum") | _ -> failwith "digit_patterns8"); | ||
| 93 | -(* [S "-"; D "intnum"; S ","; D "dig"], (function [_;x;_;y] -> Dig("-" ^ dig_value x ^ "," ^ dig_value y,"realnum") | _ -> failwith "digit_patterns9"); | ||
| 94 | - [S "-"; D "intnum"], (function [_;x] -> Dig("-" ^ dig_value x,"realnum") | _ -> failwith "digit_patterns10");*) | ||
| 95 | - [S "’"; D "2dig"], (function [_;x] -> Dig("’" ^ dig_value x,"year") | _ -> failwith "digit_patterns12"); (* the apostrophe is kept as part of the year value *) | ||
| 96 | -(* [D "intnum"], "realnum"; *) | ||
| 97 | - ] | ||
| 98 | - | ||
| 99 | -let compose_latek_lemma t interp = (* Calls ENIAMtokens.make_lemma with the text of t.token followed by the suffix -latek, paired with the interpretation string interp. *) | ||
| 100 | - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ "-latek", interp) | ||
| 101 | - | ||
| 102 | -let compose_latka_lemma t interp = (* Calls ENIAMtokens.make_lemma with the text of t.token followed by the suffix -latka, paired with the interpretation string interp. *) | ||
| 103 | - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ "-latka", interp) | ||
| 104 | - | ||
| 105 | -let compose_latek_int_lemma t t2 interp = (* Calls ENIAMtokens.make_lemma with the texts of t.token and t2.token joined by a hyphen and followed by the suffix -latek, paired with interp. *) | ||
| 106 | - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ "-" ^ ENIAMtokens.get_orth t2.token ^ "-latek", interp) | ||
| 107 | - | ||
| 108 | -let compose_latka_int_lemma t t2 interp = (* Calls ENIAMtokens.make_lemma with the texts of t.token and t2.token joined by a hyphen and followed by the suffix -latka, paired with interp. *) | ||
| 109 | - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ "-" ^ ENIAMtokens.get_orth t2.token ^ "-latka", interp) | ||
| 110 | - | ||
| 111 | -let digit_patterns3 = [ | ||
| 112 | - [S "-"; D "intnum"], (function [_;x] -> Dig("-" ^ dig_value x,"intnum") | _ -> failwith "digit_patterns10"); | ||
| 113 | - [S "-"; D "realnum"], (function [_;x] -> Dig("-" ^ dig_value x,"realnum") | _ -> failwith "digit_patterns10"); | ||
| 114 | - [D "intnum"; S "-"; D "intnum"], (function [x;_;y] -> Compound("intnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns11"); | ||
| 115 | - [D "realnum"; S "-"; D "realnum"], (function [x;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *) | ||
| 116 | - [D "intnum"; S "-"; D "realnum"], (function [x;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *) | ||
| 117 | - [D "realnum"; S "-"; D "intnum"], (function [x;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *) | ||
| 118 | - [C "date"; S "-"; C "date"], (function [x;_;y] -> Compound("date-interval",[x.token;y.token]) | _ -> failwith "digit_patterns13"); | ||
| 119 | - [C "day-month"; S "-"; C "day-month"], (function [x;_;y] -> Compound("day-month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns14"); | ||
| 120 | - [D "day"; S "-"; D "day"], (function [x;_;y] -> Compound("day-interval",[x.token;y.token]) | _ -> failwith "digit_patterns15"); | ||
| 121 | - [D "month"; S "-"; D "month"], (function [x;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16"); | ||
| 122 | - [RD "month"; S "-"; RD "month"], (function [x;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns17"); | ||
| 123 | - [D "year"; S "-"; D "year"], (function [x;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16"); | ||
| 124 | - [D "year"; S "-"; D "2dig"], (function [x;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16"); | ||
| 125 | - [C "hour-minute"; S "-"; C "hour-minute"], (function [x;_;y] -> Compound("hour-minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns18"); | ||
| 126 | - [D "hour"; S "-"; D "hour"], (function [x;_;y] -> Compound("hour-interval",[x.token;y.token]) | _ -> failwith "digit_patterns19"); | ||
| 127 | - [D "minute"; S "-"; D "minute"], (function [x;_;y] -> Compound("minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns20"); | ||
| 128 | - [RD "roman"; S "-"; RD "roman"], (function [x;_;y] -> Compound("roman-interval",[x.token;y.token]) | _ -> failwith "digit_patterns21"); | ||
| 129 | - [D "intnum"; S " "; S "-"; S " "; D "intnum"], (function [x;_;_;_;y] -> Compound("intnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns11"); | ||
| 130 | - [D "realnum"; S " "; S "-"; S " "; D "realnum"], (function [x;_;_;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *) | ||
| 131 | - [D "intnum"; S " "; S "-"; S " "; D "realnum"], (function [x;_;_;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *) | ||
| 132 | - [D "realnum"; S " "; S "-"; S " "; D "intnum"], (function [x;_;_;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *) | ||
| 133 | - [C "date"; S " "; S "-"; S " "; C "date"], (function [x;_;_;_;y] -> Compound("date-interval",[x.token;y.token]) | _ -> failwith "digit_patterns13"); | ||
| 134 | - [C "day-month"; S " "; S "-"; S " "; C "day-month"], (function [x;_;_;_;y] -> Compound("day-month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns14"); | ||
| 135 | - [D "day"; S " "; S "-"; S " "; D "day"], (function [x;_;_;_;y] -> Compound("day-interval",[x.token;y.token]) | _ -> failwith "digit_patterns15"); | ||
| 136 | - [D "month"; S " "; S "-"; S " "; D "month"], (function [x;_;_;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16"); | ||
| 137 | - [RD "month"; S " "; S "-"; S " "; RD "month"], (function [x;_;_;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns17"); | ||
| 138 | - [D "year"; S " "; S "-"; S " "; D "year"], (function [x;_;_;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16"); | ||
| 139 | - [D "year"; S " "; S "-"; S " "; D "2dig"], (function [x;_;_;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16"); | ||
| 140 | - [C "hour-minute"; S " "; S "-"; S " "; C "hour-minute"], (function [x;_;_;_;y] -> Compound("hour-minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns18"); | ||
| 141 | - [D "hour"; S " "; S "-"; S " "; D "hour"], (function [x;_;_;_;y] -> Compound("hour-interval",[x.token;y.token]) | _ -> failwith "digit_patterns19"); | ||
| 142 | - [D "minute"; S " "; S "-"; S " "; D "minute"], (function [x;_;_;_;y] -> Compound("minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns20"); | ||
| 143 | - [RD "roman"; S " "; S "-"; S " "; RD "roman"], (function [x;_;_;_;y] -> Compound("roman-interval",[x.token;y.token]) | _ -> failwith "digit_patterns21"); | ||
| 144 | - [D "intnum"; S "-"; O "latek"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:nom:m1" | _ -> failwith "digit_patterns22"); | ||
| 145 | - [D "intnum"; S "-"; O "latka"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:gen.acc:m1" | _ -> failwith "digit_patterns22"); | ||
| 146 | - [D "intnum"; S "-"; O "latkowi"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:dat:m1" | _ -> failwith "digit_patterns22"); | ||
| 147 | - [D "intnum"; S "-"; O "latkiem"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:inst:m1" | _ -> failwith "digit_patterns22"); | ||
| 148 | - [D "intnum"; S "-"; O "latku"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:loc.voc:m1" | _ -> failwith "digit_patterns22"); | ||
| 149 | - [D "intnum"; S "-"; O "latkowie"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:nom.voc:m1" | _ -> failwith "digit_patterns22"); | ||
| 150 | - [D "intnum"; S "-"; O "latków"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:gen.acc:m1" | _ -> failwith "digit_patterns22"); | ||
| 151 | - [D "intnum"; S "-"; O "latkom"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:dat:m1" | _ -> failwith "digit_patterns22"); | ||
| 152 | - [D "intnum"; S "-"; O "latkami"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:inst:m1" | _ -> failwith "digit_patterns22"); | ||
| 153 | - [D "intnum"; S "-"; O "latkach"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:loc:m1" | _ -> failwith "digit_patterns22"); | ||
| 154 | - [D "intnum"; S "-"; O "latka"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:nom:f" | _ -> failwith "digit_patterns22"); | ||
| 155 | - [D "intnum"; S "-"; O "latki"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:gen:f" | _ -> failwith "digit_patterns22"); | ||
| 156 | - [D "intnum"; S "-"; O "latce"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:dat.loc:f" | _ -> failwith "digit_patterns22"); | ||
| 157 | - [D "intnum"; S "-"; O "latkę"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:acc:f" | _ -> failwith "digit_patterns22"); | ||
| 158 | - [D "intnum"; S "-"; O "latką"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:inst:f" | _ -> failwith "digit_patterns22"); | ||
| 159 | - [D "intnum"; S "-"; O "latko"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:voc:f" | _ -> failwith "digit_patterns22"); | ||
| 160 | - [D "intnum"; S "-"; O "latki"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:nom.acc.voc:f" | _ -> failwith "digit_patterns22"); | ||
| 161 | - [D "intnum"; S "-"; O "latek"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:gen:f" | _ -> failwith "digit_patterns22"); | ||
| 162 | - [D "intnum"; S "-"; O "latkom"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:dat:f" | _ -> failwith "digit_patterns22"); | ||
| 163 | - [D "intnum"; S "-"; O "latkami"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:inst:f" | _ -> failwith "digit_patterns22"); | ||
| 164 | - [D "intnum"; S "-"; O "latkach"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:loc:f" | _ -> failwith "digit_patterns22"); | ||
| 165 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latek"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:nom:m1" | _ -> failwith "digit_patterns22"); | ||
| 166 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latka"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:gen.acc:m1" | _ -> failwith "digit_patterns22"); | ||
| 167 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkowi"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:dat:m1" | _ -> failwith "digit_patterns22"); | ||
| 168 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkiem"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:inst:m1" | _ -> failwith "digit_patterns22"); | ||
| 169 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latku"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:loc.voc:m1" | _ -> failwith "digit_patterns22"); | ||
| 170 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkowie"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:nom.voc:m1" | _ -> failwith "digit_patterns22"); | ||
| 171 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latków"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:gen.acc:m1" | _ -> failwith "digit_patterns22"); | ||
| 172 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkom"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:dat:m1" | _ -> failwith "digit_patterns22"); | ||
| 173 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkami"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:inst:m1" | _ -> failwith "digit_patterns22"); | ||
| 174 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkach"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:loc:m1" | _ -> failwith "digit_patterns22"); | ||
| 175 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latka"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:nom:f" | _ -> failwith "digit_patterns22"); | ||
| 176 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latki"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:gen:f" | _ -> failwith "digit_patterns22"); | ||
| 177 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latce"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:dat.loc:f" | _ -> failwith "digit_patterns22"); | ||
| 178 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkę"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:acc:f" | _ -> failwith "digit_patterns22"); | ||
| 179 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latką"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:inst:f" | _ -> failwith "digit_patterns22"); | ||
| 180 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latko"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:voc:f" | _ -> failwith "digit_patterns22"); | ||
| 181 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latki"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:nom.acc.voc:f" | _ -> failwith "digit_patterns22"); | ||
| 182 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latek"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:gen:f" | _ -> failwith "digit_patterns22"); | ||
| 183 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkom"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:dat:f" | _ -> failwith "digit_patterns22"); | ||
| 184 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkami"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:inst:f" | _ -> failwith "digit_patterns22"); | ||
| 185 | - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkach"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:loc:f" | _ -> failwith "digit_patterns22"); | ||
| 186 | - ] | ||
| 187 | - | ||
| 188 | -let url_patterns1 = [ | ||
| 189 | - [L; S "."; O "pl"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 190 | - [L; S "."; L; S "."; O "pl"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 191 | - [L; S "."; L; S "."; L; S "."; O "pl"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 192 | - [L; S "."; L; S "."; L; S "."; L; S "."; O "pl"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 193 | - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "pl"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 194 | - [L; S "."; L; S "-"; L; S "."; O "pl"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 195 | - [L; S "."; O "uk"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 196 | - [L; S "."; L; S "."; O "uk"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 197 | - [L; S "."; L; S "."; L; S "."; O "uk"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 198 | - [L; S "."; L; S "."; L; S "."; L; S "."; O "uk"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 199 | - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "uk"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 200 | - [L; S "."; L; S "-"; L; S "."; O "uk"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 201 | - [L; S "."; O "cz"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 202 | - [L; S "."; L; S "."; O "cz"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 203 | - [L; S "."; L; S "."; L; S "."; O "cz"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 204 | - [L; S "."; L; S "."; L; S "."; L; S "."; O "cz"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 205 | - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "cz"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 206 | - [L; S "."; L; S "-"; L; S "."; O "cz"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 207 | - [L; S "."; O "eu"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 208 | - [L; S "."; L; S "."; O "eu"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 209 | - [L; S "."; L; S "."; L; S "."; O "eu"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 210 | - [L; S "."; L; S "."; L; S "."; L; S "."; O "eu"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 211 | - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "eu"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 212 | - [L; S "."; L; S "-"; L; S "."; O "eu"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 213 | - [L; S "."; O "org"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 214 | - [L; S "."; L; S "."; O "org"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 215 | - [L; S "."; L; S "."; L; S "."; O "org"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 216 | - [L; S "."; L; S "."; L; S "."; L; S "."; O "org"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 217 | - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "org"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 218 | - [L; S "."; L; S "-"; L; S "."; O "org"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 219 | - [L; S "."; O "com"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 220 | - [L; S "."; L; S "."; O "com"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 221 | - [L; S "."; L; S "."; L; S "."; O "com"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 222 | - [L; S "."; L; S "."; L; S "."; L; S "."; O "com"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 223 | - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "com"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 224 | - [L; S "."; L; S "-"; L; S "."; O "com"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 225 | - [L; S "."; O "net"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 226 | - [L; S "."; L; S "."; O "net"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 227 | - [L; S "."; L; S "."; L; S "."; O "net"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 228 | - [L; S "."; L; S "."; L; S "."; L; S "."; O "net"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 229 | - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "net"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 230 | - [L; S "."; L; S "-"; L; S "."; O "net"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 231 | - [L; S "."; O "gov"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 232 | - [L; S "."; L; S "."; O "gov"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 233 | - [L; S "."; L; S "."; L; S "."; O "gov"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 234 | - [L; S "."; L; S "."; L; S "."; L; S "."; O "gov"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 235 | - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "gov"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 236 | - [L; S "."; L; S "-"; L; S "."; O "gov"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 237 | - ] | ||
| 238 | - | ||
| 239 | -let url_patterns2 = [ | ||
| 240 | - [L; S "@"; D "url"], (function l -> Dig(concat_orths2 l,"email")); | ||
| 241 | - [O "http"; S ":"; S "/"; S "/"; D "url"], (function l -> Dig(concat_orths2 l,"email")); | ||
| 242 | - ] | ||
| 243 | - | ||
| 244 | -let url_patterns3 = [ | ||
| 245 | - [D "url"; S "/"], (function l -> Dig(concat_orths2 l,"url")); | ||
| 246 | - [D "url"; S "/"; L], (function l -> Dig(concat_orths2 l,"url")); | ||
| 247 | - [D "url"; S "/"; L; S "."; L], (function l -> Dig(concat_orths2 l,"url")); | ||
| 248 | - ] | ||
| 249 | - | ||
| 250 | -type matching = { | ||
| 251 | - prefix: tokens list; | ||
| 252 | - matched: token_record list; | ||
| 253 | - suffix: tokens list; | ||
| 254 | - pattern: pat list; | ||
| 255 | - command: token_record list -> token; | ||
| 256 | - command_abr: token_record list -> tokens list; | ||
| 257 | - } | ||
| 258 | - | ||
| 259 | -let execute_command matching = | ||
| 260 | - let l = List.rev matching.matched in | ||
| 261 | - let len = Xlist.fold l 0 (fun len t -> t.len + len) in | ||
| 262 | - Seq((List.rev matching.prefix) @ [Token{empty_token with | ||
| 263 | - orth=concat_orths l; | ||
| 264 | - beg=(List.hd l).beg; | ||
| 265 | - len=len; | ||
| 266 | - next=(List.hd l).beg+len; | ||
| 267 | - token=matching.command l; | ||
| 268 | - (*weight=0.;*) (* FIXME: dodać wagi do konkretnych reguł i uwzględnić wagi maczowanych tokenów *) | ||
| 269 | - attrs=ENIAMtokens.merge_attrs l}] @ matching.suffix) | ||
| 270 | - | ||
| 271 | -let execute_abr_command matching = | ||
| 272 | - let l = List.rev matching.matched in | ||
| 273 | - Seq((List.rev matching.prefix) @ (matching.command_abr l) @ matching.suffix) | ||
| 274 | - | ||
| 275 | -let match_token = function | ||
| 276 | - D cat, Dig(_,cat2) -> cat = cat2 | ||
| 277 | - | C s, Compound(s2,_) -> s = s2 | ||
| 278 | - | S s, Symbol s2 -> s = s2 | ||
| 279 | - | RD cat, RomanDig(_,cat2) -> cat = cat2 | ||
| 280 | - | O pat, Dig(s,"dig") -> pat = s | ||
| 281 | - | O pat, Symbol s -> pat = s | ||
| 282 | - | O pat, SmallLetter orth -> pat = orth | ||
| 283 | - | O pat, CapLetter(orth,lc) -> pat = orth | ||
| 284 | - | O pat, AllSmall orth -> pat = orth | ||
| 285 | - | O pat, AllCap(orth,lc,lc2) -> pat = orth | ||
| 286 | - | O pat, FirstCap(orth,lc,_,_) -> pat = orth | ||
| 287 | - | O pat, SomeCap orth -> pat = orth | ||
| 288 | - | L, SmallLetter _ -> true | ||
| 289 | - | L, CapLetter _ -> true | ||
| 290 | - | L, AllSmall _ -> true | ||
| 291 | - | L, AllCap _ -> true | ||
| 292 | - | L, FirstCap _ -> true | ||
| 293 | - | L, SomeCap _ -> true | ||
| 294 | - | CL, CapLetter _ -> true | ||
| 295 | - | CL, AllCap _ -> true | ||
| 296 | - | CL, SomeCap _ -> true | ||
| 297 | - | _ -> false | ||
| 298 | - | ||
| 299 | -let rec find_first_token matching pat = function | ||
| 300 | - Token t -> if match_token (pat,t.token) then [{matching with matched = t :: matching.matched}] else [] | ||
| 301 | - | Seq l -> Xlist.map (find_first_token matching pat (List.hd (List.rev l))) (fun matching -> {matching with prefix = matching.prefix @ (List.tl (List.rev l))}) | ||
| 302 | - | Variant l -> List.flatten (Xlist.map l (find_first_token matching pat)) | ||
| 303 | - | ||
| 304 | -let rec find_middle_token matching pat = function | ||
| 305 | - Token t -> if match_token (pat,t.token) then [{matching with matched = t :: matching.matched}] else [] | ||
| 306 | - | Seq _ -> [] | ||
| 307 | - | Variant l -> List.flatten (Xlist.map l (find_middle_token matching pat)) | ||
| 308 | - | ||
| 309 | -let rec find_last_token matching pat = function | ||
| 310 | - Token t -> if match_token (pat,t.token) then [{matching with matched = t :: matching.matched}] else [] | ||
| 311 | - | Seq l -> Xlist.map (find_last_token matching pat (List.hd l)) (fun matching -> {matching with suffix = matching.suffix @ (List.tl l)}) | ||
| 312 | - | Variant l -> List.flatten (Xlist.map l (find_last_token matching pat)) | ||
| 313 | - | ||
| 314 | -let rec find_pattern_tail matchings = function | ||
| 315 | - [] -> raise Not_found | ||
| 316 | - | token :: l -> | ||
| 317 | - let found,finished = Xlist.fold matchings ([],[]) (fun (found,finished) matching -> | ||
| 318 | - match matching.pattern with | ||
| 319 | - [pat] -> found, (find_last_token {matching with pattern=[]} pat token) @ finished | ||
| 320 | - | pat :: pattern -> (find_middle_token {matching with pattern=pattern} pat token) @ found, finished | ||
| 321 | - | _ -> failwith "find_pattern: ni") in | ||
| 322 | - (try | ||
| 323 | - if found = [] then raise Not_found else | ||
| 324 | - find_pattern_tail found l | ||
| 325 | - with Not_found -> | ||
| 326 | - let finished = List.flatten (Xlist.map finished (fun matching -> try [execute_command matching] with Not_found -> [])) in | ||
| 327 | - if finished = [] then raise Not_found else Variant finished,l) | ||
| 328 | - | ||
| 329 | -(* wzorce nie mogą mieć długości 1 *) | ||
| 330 | -let rec find_pattern matchings rev = function | ||
| 331 | - token :: l -> | ||
| 332 | - let found = Xlist.fold matchings [] (fun found matching -> | ||
| 333 | - match matching.pattern with | ||
| 334 | - pat :: pattern -> (find_first_token {matching with pattern=pattern} pat token) @ found | ||
| 335 | - | [] -> failwith "find_pattern: empty pattern") in | ||
| 336 | - if found = [] then find_pattern matchings (token :: rev) l else | ||
| 337 | - (try | ||
| 338 | - let token,l = find_pattern_tail found l in | ||
| 339 | - find_pattern matchings (token :: rev) l | ||
| 340 | - with Not_found -> find_pattern matchings (token :: rev) l) | ||
| 341 | - | [] -> List.rev rev | ||
| 342 | - | ||
| 343 | -let find_patterns patterns tokens = | ||
| 344 | - find_pattern (Xlist.map patterns (fun (pattern,command) -> | ||
| 345 | - {prefix=[]; matched=[]; suffix=[]; pattern=pattern; command=command; command_abr=(fun _ -> [])})) [] tokens | ||
| 346 | - | ||
| 347 | -let rec find_abr_pattern_tail matchings = function | ||
| 348 | - [] -> raise Not_found | ||
| 349 | - | token :: l -> | ||
| 350 | - let found,finished = Xlist.fold matchings ([],[]) (fun (found,finished) matching -> | ||
| 351 | - match matching.pattern with | ||
| 352 | - [pat] -> found, (find_last_token {matching with pattern=[]} pat token) @ finished | ||
| 353 | - | pat :: pattern -> (find_middle_token {matching with pattern=pattern} pat token) @ found, finished | ||
| 354 | - | [] -> found, matching :: finished) in | ||
| 355 | - (try | ||
| 356 | - if found = [] then raise Not_found else | ||
| 357 | - find_abr_pattern_tail found l | ||
| 358 | - with Not_found -> | ||
| 359 | - let finished = List.flatten (Xlist.map finished (fun matching -> try [execute_abr_command matching] with Not_found -> [])) in | ||
| 360 | - if finished = [] then raise Not_found else Variant finished,l) | ||
| 361 | - | ||
| 362 | -let rec find_abr_pattern matchings rev = function | ||
| 363 | - token :: l -> | ||
| 364 | - let found = Xlist.fold matchings [] (fun found matching -> | ||
| 365 | - match matching.pattern with | ||
| 366 | - pat :: pattern -> (find_first_token {matching with pattern=pattern} pat token) @ found | ||
| 367 | - | [] -> failwith "find_abr_pattern: empty pattern") in | ||
| 368 | - if found = [] then find_abr_pattern matchings (token :: rev) l else | ||
| 369 | - (try | ||
| 370 | - let token,l = find_abr_pattern_tail found l in | ||
| 371 | - find_abr_pattern matchings (token :: rev) l | ||
| 372 | - with Not_found -> find_abr_pattern matchings (token :: rev) l) | ||
| 373 | - | [] -> List.rev rev | ||
| 374 | - | ||
| 375 | -let find_abr_patterns patterns tokens = | ||
| 376 | - find_abr_pattern (Xlist.map patterns (fun (pattern,command) -> | ||
| 377 | - {prefix=[]; matched=[]; suffix=[]; pattern=pattern; command=(fun _ -> Symbol ""); command_abr=command})) [] tokens | ||
| 378 | - | ||
| 379 | -let find_replacement_patterns tokens = | ||
| 380 | - let tokens = find_patterns digit_patterns1 tokens in | ||
| 381 | - let tokens = normalize_tokens [] tokens in | ||
| 382 | - let tokens = find_patterns digit_patterns2 tokens in | ||
| 383 | - let tokens = normalize_tokens [] tokens in | ||
| 384 | - let tokens = find_patterns digit_patterns3 tokens in | ||
| 385 | - let tokens = normalize_tokens [] tokens in | ||
| 386 | - let tokens = find_patterns ENIAMacronyms.acronym_patterns tokens in | ||
| 387 | - let tokens = normalize_tokens [] tokens in | ||
| 388 | - let tokens = find_patterns ENIAMacronyms.mte_patterns tokens in | ||
| 389 | - let tokens = normalize_tokens [] tokens in | ||
| 390 | -(* Xlist.iter tokens (fun t -> print_endline (ENIAMtokens.string_of_tokens 0 t)); *) | ||
| 391 | - let tokens = find_patterns ENIAMacronyms.name_patterns tokens in | ||
| 392 | -(* Xlist.iter tokens (fun t -> print_endline (ENIAMtokens.string_of_tokens 0 t)); *) | ||
| 393 | - let tokens = normalize_tokens [] tokens in | ||
| 394 | - let tokens = find_patterns url_patterns1 tokens in | ||
| 395 | - let tokens = normalize_tokens [] tokens in | ||
| 396 | - let tokens = find_patterns url_patterns2 tokens in | ||
| 397 | - let tokens = normalize_tokens [] tokens in | ||
| 398 | - let tokens = find_patterns url_patterns3 tokens in | ||
| 399 | - let tokens = normalize_tokens [] tokens in | ||
| 400 | -(* Xlist.iter tokens (fun t -> print_endline (ENIAMtokens.string_of_tokens 0 t)); *) | ||
| 401 | - tokens | ||
| 402 | - | ||
| 403 | -let rec set_next_id n = function | ||
| 404 | - Token t -> Token{t with next=n} | ||
| 405 | - | Seq l -> | ||
| 406 | - (match List.rev l with | ||
| 407 | - t :: l -> Seq(List.rev ((set_next_id n t) :: l)) | ||
| 408 | - | [] -> failwith "set_next_id n") | ||
| 409 | - | Variant l -> Variant(Xlist.map l (set_next_id n)) | ||
| 410 | - | ||
| 411 | -let rec remove_spaces rev = function | ||
| 412 | - [] -> List.rev rev | ||
| 413 | - | x :: Token{token=Symbol " "; next=n} :: l -> remove_spaces ((set_next_id n x) :: rev) l | ||
| 414 | - | Token{token=Symbol " "} :: l -> remove_spaces rev l | ||
| 415 | - | x :: l -> remove_spaces (x :: rev) l |
tokenizer/eniam-tokenizer-1.0/ENIAMtokenizer.ml deleted
| 1 | -(* | ||
| 2 | - * ENIAMtokenizer, a tokenizer for Polish | ||
| 3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | ||
| 4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | ||
| 5 | - * | ||
| 6 | - * This library is free software: you can redistribute it and/or modify | ||
| 7 | - * it under the terms of the GNU Lesser General Public License as published by | ||
| 8 | - * the Free Software Foundation, either version 3 of the License, or | ||
| 9 | - * (at your option) any later version. | ||
| 10 | - * | ||
| 11 | - * This library is distributed in the hope that it will be useful, | ||
| 12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 14 | - * GNU General Public License for more details. | ||
| 15 | - * | ||
| 16 | - * You should have received a copy of the GNU Lesser General Public License | ||
| 17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 18 | - *) | ||
| 19 | - | ||
| 20 | -open Xstd | ||
| 21 | -open ENIAMtokenizerTypes | ||
| 22 | - | ||
| 23 | -let string_of = | ||
| 24 | - ENIAMtokens.string_of_tokens | ||
| 25 | - | ||
| 26 | -let parse query = | ||
| 27 | - let l = Xunicode.classified_chars_of_utf8_string query in | ||
| 28 | - let l = ENIAMtokens.tokenize l in | ||
| 29 | - let l = ENIAMpatterns.normalize_tokens [] l in | ||
| 30 | - let l = ENIAMpatterns.find_replacement_patterns l in | ||
| 31 | - let l = ENIAMpatterns.remove_spaces [] l in | ||
| 32 | - let l = ENIAMpatterns.find_abr_patterns ENIAMacronyms.abr_patterns l in | ||
| 33 | - let l = ENIAMpatterns.normalize_tokens [] l in | ||
| 34 | - l |
tokenizer/eniam-tokenizer-1.0/ENIAMtokenizerTypes.ml deleted
| 1 | -(* | ||
| 2 | - * ENIAMtokenizer, a tokenizer for Polish | ||
| 3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | ||
| 4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | ||
| 5 | - * | ||
| 6 | - * This library is free software: you can redistribute it and/or modify | ||
| 7 | - * it under the terms of the GNU Lesser General Public License as published by | ||
| 8 | - * the Free Software Foundation, either version 3 of the License, or | ||
| 9 | - * (at your option) any later version. | ||
| 10 | - * | ||
| 11 | - * This library is distributed in the hope that it will be useful, | ||
| 12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 14 | - * GNU General Public License for more details. | ||
| 15 | - * | ||
| 16 | - * You should have received a copy of the GNU Lesser General Public License | ||
| 17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 18 | - *) | ||
| 19 | - | ||
| 20 | -open Xstd | ||
| 21 | - | ||
| 22 | -(* Długość pojedynczego znaku w tekście *) | ||
| 23 | -let factor = 100 | ||
| 24 | - | ||
| 25 | -type token = | ||
| 26 | - SmallLetter of string (* orth *) | ||
| 27 | - | CapLetter of string * string (* orth * lowercase *) | ||
| 28 | - | AllSmall of string (* orth *) | ||
| 29 | - | AllCap of string * string * string (* orth * lowercase * all lowercase *) | ||
| 30 | - | FirstCap of string * string * string * string (* orth * all lowercase * first letter uppercase * first letter lowercase *) | ||
| 31 | - | SomeCap of string (* orth *) | ||
| 32 | - | RomanDig of string * string (* value * cat *) | ||
| 33 | - | Interp of string (* orth *) | ||
| 34 | - | Symbol of string (* orth *) | ||
| 35 | - | Dig of string * string (* value * cat *) | ||
| 36 | - | Other of string (* orth *) | ||
| 37 | - | Lemma of string * string * string list list list (* lemma * cat * interp *) | ||
| 38 | - | Proper of string * string * string list list list * string list (* lemma * cat * interp * senses *) | ||
| 39 | -(* | Sense of string * string * string list list list * (string * string * string list) list (* lemma * cat * interp * senses *) *) | ||
| 40 | - | Compound of string * token list (* sense * components *) | ||
| 41 | - (* | Tokens of string * int list (*cat * token id list *) *) | ||
| 42 | - | ||
| 43 | -(* Tekst reprezentuję jako zbiór obiektów typu token_record zawierających | ||
| 44 | - informacje o poszczególnych tokenach *) | ||
| 45 | -and token_record = { | ||
| 46 | - orth: string; (* sekwencja znaków pierwotnego tekstu składająca się na token *) | ||
| 47 | - corr_orth: string; (* sekwencja znaków pierwotnego tekstu składająca się na token z poprawionymi błędami *) | ||
| 48 | - beg: int; (* pozycja początkowa tokenu względem początku akapitu *) | ||
| 49 | - len: int; (* długość tokenu *) | ||
| 50 | - next: int; (* pozycja początkowa następnego tokenu względem początku akapitu *) | ||
| 51 | - token: token; (* treść tokenu *) | ||
| 52 | - attrs: string list; (* dodatkowe atrybuty *) | ||
| 53 | - } | ||
| 54 | - | ||
(* Tokens are stored in a data structure that allows efficient searching for
   their sequences; the structure itself carries no additional information. *)
type tokens =
    Token of token_record
  | Variant of tokens list
  | Seq of tokens list
| 61 | - | ||
(* Pattern elements; their interpretation is defined by the code that
   consumes them, which is outside this excerpt. *)
type pat =
    L
  | CL
  | D of string
  | C of string
  | S of string
  | RD of string
  | O of string
| 63 | - | ||
(* Neutral token record used as the base of every [{empty_token with ...}]
   construction: empty strings, zero positions, [Symbol ""] and no attributes. *)
let empty_token = {
  orth = "";
  corr_orth = "";
  beg = 0;
  len = 0;
  next = 0;
  token = Symbol "";
  attrs = [];
}
| 66 | - | ||
(* Attribute/value pairs read from the file "config-tokenizer".
   Any failure while loading is reported on stdout and yields an empty
   configuration. *)
let config =
  try File.load_attr_val_pairs "config-tokenizer"
  with _ ->
    print_endline "ENIAMtokenizer config file not found";
    []
| 70 | - | ||
(* Value of the MTE_FILENAME configuration variable; when the variable is
   missing a message is printed and the empty string is used. *)
let mte_filename =
  match Xlist.assoc config "MTE_FILENAME" with
  | filename -> filename
  | exception Not_found ->
      print_endline "ENIAMtokenizer MTE_FILENAME config variable undefined";
      ""
tokenizer/eniam-tokenizer-1.0/ENIAMtokens.ml deleted
| 1 | -(* | ||
| 2 | - * ENIAMtokenizer, a tokenizer for Polish | ||
| 3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | ||
| 4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | ||
| 5 | - * | ||
| 6 | - * This library is free software: you can redistribute it and/or modify | ||
| 7 | - * it under the terms of the GNU Lesser General Public License as published by | ||
| 8 | - * the Free Software Foundation, either version 3 of the License, or | ||
| 9 | - * (at your option) any later version. | ||
| 10 | - * | ||
| 11 | - * This library is distributed in the hope that it will be useful, | ||
| 12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 14 | - * GNU General Public License for more details. | ||
| 15 | - * | ||
| 16 | - * You should have received a copy of the GNU Lesser General Public License | ||
| 17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 18 | - *) | ||
| 19 | - | ||
| 20 | -open Printf | ||
| 21 | -open ENIAMtokenizerTypes | ||
| 22 | -open Xstd | ||
| 23 | -open Xunicode | ||
| 24 | - | ||
(* Renders nested interpretation lists: innermost values are joined with ".",
   the middle level with ":" and the outermost level with "|". *)
let string_of_interps interps =
  let string_of_values values = String.concat "." values in
  let string_of_interp interp =
    String.concat ":" (Xlist.map interp string_of_values) in
  String.concat "|" (Xlist.map interps string_of_interp)
| 29 | - | ||
(* Full textual rendering of a token: constructor name followed by all of its
   arguments; components of [Compound] are rendered recursively. *)
let rec string_of_token token =
  match token with
  | SmallLetter orth -> sprintf "SmallLetter(%s)" orth
  | CapLetter(orth,lower) -> sprintf "CapLetter(%s,%s)" orth lower
  | AllSmall orth -> sprintf "AllSmall(%s)" orth
  | AllCap(orth,lower,all_lower) -> sprintf "AllCap(%s,%s,%s)" orth lower all_lower
  | FirstCap(orth,lower,first_upper,first_lower) ->
      sprintf "FirstCap(%s,%s,%s,%s)" orth lower first_upper first_lower
  | SomeCap orth -> sprintf "SomeCap(%s)" orth
  | RomanDig(value,cat) -> sprintf "RomanDig(%s,%s)" value cat
  | Interp orth -> sprintf "Interp(%s)" orth
  | Symbol orth -> sprintf "Symbol(%s)" orth
  | Dig(value,cat) -> sprintf "Dig(%s,%s)" value cat
  | Other orth -> sprintf "Other(%s)" orth
  | Lemma(lemma,cat,interps) ->
      sprintf "Lemma(%s,%s,%s)" lemma cat (string_of_interps interps)
  | Proper(lemma,cat,interps,senses) ->
      sprintf "Proper(%s,%s,%s,%s)" lemma cat (string_of_interps interps)
        (String.concat "|" senses)
  | Compound(sense,components) ->
      let rendered = Xlist.map components string_of_token in
      sprintf "Compound(%s,[%s])" sense (String.concat ";" rendered)
| 48 | - | ||
(* [spaces i] is a string of [i] space characters, used for indentation.
   A non-positive [i] yields the empty string; the previous recursive version
   never terminated for negative [i] and concatenated one character at a
   time. *)
let spaces i =
  if i <= 0 then "" else String.make i ' '
| 51 | - | ||
(* Multi-line rendering of a token structure; [i] is the indentation level of
   the current node and nested nodes are rendered at level [i+1]. *)
let rec string_of_tokens i tokens =
  let indent = spaces i in
  let nested l =
    String.concat ";\n" (Xlist.map l (string_of_tokens (i+1))) in
  match tokens with
  | Token t ->
      sprintf "%s{orth=%s;beg=%d;len=%d;next=%d;token=%s;attrs=[%s]}"
        indent t.orth t.beg t.len t.next (string_of_token t.token)
        (String.concat ";" t.attrs)
  | Variant l -> sprintf "%sVariant[\n%s]" indent (nested l)
  | Seq l -> sprintf "%sSeq[\n%s]" indent (nested l)
| 57 | - | ||
(* Abbreviated rendering of a token: only the constructor name, except for
   [Interp], [Symbol] and [Other], which also show their orth. *)
let string_of_token_simple = function
  | SmallLetter _ -> "SmallLetter"
  | CapLetter _ -> "CapLetter"
  | AllSmall _ -> "AllSmall"
  | AllCap _ -> "AllCap"
  | FirstCap _ -> "FirstCap"
  | SomeCap _ -> "SomeCap"
  | RomanDig _ -> "RomanDig"
  | Interp orth -> sprintf "Interp(%s)" orth
  | Symbol orth -> sprintf "Symbol(%s)" orth
  | Dig _ -> "Dig"
  | Other orth -> sprintf "Other(%s)" orth
  | Lemma _ -> "Lemma"
  | Proper _ -> "Proper"
  | Compound _ -> "Compound"
| 75 | - | ||
(* Single-line abbreviated rendering of a token structure, built on
   [string_of_token_simple]. *)
let rec string_of_tokens_simple tokens =
  let joined l = String.concat ";" (Xlist.map l string_of_tokens_simple) in
  match tokens with
  | Token t -> string_of_token_simple t.token
  | Variant l -> sprintf "Variant[%s]" (joined l)
  | Seq l -> sprintf "Seq[%s]" (joined l)
| 80 | - | ||
(* Returns the first string argument of letter-, symbol-, digit- and
   other-tokens; every remaining kind of token yields "". *)
let get_orth token =
  match token with
  | SmallLetter orth | AllSmall orth | SomeCap orth -> orth
  | CapLetter(orth,_) -> orth
  | AllCap(orth,_,_) -> orth
  | FirstCap(orth,_,_,_) -> orth
  | Symbol orth -> orth
  | Dig(value,_) -> value
  | Other orth -> orth
  | _ -> ""(*failwith "get_orth"*)
| 92 | - | ||
| 93 | - | ||
(* Digit strings accepted as month numbers (with and without a leading zero). *)
let months = StringSet.of_list [
  "1"; "2"; "3"; "4"; "5"; "6"; "7"; "8"; "9";
  "01"; "02"; "03"; "04"; "05"; "06"; "07"; "08"; "09";
  "10"; "11"; "12"]

(* Digit strings accepted as hours, "0" through "24". *)
let hours = StringSet.of_list [
  "0"; "1"; "2"; "3"; "4"; "5"; "6"; "7"; "8"; "9";
  "00"; "01"; "02"; "03"; "04"; "05"; "06"; "07"; "08"; "09";
  "10"; "11"; "12"; "13"; "14"; "15"; "16"; "17"; "18"; "19";
  "20"; "21"; "22"; "23"; "24"]

(* Digit strings accepted as days of a month, "1" through "31". *)
let days = StringSet.of_list [
  "1"; "2"; "3"; "4"; "5"; "6"; "7"; "8"; "9";
  "01"; "02"; "03"; "04"; "05"; "06"; "07"; "08"; "09";
  "10"; "11"; "12"; "13"; "14"; "15"; "16"; "17"; "18"; "19";
  "20"; "21"; "22"; "23"; "24"; "25"; "26"; "27"; "28"; "29";
  "30"; "31"]

(* Roman numerals accepted as month numbers. *)
let romanmonths = StringSet.of_list [
  "I"; "II"; "III"; "IV"; "V"; "VI";
  "VII"; "VIII"; "IX"; "X"; "XI"; "XII"]
| 100 | - | ||
| 101 | - | ||
(* One-position marker token [Interp "<sentence>"] placed at position [i]. *)
let s_beg i =
  Token {empty_token with beg = i; len = 1; next = i + 1; token = Interp "<sentence>"}

(* One-position marker token [Interp "<clause>"] placed at position [i]. *)
let c_beg i =
  Token {empty_token with beg = i; len = 1; next = i + 1; token = Interp "<clause>"}
| 104 | - | ||
(* Token for a digit sequence starting at [i]; its length is the number of
   digits times [factor] and it is marked "maybe cs". *)
let dig_token orth i digs token =
  let len = Xlist.size digs * factor in
  Token {empty_token with
    orth = orth; beg = i; len = len; next = i + len;
    token = token; attrs = ["maybe cs"]}
| 107 | - | ||
(* Same as [dig_token], but preceded by sentence- and clause-opening markers;
   the markers take the first two positions of the digit span. *)
let sc_dig_token orth i digs token =
  let len = Xlist.size digs * factor in
  let number = Token {empty_token with
    orth = orth; beg = i + 2; len = len - 2; next = i + len;
    token = token; attrs = ["maybe cs"]} in
  Seq [s_beg i; c_beg (i + 1); number]
| 110 | - | ||
(* Variants of a [Dig(v,cat)] token: always the plain one, plus the
   sentence-opening one when [poss_s_beg] holds. *)
let dig_tokens orth poss_s_beg i digs v cat =
  let plain = dig_token orth i digs (Dig(v,cat)) in
  if poss_s_beg then [plain; sc_dig_token orth i digs (Dig(v,cat))]
  else [plain]
| 117 | - | ||
(* Builds the [Variant] of all readings of a digit sequence [digs] starting at
   position [i]: generic "dig", object identifier, and, depending on the shape
   of the number, "intnum", "year", "month", "hour", "day", "minute", "3dig",
   "2dig" and "pref3dig".

   [v] is the number without leading zeros. It used to be computed only via
   [int_of_string], which raises on digit runs that do not fit a native int
   and made the whole function fail with "merge_digits". Such runs are now
   normalized textually. An empty or non-decimal [orth] still fails with
   "merge_digits". *)
let merge_digits poss_s_beg i digs =
  let orth = String.concat "" digs in
  let t = dig_tokens orth poss_s_beg i digs in
  let is_decimal s =
    let n = String.length s in
    let rec check k = k = n || (s.[k] >= '0' && s.[k] <= '9' && check (k+1)) in
    n > 0 && check 0 in
  let strip_leading_zeros s =
    let last = String.length s - 1 in
    let rec skip k = if k < last && s.[k] = '0' then skip (k+1) else k in
    let k = skip 0 in
    String.sub s k (String.length s - k) in
  let v =
    try string_of_int (int_of_string orth)
    with Failure _ ->
      if is_decimal orth then strip_leading_zeros orth
      else failwith "merge_digits" in
  (* [orth] is non-empty here, hence [digs] is non-empty and [List.hd] is safe *)
  let n = Xlist.size digs in
  let first = List.hd digs in
  let variants =
    (t orth "dig") @
    [Token{empty_token with orth=orth;beg=i;len=n * factor;next=i+n * factor; token=Proper(orth,"obj-id",[[]],["obj-id"]); attrs=["maybe cs"]}] @
    (if digs = ["0"] || first <> "0" then (t orth "intnum") else []) @
    (if first <> "0" then (t v "year") else []) @
    (if StringSet.mem months orth then (t v "month") else []) @
    (if StringSet.mem hours orth then (t v "hour") else []) @
    (if StringSet.mem days orth then (t v "day") else []) @
    (if n = 2 && first < "6" then (t v "minute") else []) @
    (if n = 3 then (t orth "3dig") else []) @
    (if n = 2 then (t orth "2dig") else []) @
    (if n <= 3 && first <> "0" then (t orth "pref3dig") else []) in
  Variant variants
| 148 | - | ||
(* Final stage of Roman numeral recognition: up to three "I" optionally
   followed by a small "w". Returns the accumulated value and whether the "w"
   was present; any other remainder yields [(0,false)]. *)
let recognize_roman_I v letters =
  match letters with
  | [Capital("I",_); Capital("I",_); Capital("I",_)] -> v+3, false
  | [Capital("I",_); Capital("I",_)] -> v+2, false
  | [Capital("I",_)] -> v+1, false
  | [] -> v, false
  | [Capital("I",_); Capital("I",_); Capital("I",_); Small("w")] -> v+3, true
  | [Capital("I",_); Capital("I",_); Small("w")] -> v+2, true
  | [Capital("I",_); Small("w")] -> v+1, true
  | [Small("w")] -> v, true
  | _ -> 0, false
| 159 | - | ||
(* Units stage: handles "IV", "IX" (each optionally followed by "w") and a
   leading "V"; everything else is passed to [recognize_roman_I]. *)
let recognize_roman_V v letters =
  match letters with
  | [Capital("I",_); ForeignCapital("V",_)] -> v+4, false
  | ForeignCapital("V",_) :: rest -> recognize_roman_I (v+5) rest
  | [Capital("I",_); ForeignCapital("X",_)] -> v+9, false
  | [Capital("I",_); ForeignCapital("V",_); Small("w")] -> v+4, true
  | [Capital("I",_); ForeignCapital("X",_); Small("w")] -> v+9, true
  | rest -> recognize_roman_I v rest
| 167 | - | ||
(* Consumes up to three leading "X" (10 each) and continues with
   [recognize_roman_V]. *)
let recognize_roman_X v letters =
  match letters with
  | ForeignCapital("X",_) :: ForeignCapital("X",_) :: ForeignCapital("X",_) :: rest ->
      recognize_roman_V (v+30) rest
  | ForeignCapital("X",_) :: ForeignCapital("X",_) :: rest ->
      recognize_roman_V (v+20) rest
  | ForeignCapital("X",_) :: rest -> recognize_roman_V (v+10) rest
  | rest -> recognize_roman_V v rest
| 173 | - | ||
(* Tens stage: "XL" (40), "L" (50) and "XC" (90); anything else goes to
   [recognize_roman_X]. *)
let recognize_roman_L v letters =
  match letters with
  | ForeignCapital("X",_) :: Capital("L",_) :: rest -> recognize_roman_V (v+40) rest
  | Capital("L",_) :: rest -> recognize_roman_X (v+50) rest
  | ForeignCapital("X",_) :: Capital("C",_) :: rest -> recognize_roman_V (v+90) rest
  | rest -> recognize_roman_X v rest
| 179 | - | ||
(* Consumes up to three leading "C" (100 each) and continues with
   [recognize_roman_L]. *)
let recognize_roman_C v letters =
  match letters with
  | Capital("C",_) :: Capital("C",_) :: Capital("C",_) :: rest ->
      recognize_roman_L (v+300) rest
  | Capital("C",_) :: Capital("C",_) :: rest -> recognize_roman_L (v+200) rest
  | Capital("C",_) :: rest -> recognize_roman_L (v+100) rest
  | rest -> recognize_roman_L v rest
| 185 | - | ||
(* Hundreds stage: "CD" (400), "D" (500) and "CM" (900); anything else goes
   to [recognize_roman_C]. *)
let recognize_roman_D v letters =
  match letters with
  | Capital("C",_) :: Capital("D",_) :: rest -> recognize_roman_L (v+400) rest
  | Capital("D",_) :: rest -> recognize_roman_C (v+500) rest
  | Capital("C",_) :: Capital("M",_) :: rest -> recognize_roman_L (v+900) rest
  | rest -> recognize_roman_C v rest
| 191 | - | ||
(* Entry point of Roman numeral recognition: consumes up to three leading "M"
   (1000 each) and continues with [recognize_roman_D]. *)
let recognize_roman_M v letters =
  match letters with
  | Capital("M",_) :: Capital("M",_) :: Capital("M",_) :: rest ->
      recognize_roman_D (v+3000) rest
  | Capital("M",_) :: Capital("M",_) :: rest -> recognize_roman_D (v+2000) rest
  | Capital("M",_) :: rest -> recognize_roman_D (v+1000) rest
  | rest -> recognize_roman_D v rest
| 197 | - | ||
(* Concatenates the surface forms of a list of letters.
   Fails with "merge" when the list contains anything but letters. *)
let merge l =
  let surface = function
    | Capital(s,_) | ForeignCapital(s,_) -> s
    | Small s | ForeignSmall s -> s
    | _ -> failwith "merge" in
  String.concat "" (Xlist.map l surface)
| 205 | - | ||
(* Replaces a leading capital letter by its lowercase counterpart; lists that
   are empty or start with a small letter are returned unchanged.
   Fails with "lowercase_first" when the first element is not a letter. *)
let lowercase_first letters =
  match letters with
  | [] -> []
  | Capital(_,lower) :: rest -> Small lower :: rest
  | ForeignCapital(_,lower) :: rest -> ForeignSmall lower :: rest
  | (Small _ | ForeignSmall _) :: _ -> letters
  | _ -> failwith "lowercase_first"
| 213 | - | ||
(* Replaces every capital letter by its lowercase counterpart.
   Fails with "lowercase_all" when the list contains a non-letter. *)
let lowercase_all letters =
  let lower = function
    | Capital(_,t) -> Small t
    | ForeignCapital(_,t) -> ForeignSmall t
    | Small s -> Small s
    | ForeignSmall s -> ForeignSmall s
    | _ -> failwith "lowercase_all" in
  Xlist.map letters lower
| 221 | - | ||
(* Keeps the first letter untouched and lowercases all the following ones. *)
let lowercase_rest letters =
  match letters with
  | first :: rest -> first :: lowercase_all rest
  | [] -> []
| 225 | - | ||
(* Tells whether the first letter is a capital one.
   Fails with "first_capital" on an empty list or a non-letter head. *)
let first_capital letters =
  match letters with
  | (Capital _ | ForeignCapital _) :: _ -> true
  | (Small _ | ForeignSmall _) :: _ -> false
  | _ -> failwith "first_capital"
| 232 | - | ||
(* Tells whether every letter of the list is a capital one (true for the
   empty list). Scanning stops at the first small letter.
   Fails with "all_capital" when a non-letter is reached; the message used to
   be the copy-pasted "first_capital", which pointed at the wrong function. *)
let rec all_capital = function
    Capital _ :: l -> all_capital l
  | ForeignCapital _ :: l -> all_capital l
  | Small _ :: _ -> false
  | ForeignSmall _ :: _ -> false
  | [] -> true
  | _ -> failwith "all_capital"
| 240 | - | ||
(* Tells whether every letter of the list is a small one (true for the empty
   list). Scanning stops at the first capital letter.
   Fails with "all_small" when a non-letter is reached; the message used to be
   the copy-pasted "first_capital", which pointed at the wrong function. *)
let rec all_small = function
    Capital _ :: _ -> false
  | ForeignCapital _ :: _ -> false
  | Small _ :: l -> all_small l
  | ForeignSmall _ :: l -> all_small l
  | [] -> true
  | _ -> failwith "all_small"
| 248 | - | ||
(* Tells whether all letters after the first one are capital.
   Fails with "rest_capital" on an empty list. *)
let rest_capital letters =
  match letters with
  | _ :: rest -> all_capital rest
  | [] -> failwith "rest_capital"
| 252 | - | ||
(* Tells whether all letters after the first one are small.
   Fails with "rest_small" on an empty list. *)
let rest_small letters =
  match letters with
  | _ :: rest -> all_small rest
  | [] -> failwith "rest_small"
| 256 | - | ||
(* Uppercase form of the leading capital letter.
   Fails with "get_first_cap" when the list does not start with a capital. *)
let get_first_cap letters =
  match letters with
  | Capital(upper,_) :: _ | ForeignCapital(upper,_) :: _ -> upper
  | _ -> failwith "get_first_cap"
| 261 | - | ||
(* Lowercase form of the leading capital letter.
   Fails with "get_first_lower" when the list does not start with a capital. *)
let get_first_lower letters =
  match letters with
  | Capital(_,lower) :: _ | ForeignCapital(_,lower) :: _ -> lower
  | _ -> failwith "get_first_lower"
| 266 | - | ||
| 267 | -(*let cs_weight = -1. | ||
| 268 | -let sc_cap_weight = -0.3*) | ||
| 269 | - | ||
(* True exactly for the letter-sequence tokens, i.e. the ones [add_attr]
   is allowed to extend with an attribute. *)
let is_add_attr_token token =
  match token with
  | SmallLetter _ | CapLetter _ | AllSmall _ | AllCap _ | FirstCap _ | SomeCap _ -> true
  | _ -> false
| 278 | - | ||
(* Prepends attribute [s] to every token of the structure accepted by
   [is_add_attr_token]; other tokens are left unchanged. *)
let rec add_attr s tokens =
  match tokens with
  | Token t ->
      if is_add_attr_token t.token then Token {t with attrs = s :: t.attrs}
      else tokens
  | Variant l -> Variant (Xlist.map l (add_attr s))
  | Seq l -> Seq (Xlist.map l (add_attr s))
| 283 | - | ||
(* Builds the token(s) for a word stem [letters] starting at position [i].
   The reading depends on capitalization (single letter / first capital /
   all capitals / mixed). When [poss_s_beg] holds, capitalized stems
   additionally get variants preceded by sentence- and clause-opening markers.
   With [has_sufix] the all-capitals [AllCap] reading is dropped and every
   letter token is marked "required validated lemmatization". *)
let recognize_stem poss_s_beg has_sufix i letters =
  let orth = merge letters in
  let span = Xlist.size letters * factor in
  let base = {empty_token with orth=orth; beg=i; len=span; next=i+span} in
  let lower_first = merge (lowercase_first letters) in
  let lower_all = merge (lowercase_all letters) in
  let lower_rest = merge (lowercase_rest letters) in
  (* token records: without extra attributes, case-shifted, possibly case-shifted *)
  let bare token = {base with token=token} in
  let cs token = {base with token=token; attrs="cs" :: base.attrs} in
  let maybe_cs token = {base with token=token; attrs="maybe cs" :: base.attrs} in
  (* a record as a plain token, or preceded by <sentence> and <clause> markers
     that take the first two positions of the span *)
  let plain r = Token r in
  let opening r =
    Seq[s_beg i; c_beg (i+1); Token{r with beg=r.beg+2; len=r.len-2}] in
  let first_cap form lower =
    FirstCap(form,lower,get_first_cap letters,get_first_lower letters) in
  let stem =
    if Xlist.size letters = 1 then
      if not (first_capital letters) then plain (bare (SmallLetter orth))
      else if poss_s_beg then Variant[
        plain (cs (SmallLetter lower_first));
        opening (maybe_cs (SmallLetter lower_first));
        plain (maybe_cs (CapLetter(orth,lower_first)));
        opening (maybe_cs (CapLetter(orth,lower_first)))]
      else Variant[
        (* NOTE(review): unlike the sentence-start branch above, this "cs"
           reading keeps the uppercase [orth] in SmallLetter; confirm intent *)
        plain (cs (SmallLetter orth));
        plain (maybe_cs (CapLetter(orth,lower_first)))]
    else if not (first_capital letters) then
      (if rest_small letters then plain (bare (AllSmall orth))
       else plain (bare (SomeCap orth)))
    else if rest_small letters then
      (if poss_s_beg then Variant[
         opening (bare (AllSmall lower_first));
         plain (bare (first_cap orth lower_first));
         opening (bare (first_cap orth lower_first))]
       else plain (bare (first_cap orth lower_first)))
    else if rest_capital letters then
      let shifted = [
        cs (AllSmall lower_all);
        cs (first_cap lower_rest lower_all)] in
      let acronym =
        if has_sufix then []
        else [maybe_cs (AllCap(orth,lower_rest,lower_all))] in
      let records = shifted @ acronym in
      if poss_s_beg then
        Variant (List.flatten (Xlist.map records (fun r -> [plain r; opening r])))
      else Variant (Xlist.map records plain)
    else plain (bare (SomeCap orth)) in
  if has_sufix then add_attr "required validated lemmatization" stem else stem
| 333 | - | ||
(* Parses a tag string: alternatives are separated by "|", positions by ":"
   and values within a position by ".". Each alternative becomes a pair of
   its part of speech and the remaining positions.
   Fails with "parse_postags: <alternative>" when the first position of an
   alternative does not consist of exactly one value. *)
let parse_postags s =
  let split_on pattern text = Str.split (Str.regexp pattern) text in
  let parse_alternative alt =
    match List.map (split_on "\\.") (split_on ":" alt) with
    | [pos] :: tags -> pos, tags
    | _ -> failwith ("parse_postags: " ^ alt) in
  List.map parse_alternative (split_on "|" s)
| 339 | - | ||
(* Builds a [Lemma] token from a lemma and a tag string that describes exactly
   one interpretation; fails with "make_lemma" otherwise. *)
let make_lemma (lemma,interp) =
  let interpretations = parse_postags interp in
  match interpretations with
  | [(pos,tags)] -> Lemma(lemma,pos,[tags])
  | _ -> failwith "make_lemma"
| 344 | - | ||
(* Combines the attributes of a list of tokens.
   Case attributes: if any token is "cs", the result is ["cs"] provided the
   "cs" and "maybe cs" counts add up to the number of tokens, otherwise
   [Not_found] is raised; with no "cs", the result is ["maybe cs"] when that
   count equals the number of tokens. All other attributes are kept once. *)
let merge_attrs l =
  let len = Xlist.size l in
  let counts = Xlist.fold l StringQMap.empty (fun counts token ->
    Xlist.fold token.attrs counts StringQMap.add) in
  let count attr = try StringQMap.find counts attr with Not_found -> 0 in
  let n_cs = count "cs" in
  let n_maybe_cs = count "maybe cs" in
  let case_attrs =
    if n_cs > 0 then
      (if n_cs + n_maybe_cs = len then ["cs"] else raise Not_found)
    else if n_maybe_cs = len then ["maybe cs"]
    else [] in
  let other_attrs = StringQMap.fold counts [] (fun acc attr _ ->
    if attr = "cs" || attr = "maybe cs" then acc else attr :: acc) in
  case_attrs @ other_attrs
| 360 | - | ||
(* Map from a detachable suffix to the [Lemma] token it stands for. *)
let suffix_lemmata =
  let entries = [
    "em",    ("być","aglt:sg:pri:imperf:wok");
    "eś",    ("być","aglt:sg:sec:imperf:wok");
    "eście", ("być","aglt:pl:sec:imperf:wok");
    "eśmy",  ("być","aglt:pl:pri:imperf:wok");
    "m",     ("być","aglt:sg:pri:imperf:nwok");
    "ś",     ("być","aglt:sg:sec:imperf:nwok");
    "ście",  ("być","aglt:pl:sec:imperf:nwok");
    "śmy",   ("być","aglt:pl:pri:imperf:nwok");
    "by",    ("by","qub");
    ] in
  Xlist.fold entries StringMap.empty (fun map (suffix,lemma) ->
    StringMap.add map suffix (make_lemma lemma))
| 372 | - | ||
(* Token for a suffix written either entirely in capitals (marked "cs") or
   entirely in small letters, looked up in [suffix_lemmata].
   Raises [Not_found] for mixed case or an unknown suffix. *)
let recognize_suffix i letters =
  let orth = merge letters in
  let span = Xlist.size letters * factor in
  let t = {empty_token with orth=orth; beg=i; len=span; next=i+span} in
  if all_capital letters then
    let lemma = StringMap.find suffix_lemmata (merge (lowercase_all letters)) in
    Token{t with token=lemma; attrs="cs" :: t.attrs}
  else if all_small letters then
    Token{t with token=StringMap.find suffix_lemmata orth}
  else raise Not_found
| 379 | - | ||
(* Readings of [letters] as a Roman numeral, or [] when they do not form one.
   Every reading is a [Seq] of the numeral token, optionally followed by the
   readings of a trailing "w". Numerals listed in [romanmonths] are also read
   as "month". With [poss_s_beg], sentence-opening variants are appended. *)
let recognize_romandig poss_s_beg i letters =
  let value,has_w = recognize_roman_M 0 letters in
  if value <= 0 then [] else
  (* split off the trailing "w" reported by the recognizer *)
  let numeral_letters,w_letters =
    if has_w then
      let reversed = List.rev letters in
      List.rev (List.tl reversed), [List.hd reversed]
    else letters,[] in
  let orth = merge numeral_letters in
  let roman = string_of_int value in
  let span = Xlist.size numeral_letters * factor in
  let t = {empty_token with orth=orth; beg=i; len=span; next=i+span} in
  let w_tokens =
    if w_letters = [] then [] else
    let w_orth = merge w_letters in
    let w_beg = i + span in
    let w_token token =
      Token{empty_token with orth=w_orth; beg=w_beg; len=factor; next=w_beg+factor; token=token} in
    [Variant[w_token (SmallLetter w_orth);
             w_token (make_lemma ("wiek","subst:sg:_:m3"))]] in
  let cats =
    if StringSet.mem romanmonths orth then ["roman"; "month"] else ["roman"] in
  let numeral cat =
    {t with token=RomanDig(roman,cat); attrs="maybe cs" :: t.attrs} in
  let plain = Xlist.map cats (fun cat ->
    Seq(Token(numeral cat) :: w_tokens)) in
  let opening =
    if poss_s_beg then Xlist.map cats (fun cat ->
      let r = numeral cat in
      Seq([s_beg i; c_beg (i+1); Token{r with beg=r.beg+2; len=r.len-2}] @ w_tokens))
    else [] in
  plain @ opening
| 404 | - | ||
(* Letter sequences of the first group of detachable suffixes, each stored
   reversed so it can be matched against a reversed word. *)
let sufixes1 =
  let forward = [
    ["m"];
    ["e"; "m"];
    ["ś"];
    ["e"; "ś"];
    ["ś"; "m"; "y"];
    ["e"; "ś"; "m"; "y"];
    ["ś"; "c"; "i"; "e"];
    ["e"; "ś"; "c"; "i"; "e"];
    ] in
  Xlist.map forward List.rev
| 415 | - | ||
(* Letter sequences of the second group of detachable suffixes (only "by"),
   stored reversed like [sufixes1]. *)
let sufixes2 =
  let forward = [
    ["b"; "y"];
    ] in
  Xlist.map forward List.rev
| 419 | - | ||
(* Matches a pattern of lowercase letters against the beginning of a letter
   list. Returns the matched letters accumulated onto [rev], and the
   unmatched remainder, which must be non-empty. Raises [Not_found] on
   mismatch, on a non-native letter or when no remainder would be left. *)
let rec find_suffix rev (pattern,letters) =
  match pattern, letters with
  | _, [] -> raise Not_found
  | [], rest -> rev, rest
  | s :: pat, (Capital(_,t) as letter) :: rest
  | s :: pat, (Small t as letter) :: rest ->
      if s = t then find_suffix (letter :: rev) (pat,rest) else raise Not_found
  | _, _ -> raise Not_found
| 426 | - | ||
(* Tries every suffix pattern of [sufixes] on the reversed word [letters];
   each match yields the remaining reversed stem together with the matched
   suffix put in front of the already detached [sufs]. *)
let find_suffixes2 sufixes letters sufs =
  Xlist.fold sufixes [] (fun found pattern ->
    match find_suffix [] (pattern,letters) with
    | suffix,rev_stem -> (rev_stem, suffix :: sufs) :: found
    | exception Not_found -> found)
| 433 | - | ||
(* Enumerates the ways of splitting a word into a stem and detachable
   suffixes: no suffix, one suffix from [sufixes1], and each of those
   optionally preceded by a suffix from [sufixes2]. Every split is a list of
   (letters, start position) pairs, the stem first, with positions counted
   from [i] in units of [factor]. *)
let find_suffixes i letters =
  let rev_letters = List.rev letters in
  let first_pass = (rev_letters,[]) :: find_suffixes2 sufixes1 rev_letters [] in
  let splits = Xlist.fold first_pass first_pass (fun acc (rev_stem,sufs) ->
    (find_suffixes2 sufixes2 rev_stem sufs) @ acc) in
  let with_positions parts =
    let placed,_ = Xlist.fold parts ([],i) (fun (seq,pos) part ->
      (part,pos) :: seq, pos + factor * Xlist.size part) in
    List.rev placed in
  Xlist.map splits (fun (rev_stem,sufs) ->
    with_positions (List.rev rev_stem :: sufs))
| 442 | - | ||
(* Builds the [Variant] of all readings of a letter sequence: Roman numeral
   readings plus, for every stem/suffix split, the stem token followed by its
   suffix tokens. Splits whose suffixes are not recognized are dropped. *)
let merge_letters poss_s_beg i letters =
  let splits = find_suffixes i letters in
  let roman = recognize_romandig poss_s_beg i letters in
  let add_split variants split =
    match split with
    | [] -> failwith "merge_letters"
    | [stem,pos] -> (recognize_stem poss_s_beg false pos stem) :: variants
    | (stem,pos) :: suffixes ->
        (match Seq((recognize_stem poss_s_beg true pos stem) ::
                   Xlist.map suffixes (fun (suf,pos) -> recognize_suffix pos suf)) with
         | seq -> seq :: variants
         | exception Not_found -> variants) in
  Variant (Xlist.fold splits roman add_split)
| 453 | - | ||
(* Splits off the leading run of digits: returns their strings (appended in
   order to the reversed accumulator [rev]) and the remaining characters. *)
let rec group_digits rev = function
  | Digit s :: rest -> group_digits (s :: rev) rest
  | rest -> List.rev rev, rest
| 458 | - | ||
(* Splits off the leading run of letters (capital or small, native or
   foreign): returns them in order and the remaining characters. *)
let rec group_letters rev = function
  | (Capital _ | ForeignCapital _ | Small _ | ForeignSmall _ as letter) :: rest ->
      group_letters (letter :: rev) rest
  | rest -> List.rev rev, rest
| 466 | - | ||
(* Splits off the leading run of [Other] characters: returns their strings in
   order and the remaining characters. *)
let rec group_others rev = function
  | Other(s,_) :: rest -> group_others (s :: rev) rest
  | rest -> List.rev rev, rest
| 471 | - | ||
(* Token covering the signs [signs] at position [i], marked "maybe cs".
   Returns the token, the position after it, and [l] and [poss_s_beg]
   unchanged. Fails with "create_sign_token" on a non-sign element. *)
let create_sign_token poss_s_beg i signs l token =
  let sign_text = function
    | Sign s -> s
    | _ -> failwith "create_sign_token" in
  let orth = String.concat "" (Xlist.map signs sign_text) in
  let len = Xlist.size signs * factor in
  let sign_token = Token{empty_token with
    orth=orth; beg=i; len=len; next=i+len; token=token; attrs=["maybe cs"]} in
  sign_token, i+len, l, poss_s_beg
| 476 | - | ||
(* Token record (still carrying the default token of [empty_token]) covering
   the signs [signs] at position [i], marked "maybe cs", together with the
   position after it.
   Fails with "create_empty_sign_token" on a non-sign element; the message
   used to be the copy-pasted "create_sign_token". *)
let create_empty_sign_token i signs =
  let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_empty_sign_token")) in
  let len = Xlist.size signs * factor in
  {empty_token with orth=orth;beg=i;len=len;next=i+len; attrs=["maybe cs"]},i+len
| 481 | - | ||
(* Sentence-ending reading of the signs [signs] at position [i]:
   "</clause>", the sign itself as an "sinterj" lemma, "</sentence>".
   The span of the signs is divided between the three tokens; [l] is unused.
   Fails with "create_sentence_seq" on a non-sign element; the message used
   to be the copy-pasted "create_sign_token". *)
let create_sentence_seq i signs l lemma =
  let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sentence_seq")) in
  let len = Xlist.size signs * factor in
  Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"};
      Token{empty_token with orth=orth;beg=i+20;len=len-30;next=i+len-10;token=make_lemma (lemma,"sinterj")};
      Token{empty_token with beg=i+len-10;len=10;next=i+len;token=Interp "</sentence>"}]
| 488 | - | ||
(* Like [create_sentence_seq], but the sequence starts with an additional
   [Symbol "."] token marked "maybe cs". [l] is unused.
   Fails with "create_sentence_seq_hapl" on a non-sign element; the message
   used to be the copy-pasted "create_sign_token". *)
let create_sentence_seq_hapl i signs l lemma =
  let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sentence_seq_hapl")) in
  let len = Xlist.size signs * factor in
  Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Symbol "."; attrs=["maybe cs"]};
      Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</clause>"};
      Token{empty_token with orth=orth;beg=i+20;len=len-30;next=i+len-10;token=make_lemma (lemma,"sinterj")};
      Token{empty_token with beg=i+len-10;len=10;next=i+len;token=Interp "</sentence>"}]
| 496 | - | ||
(* Like [create_sentence_seq], but the sequence starts with an additional
   [Interp "?"] token. [l] is unused.
   Fails with "create_sentence_seq_q" on a non-sign element; the message used
   to be the copy-pasted "create_sign_token". *)
let create_sentence_seq_q i signs l lemma =
  let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sentence_seq_q")) in
  let len = Xlist.size signs * factor in
  Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "?"};
      Token{empty_token with beg=i+20;len=10;next=i+30;token=Interp "</clause>"};
      Token{empty_token with orth=orth;beg=i+30;len=len-40;next=i+len-10;token=make_lemma (lemma,"sinterj")};
      Token{empty_token with beg=i+len-10;len=10;next=i+len;token=Interp "</sentence>"}]
| 504 | - | ||
(* Like [create_sentence_seq], but the sequence starts with two additional
   tokens: [Symbol "."] marked "maybe cs" and [Interp "?"]. [l] is unused.
   Fails with "create_sentence_seq_hapl_q" on a non-sign element; the message
   used to be the copy-pasted "create_sign_token". *)
let create_sentence_seq_hapl_q i signs l lemma =
  let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sentence_seq_hapl_q")) in
  let len = Xlist.size signs * factor in
  Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Symbol "."; attrs=["maybe cs"]};
      Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "?"};
      Token{empty_token with beg=i+20;len=10;next=i+30;token=Interp "</clause>"};
      Token{empty_token with orth=orth;beg=i+30;len=len-40;next=i+len-10;token=make_lemma (lemma,"sinterj")};
      Token{empty_token with beg=i+len-10;len=10;next=i+len;token=Interp "</sentence>"}]
| 513 | - | ||
(* Builds every reading of a single dash-like sign group [signs] located at
   offset [i] and returns [(variants, i + len, l, poss_s_beg)], i.e. the
   alternatives, the offset just past the group, and [l] / [poss_s_beg]
   passed through unchanged.

   Readings, in order:
     - [Symbol "-"] and [Interp "-"] (hyphen), both flagged "maybe cs";
     - [Interp "<or>"];
     - [</clause> </sentence> </or> <clause>];
     - [</or> <clause>].

   Every token satisfies [next = beg + len].  The [</or>] token of the
   four-token reading previously had [len = len - 59], which overshot its own
   [next] by two units; it is now [len - 61].

   Raises [Failure "create_sign_token"] if [signs] holds anything but [Sign]. *)
let create_or_beg i signs l poss_s_beg =
  let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in
  let len = Xlist.size signs * factor in
  Variant[
    Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=Symbol "-"; attrs=["maybe cs"]};
    Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=Interp "-"; attrs=["maybe cs"]}; (* hyphen *)
    Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=Interp "<or>"};
    (* Disabled reading, kept for reference:
    Seq[Token{empty_token with beg=i;              len=20;    next=i+20;    token=Interp "<sentence>"};
        Token{empty_token with orth=orth;beg=i+20; len=len-20;next=i+len;   token=Interp "<or>"}]; *)
    Seq[Token{empty_token with beg=i;              len=21;    next=i+21;    token=Interp "</clause>"};
        Token{empty_token with beg=i+21;           len=20;    next=i+41;    token=Interp "</sentence>"};
        (* i+41 + (len-61) = i+len-20, matching [next] *)
        Token{empty_token with orth=orth;beg=i+41; len=len-61;next=i+len-20;token=Interp "</or>"};
        Token{empty_token with beg=i+len-20;len=20;    next=i+len;   token=Interp "<clause>"}];
    Seq[Token{empty_token with orth=orth;beg=i;    len=len-22;next=i+len-22;token=Interp "</or>"};
        Token{empty_token with beg=i+len-22;len=22;    next=i+len;   token=Interp "<clause>"}];
    ],i+len,l,poss_s_beg
| 530 | - | ||
(* Like the single-dash builder, but without the plain hyphen readings.
   Builds the readings of the sign group [signs] at offset [i] and returns
   [(variants, i + len, l, poss_s_beg)], with [l] and [poss_s_beg] passed
   through unchanged.

   Readings, in order:
     - [Interp "<or>"];
     - [</clause> </sentence> </or> <clause>];
     - [</or> <clause>].

   Every token satisfies [next = beg + len].  The [</or>] token of the
   four-token reading previously had [len = len - 59], which overshot its own
   [next] by two units; it is now [len - 61].

   Raises [Failure "create_sign_token"] if [signs] holds anything but [Sign]. *)
let create_or_beg2 i signs l poss_s_beg =
  let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in
  let len = Xlist.size signs * factor in
  Variant[
    Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=Interp "<or>"};
    (* Disabled reading, kept for reference:
    Seq[Token{empty_token with beg=i;              len=20;    next=i+20;    token=Interp "<sentence>"};
        Token{empty_token with orth=orth;beg=i+20; len=len-20;next=i+len;   token=Interp "<or>"}]; *)
    Seq[Token{empty_token with beg=i;              len=21;    next=i+21;    token=Interp "</clause>"};
        Token{empty_token with beg=i+21;           len=20;    next=i+41;    token=Interp "</sentence>"};
        (* i+41 + (len-61) = i+len-20, matching [next] *)
        Token{empty_token with orth=orth;beg=i+41; len=len-61;next=i+len-20;token=Interp "</or>"};
        Token{empty_token with beg=i+len-20;len=20;    next=i+len;   token=Interp "<clause>"}];
    Seq[Token{empty_token with orth=orth;beg=i;    len=len-22;next=i+len-22;token=Interp "</or>"};
        Token{empty_token with beg=i+len-22;len=22;    next=i+len;   token=Interp "<clause>"}];
    ],i+len,l,poss_s_beg
| 545 | - | ||
(* True when the signs remaining after a dot are nothing at all, or exactly one
   [Sign] that is a whitespace variant or a closing quotation mark.
   NOTE(review): the whitespace literals below may be distinct Unicode space
   characters that merely look alike; keep their bytes intact when editing. *)
let is_dot_sentence_end_marker rest =
  match rest with
  | []
  | [Sign (" " | "" | " " | "\"" | "»" | "”")] -> true
  | _ -> false
| 555 | - | ||
(* True when the signs following a dot show that the dot does not close a
   sentence: whitespace followed by a [Small] letter, one of the punctuation
   signs "," ":" "?" "!", or a letter / digit immediately after the dot.
   NOTE(review): the whitespace literals below may be distinct Unicode space
   characters that merely look alike; keep their bytes intact when editing. *)
let not_dot_sentence_end_marker rest =
  match rest with
  | Sign (" " | "" | " ") :: Small _ :: _ -> true
  | Sign ("," | ":" | "?" | "!") :: _ -> true
  | (Small _ | ForeignSmall _ | Capital _ | ForeignCapital _ | Digit _) :: _ ->
    true
  | _ -> false
| 570 | - | ||
(* True when the sign list starts with a [Digit]. *)
let is_comma_digit_marker rest =
  match rest with
  | Digit _ :: _ -> true
  | _ -> false
| 574 | - | ||
(* True when the signs remaining after a colon are nothing at all, or exactly
   one whitespace [Sign].
   NOTE(review): the whitespace literals below may be distinct Unicode space
   characters that merely look alike; keep their bytes intact when editing. *)
let is_colon_sentence_end_marker rest =
  match rest with
  | []
  | [Sign (" " | "" | " ")] -> true
  | _ -> false
| 581 | - | ||
(* True when the sign list starts with a [Digit] or with the sign "/". *)
let is_colon_symbol rest =
  match rest with
  | (Digit _ | Sign "/") :: _ -> true
  | _ -> false
| 586 | - | ||
(* True when the signs remaining after an ellipsis are nothing at all, or
   exactly one [Sign] that is a whitespace variant or a closing quotation mark.
   NOTE(review): the whitespace literals below may be distinct Unicode space
   characters that merely look alike; keep their bytes intact when editing. *)
let is_multidot_sentence_end_marker rest =
  match rest with
  | []
  | [Sign (" " | "" | " " | "\"" | "»" | "”")] -> true
(* Disabled candidate patterns, kept for reference:
  | "\"" :: l -> true
  | "»" :: l -> true
  | "”" :: l -> true
  | "“" :: l -> true
  | " " :: "-" :: l -> true
  | " " :: "–" :: l -> true
  | " " :: "—" :: l -> true
  | ")" :: l -> true
  | "]" :: l -> true *)
  | _ -> false
| 605 | - | ||
(* Builds the two readings of a quotation-mark group [signs] at offset [i]
   that is immediately followed by a dot (the dot itself is not part of
   [signs]; it accounts for the extra [factor] units consumed).

   Reading 1 places the sentence end first and shifts the quote token by
   [factor]:   [</clause> </sentence>(".") ”s]
   Reading 2 keeps the quote first and puts the sentence end after it:
               [” </clause> </sentence>(".")]

   Returns [(variants, offset past quote and dot, l, true)]; [l] is passed
   through unchanged. *)
let create_quot_digit_token i signs l =
  let quote, after_quote = create_empty_sign_token i signs in
  (* Clause end followed by a sentence end that carries the dot; together they
     fill exactly [factor] units starting at [start]. *)
  let clause_end start =
    Token {empty_token with beg = start; len = 20; next = start + 20;
                            token = Interp "</clause>"} in
  let sentence_end start =
    Token {empty_token with orth = "."; beg = start + 20; len = factor - 20;
                            next = start + factor;
                            token = Interp "</sentence>"} in
  let shifted_quote =
    Token {quote with beg = quote.beg + factor; next = quote.next + factor;
                      token = Interp "”s"} in
  let plain_quote = Token {quote with token = Interp "”"} in
  let dot_first = Seq [clause_end i; sentence_end i; shifted_quote] in
  let quote_first =
    Seq [plain_quote; clause_end after_quote; sentence_end after_quote] in
  Variant [dot_first; quote_first], after_quote + factor, l, true
| 616 | - | ||
| 617 | -let rec recognize_sign_group poss_s_beg i = function | ||
| 618 | - | (Sign " ") :: l -> create_sign_token poss_s_beg i [Sign " "] l (Symbol " ") | ||
| 619 | - | (Sign "") :: l -> create_sign_token poss_s_beg i [Sign ""] l (Symbol " ") | ||
| 620 | - | (Sign " ") :: l -> create_sign_token poss_s_beg i [Sign " "] l (Symbol " ") | ||
| 621 | - | (Sign "\"") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "\""] l | ||
| 622 | - | (Sign "\"") :: l -> | ||
| 623 | - let t,i = create_empty_sign_token i [Sign "\""] in | ||
| 624 | - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"};Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg | ||
| 625 | - | (Sign "˝") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "˝"] l | ||
| 626 | - | (Sign "˝") :: l -> | ||
| 627 | - let t,i = create_empty_sign_token i [Sign "˝"] in | ||
| 628 | - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"};Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg | ||
| 629 | - | (Sign "„") :: l -> | ||
| 630 | - let t,i = create_empty_sign_token i [Sign "„"] in | ||
| 631 | - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"}],i,l,poss_s_beg | ||
| 632 | - | (Sign "”") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "”"] l | ||
| 633 | - | (Sign "”") :: l -> | ||
| 634 | - let t,i = create_empty_sign_token i [Sign "”"] in | ||
| 635 | - Variant[Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg | ||
| 636 | - | (Sign "“") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "“"] l | ||
| 637 | - | (Sign "“") :: l -> | ||
| 638 | - let t,i = create_empty_sign_token i [Sign "“"] in | ||
| 639 | - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"};Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg | ||
| 640 | - | (Sign ",") :: (Sign ",") :: l -> | ||
| 641 | - let t,i = create_empty_sign_token i [Sign ",";Sign ","] in | ||
| 642 | - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"}],i,l,poss_s_beg | ||
| 643 | - | (Sign ",") :: l -> | ||
| 644 | - let t,i2 = create_empty_sign_token i [Sign ","] in | ||
| 645 | - if is_comma_digit_marker l then | ||
| 646 | - Token{t with token=Symbol ","},i2,l,false | ||
| 647 | - else | ||
| 648 | - Variant[Token{t with token=Interp ","}; | ||
| 649 | - Seq[Token{empty_token with orth=",";beg=i;len=factor/2;next=i+factor/2;token=Interp "</clause>"}; | ||
| 650 | - Token{empty_token with beg=i+factor/2;len=factor-(factor/2);next=i+factor;token=Interp "<clause>"}]],i2,l,false | ||
| 651 | - | (Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: []) l (make_lemma ("(…)","sinterj")) | ||
| 652 | - | (Sign "(") :: (Sign "?") :: (Sign "!") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "?") :: (Sign "!") :: (Sign ")") :: []) l (make_lemma ("(?!)","sinterj")) | ||
| 653 | - | (Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: []) l (make_lemma ("(…)","sinterj")) | ||
| 654 | - | (Sign "(") :: (Sign "+") :: (Sign "+") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "+") :: (Sign "+") :: (Sign ")") :: []) l (make_lemma ("(++)","sinterj")) | ||
| 655 | - | (Sign "(") :: (Sign "-") :: (Sign "-") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "-") :: (Sign "-") :: (Sign ")") :: []) l (make_lemma ("(--)","symbol")) | ||
| 656 | - | (Sign "(") :: (Sign "…") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "…") :: (Sign ")") :: []) l (make_lemma ("(…)","sinterj")) | ||
| 657 | - | (Sign "(") :: (Sign "?") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "?") :: (Sign ")") :: []) l (make_lemma ("(?)","sinterj")) | ||
| 658 | - | (Sign "(") :: (Sign "+") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "+") :: (Sign ")") :: []) l (make_lemma ("(+)","symbol")) | ||
| 659 | - | (Sign "(") :: (Sign "!") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "!") :: (Sign ")") :: []) l (make_lemma ("(!)","sinterj")) | ||
| 660 | - | (Sign "(") :: (Sign "-") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "-") :: (Sign ")") :: []) l (make_lemma ("(-)","symbol")) | ||
| 661 | - | (Sign "(") :: (Sign "*") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "*") :: (Sign ")") :: []) l (make_lemma ("(*)","symbol")) | ||
| 662 | - | (Sign "(") :: l -> create_sign_token poss_s_beg i [Sign "("] l (Interp "(") | ||
| 663 | - | (Sign ":") :: (Sign "(") :: (Sign "(") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "(") :: (Sign "(") :: (Sign "(") :: []) l (make_lemma (":(((","sinterj")) | ||
| 664 | - | (Sign ":") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "(") :: []) l (make_lemma (":(","sinterj")) | ||
| 665 | - | (Sign ":") :: (Sign "-") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign "(") :: []) l (make_lemma (":-(","sinterj")) | ||
| 666 | - | (Sign ";") :: (Sign "(") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "(") :: (Sign "(") :: []) l (make_lemma (";((","sinterj")) | ||
| 667 | - | (Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";-))))","sinterj")) | ||
| 668 | - | (Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":-))))","sinterj")) | ||
| 669 | - | (Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":-)))","sinterj")) | ||
| 670 | - | (Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";-)))","sinterj")) | ||
| 671 | - | (Sign ";") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";)))","sinterj")) | ||
| 672 | - | (Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":-))","sinterj")) | ||
| 673 | - | (Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";-))","sinterj")) | ||
| 674 | - | (Sign ":") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":)))","sinterj")) | ||
| 675 | - | (Sign ":") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":))","sinterj")) | ||
| 676 | - | (Sign ";") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";))","sinterj")) | ||
| 677 | - | (Sign ";") :: (Sign "-") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: []) l (make_lemma (";-)","sinterj")) | ||
| 678 | - | (Sign ":") :: (Sign "|") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "|") :: []) l (make_lemma (":|","sinterj")) | ||
| 679 | - | (Sign ":") :: (Sign "\\") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "\\") :: []) l (make_lemma (":\\","sinterj")) | ||
| 680 | - | (Sign ":") :: (Sign "-") :: (Sign "/") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign "/") :: []) l (make_lemma (":-/","sinterj")) | ||
| 681 | - | (Sign ":") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign ")") :: []) l (make_lemma (":)","sinterj")) | ||
| 682 | - | (Sign ";") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign ")") :: []) l (make_lemma (";)","sinterj")) | ||
| 683 | - | (Sign ")") :: l -> create_sign_token poss_s_beg i [Sign ")"] l (Interp ")") | ||
| 684 | - | (Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: []) l (make_lemma ("(…)","sinterj")) | ||
| 685 | - | (Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: []) l (make_lemma ("(…)","sinterj")) | ||
| 686 | - | (Sign "[") :: (Sign "+") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign "+") :: (Sign "]") :: []) l (make_lemma ("[+]","symbol")) | ||
| 687 | - | (Sign "[") :: (Sign "-") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign "-") :: (Sign "]") :: []) l (make_lemma ("[-]","symbol")) | ||
| 688 | - | (Sign "[") :: (Sign "?") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign "?") :: (Sign "]") :: []) l (make_lemma ("[?]","sinterj")) | ||
| 689 | - | (Sign ":") :: (Sign "]") :: l -> | ||
| 690 | - let t,i2 = create_empty_sign_token i [Sign ":";Sign "]"] in | ||
| 691 | - Variant[Token{t with token=make_lemma (":]","sinterj")}; | ||
| 692 | - Seq[Token{empty_token with orth=":";beg=i;len=factor;next=i+factor;token=Interp ":"; attrs=["maybe cs"]}; | ||
| 693 | - Token{empty_token with orth="]";beg=i+factor;len=factor;next=i+2*factor;token=Interp "]"; attrs=["maybe cs"]}]],i2,l,false | ||
| 694 | - | (Sign ";") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "]") :: []) l (make_lemma (";]","sinterj")) | ||
| 695 | - | (Sign "]") :: l -> create_sign_token poss_s_beg i [Sign "]"] l (Interp "]") | ||
| 696 | - | (Sign "[") :: l -> create_sign_token poss_s_beg i [Sign "["] l (Interp "[") | ||
| 697 | - | (Sign ":") :: l -> | ||
| 698 | - if is_colon_symbol l then | ||
| 699 | - Token{empty_token with orth=":";beg=i;len=factor;next=i+factor;token=Symbol ":"; attrs=["maybe cs"]},i+factor,l,false | ||
| 700 | - else | ||
| 701 | - Variant[ | ||
| 702 | - Seq[Token{empty_token with beg=i;len=11;next=i+11;token=Interp "</clause>"}; (* wyliczenie*) | ||
| 703 | - Token{empty_token with orth=":";beg=i+11;len=factor-11;next=i+factor;token=Interp "<clause>"}]; | ||
| 704 | - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"}; | ||
| 705 | - Token{empty_token with orth=":";beg=i+10;len=factor-30;next=i+factor-20;token=Interp ":"}; (* mowa zależna, koniec zdania *) | ||
| 706 | - Token{empty_token with beg=i+factor-20;len=20;next=i+factor;token=Interp "</sentence>"}]; | ||
| 707 | - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"}; | ||
| 708 | - Token{empty_token with orth=":";beg=i+10;len=factor-40;next=i+factor-30;token=Interp ":"}; (* po ':' zdanie z małej litery *) | ||
| 709 | - Token{empty_token with beg=i+factor-30;len=10;next=i+factor-20;token=Interp "</sentence>"}; | ||
| 710 | - Token{empty_token with beg=i+factor-20;len=10;next=i+factor-10;token=Interp "<sentence>"}; | ||
| 711 | - Token{empty_token with beg=i+factor-10;len=10;next=i+factor;token=Interp "<clause>"}]; | ||
| 712 | - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"}; | ||
| 713 | - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</sentence>"}; | ||
| 714 | - Token{empty_token with orth=":";beg=i+20;len=factor-20;next=i+factor;token=Interp ":s"}]; (* speaker *) | ||
| 715 | - ],i+factor,l,true | ||
| 716 | -(* if is_colon_sentence_end_marker l then | ||
| 717 | - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"}; | ||
| 718 | - Token{empty_token with orth=":";beg=i+10;len=10;next=i+20;token=Interp ":"}; | ||
| 719 | - Token{empty_token with beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}],i+factor,l,true | ||
| 720 | - else | ||
| 721 | - else | ||
| 722 | - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"}; | ||
| 723 | - Token{empty_token with orth=":";beg=i+10;len=10;next=i+20;token=Interp ""}; | ||
| 724 | - Token{empty_token with beg=i+20;len=factor-20;next=i+factor;token=Interp "<clause>"}],i+factor,l,false*) | ||
| 725 | - | (Sign "'") :: (Sign "'") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "'";Sign "'"] l | ||
| 726 | - | (Sign "'") :: (Sign "'") :: l -> | ||
| 727 | - let t,i = create_empty_sign_token i [Sign "”"] in | ||
| 728 | - Variant[Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg | ||
| 729 | - | (Sign "'") :: l -> create_sign_token poss_s_beg i [Sign "'"] l (Symbol "’") | ||
| 730 | - | (Sign "’") :: (Sign "’") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "’";Sign "’"] l | ||
| 731 | - | (Sign "’") :: (Sign "’") :: l -> | ||
| 732 | - let t,i = create_empty_sign_token i [Sign "”"] in | ||
| 733 | - Variant[Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg | ||
| 734 | - | (Sign "’") :: l -> create_sign_token poss_s_beg i [Sign "’"] l (Symbol "’") | ||
| 735 | - | (Sign ";") :: (Sign "*") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "*") :: []) l (make_lemma (";*","sinterj")) | ||
| 736 | - | (Sign ";") :: l -> | ||
| 737 | - Variant[Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"}; | ||
| 738 | - Token{empty_token with orth=";";beg=i+20;len=20;next=i+40;token=Interp "</sentence>"}; | ||
| 739 | - Token{empty_token with beg=i+40;len=20;next=i+60;token=Interp "<sentence>"}; | ||
| 740 | - Token{empty_token with beg=i+60;len=factor-60;next=i+factor;token=Interp "<clause>"}]; | ||
| 741 | - Token{empty_token with orth=";";beg=i;len=factor;next=i+factor;token=Interp ";"; attrs=["maybe cs"]}],i+factor,l,false | ||
| 742 | - | (Sign "?") :: (Sign "!") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> | ||
| 743 | - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "?!...",i+5*factor,l,true | ||
| 744 | - | (Sign "?") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> | ||
| 745 | - create_sentence_seq_q i ((Sign "?") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "?...",i+4*factor,l,true | ||
| 746 | - | (Sign "?") :: (Sign "?") :: (Sign "?") :: (Sign "?") :: l -> | ||
| 747 | - create_sentence_seq_q i ((Sign "?") :: (Sign "?") :: (Sign "?") :: (Sign "?") :: []) l "????",i+4*factor,l,true | ||
| 748 | - | (Sign "?") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: l -> | ||
| 749 | - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: []) l "?!!!",i+4*factor,l,true | ||
| 750 | - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "?") :: l -> | ||
| 751 | - Variant[create_sentence_seq_hapl_q i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "?") :: []) l "…?"; | ||
| 752 | - create_sentence_seq_q i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "?") :: []) l "…?"],i+4*factor,l,true | ||
| 753 | - | (Sign "?") :: (Sign "!") :: (Sign "?") :: l -> | ||
| 754 | - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: (Sign "?") :: []) l "?!?",i+3*factor,l,true | ||
| 755 | - | (Sign "?") :: (Sign "?") :: (Sign "?") :: l -> | ||
| 756 | - create_sentence_seq_q i ((Sign "?") :: (Sign "?") :: (Sign "?") :: []) l "???",i+3*factor,l,true | ||
| 757 | - | (Sign "?") :: (Sign "!") :: l -> | ||
| 758 | - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: []) l "?!",i+2*factor,l,true | ||
| 759 | - | (Sign "?") :: (Sign "?") :: l -> | ||
| 760 | - create_sentence_seq_q i ((Sign "?") :: (Sign "?") :: []) l "??",i+2*factor,l,true | ||
| 761 | -(* | (Sign "?") :: (Sign ".") :: l -> *) | ||
| 762 | - | (Sign "!") :: (Sign "?") :: l -> | ||
| 763 | - create_sentence_seq_q i ((Sign "!") :: (Sign "?") :: []) l "!?",i+2*factor,l,true | ||
| 764 | - | (Sign "?") :: (Sign "…") :: l -> | ||
| 765 | - create_sentence_seq_q i ((Sign "?") :: (Sign "…") :: []) l "?…",i+2*factor,l,true | ||
| 766 | - | (Sign "…") :: (Sign "?") :: l -> | ||
| 767 | - Variant[create_sentence_seq_hapl_q i ((Sign "…") :: (Sign "?") :: []) l "…?"; | ||
| 768 | - create_sentence_seq_q i ((Sign "…") :: (Sign "?") :: []) l "…?"],i+2*factor,l,true | ||
| 769 | - | (Sign "?") :: l -> | ||
| 770 | - create_sentence_seq_q i ((Sign "?") :: []) l "?",i+factor,l,true | ||
| 771 | - | (Sign "!") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: l -> | ||
| 772 | - create_sentence_seq i ((Sign "!") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: []) l "!!!!",i+4*factor,l,true | ||
| 773 | - | (Sign "!") :: (Sign "!") :: (Sign "!") :: l -> | ||
| 774 | - create_sentence_seq i ((Sign "!") :: (Sign "!") :: (Sign "!") :: []) l "!!!",i+3*factor,l,true | ||
| 775 | - | (Sign "!") :: (Sign "!") :: l -> | ||
| 776 | - create_sentence_seq i ((Sign "!") :: (Sign "!") :: []) l "!!",i+2*factor,l,true | ||
| 777 | - | (Sign "!") :: l -> | ||
| 778 | - create_sentence_seq i ((Sign "!") :: []) l "!",i+factor,l,true | ||
| 779 | - | (Sign "…") :: l -> | ||
| 780 | - if is_multidot_sentence_end_marker l then | ||
| 781 | - Variant[create_sentence_seq_hapl i ((Sign "…") :: []) l "…"; | ||
| 782 | - create_sentence_seq i ((Sign "…") :: []) l "…"],i+factor,l,true | ||
| 783 | - else | ||
| 784 | - Variant[create_sentence_seq_hapl i ((Sign "…") :: []) l "…"; | ||
| 785 | - create_sentence_seq i ((Sign "…") :: []) l "…"; | ||
| 786 | - Token{empty_token with orth="…";beg=i;len=factor;next=i+factor;token=make_lemma ("…","sinterj"); attrs=["maybe cs"]}],i+factor,l,true | ||
| 787 | - | (Sign "/") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "/") :: l -> create_sign_token poss_s_beg i ((Sign "/") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "/") :: []) l (make_lemma ("(…)","sinterj")) | ||
| 788 | - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> (* Różne natęrzenia wielokropka i wypunktowania *) | ||
| 789 | - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"; | ||
| 790 | - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+8*factor,l,true | ||
| 791 | - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> | ||
| 792 | - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"; | ||
| 793 | - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+7*factor,l,true | ||
| 794 | - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> | ||
| 795 | - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"; | ||
| 796 | - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+6*factor,l,true | ||
| 797 | - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> | ||
| 798 | - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"; | ||
| 799 | - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+5*factor,l,true | ||
| 800 | - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> | ||
| 801 | - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"; | ||
| 802 | - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+4*factor,l,true | ||
| 803 | - | (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> | ||
| 804 | - if is_multidot_sentence_end_marker l then | ||
| 805 | - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"; | ||
| 806 | - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"],i+3*factor,l,true | ||
| 807 | - else | ||
| 808 | - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"; | ||
| 809 | - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"; | ||
| 810 | - Token{empty_token with orth="...";beg=i;len=3*factor;next=i+3*factor;token=make_lemma ("…","sinterj"); attrs=["maybe cs"]}],i+3*factor,l,true | ||
| 811 | - | (Sign ".") :: (Sign ".") :: l -> | ||
| 812 | - if is_multidot_sentence_end_marker l then | ||
| 813 | - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: []) l "…"; | ||
| 814 | - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"],i+2*factor,l,true | ||
| 815 | - else | ||
| 816 | - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: []) l "…"; | ||
| 817 | - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"; | ||
| 818 | - Token{empty_token with orth="..";beg=i;len=2*factor;next=i+2*factor;token=make_lemma ("…","sinterj"); attrs=["maybe cs"]}],i+2*factor,l,true | ||
| 819 | - | (Sign ".") :: l -> | ||
| 820 | - if is_dot_sentence_end_marker l then | ||
| 821 | - Variant[Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Symbol "."; attrs=["maybe cs"]}; | ||
| 822 | - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</clause>"}; | ||
| 823 | - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}]; | ||
| 824 | - Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"}; | ||
| 825 | - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}]],i+factor,l,true | ||
| 826 | - else if not_dot_sentence_end_marker l then | ||
| 827 | - Token{empty_token with orth=".";beg=i;len=factor;next=i+factor;token=Symbol "."; attrs=["maybe cs"]},i+factor,l,false | ||
| 828 | - else | ||
| 829 | - Variant[Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Symbol "."; attrs=["maybe cs"]}; | ||
| 830 | - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</clause>"}; | ||
| 831 | - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}]; | ||
| 832 | - Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"}; | ||
| 833 | - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}]; | ||
| 834 | - Token{empty_token with orth=".";beg=i;len=factor;next=i+factor;token=Symbol "."; attrs=["maybe cs"]}],i+factor,l,true | ||
| 835 | - | (Sign "*") :: (Sign "*") :: (Sign "*") :: (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*";Sign "*";Sign "*";Sign "*"] l (Interp "*****") (* zastępniki liter *) | ||
| 836 | - | (Sign "*") :: (Sign "*") :: (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*";Sign "*";Sign "*"] l (Interp "****") | ||
| 837 | - | (Sign "*") :: (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*";Sign "*"] l (Interp "***") | ||
| 838 | - | (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*"] l (Interp "**") | ||
| 839 | - | (Sign "*") :: l -> (* Interp zastępnik liter i cudzysłów, symbol listy *) | ||
| 840 | - let t,i = create_empty_sign_token i [Sign "*"] in | ||
| 841 | - Variant[Token{t with token=Interp "*"};Token{t with token=Symbol "*"}],i,l,poss_s_beg | ||
| 842 | - | (Sign "+") :: l -> create_sign_token poss_s_beg i [Sign "+"] l (Symbol "+") | ||
| 843 | - | (Sign "«") :: l -> | ||
| 844 | - let t,i = create_empty_sign_token i [Sign "«"] in | ||
| 845 | - Variant[Token{t with token=Interp "«"};Token{t with token=Interp "«s"}],i,l,poss_s_beg | ||
| 846 | - | (Sign "»") :: l -> | ||
| 847 | - let t,i = create_empty_sign_token i [Sign "»"] in | ||
| 848 | - Variant[Token{t with token=Interp "»"};Token{t with token=Interp "»s"}],i,l,poss_s_beg | ||
| 849 | - | (Sign "<") :: (Sign "<") :: l -> create_sign_token poss_s_beg i [Sign "<";Sign "<"] l (Interp "«") (* prawy cudzysłów *) | ||
| 850 | - | (Sign "<") :: l -> (* prawy cudzysłów i element wzoru matematycznego *) | ||
| 851 | - let t,i = create_empty_sign_token i [Sign "<"] in | ||
| 852 | - Variant[Token{t with token=Interp "«"};Token{t with token=Symbol "<"}],i,l,poss_s_beg | ||
| 853 | - | (Sign ">") :: (Sign ">") :: l -> create_sign_token poss_s_beg i [Sign ">";Sign ">"] l (Interp "»") (* lewy cudzysłów *) | ||
| 854 | - | (Sign ">") :: l -> create_sign_token poss_s_beg i [Sign ">"] l (Symbol ">") | ||
| 855 | - | (Sign "-") :: (Sign "-") :: (Sign "-") :: l -> create_or_beg2 i [Sign "-";Sign "-";Sign "-"] l poss_s_beg | ||
| 856 | - | (Sign "-") :: (Sign "-") :: l -> create_or_beg2 i [Sign "-";Sign "-"] l poss_s_beg | ||
| 857 | - | (Sign "-") :: l -> create_or_beg i [Sign "-"] l poss_s_beg | ||
| 858 | - | (Sign "‐") :: l -> create_or_beg i [Sign "‐"] l poss_s_beg | ||
| 859 | - | (Sign "‑") :: l -> create_or_beg i [Sign "‑"] l poss_s_beg | ||
| 860 | - | (Sign "‒") :: l -> create_or_beg i [Sign "‒"] l poss_s_beg | ||
| 861 | - | (Sign "−") :: l -> create_or_beg i [Sign "−"] l poss_s_beg | ||
| 862 | - | (Sign "–") :: l -> create_or_beg i [Sign "–"] l poss_s_beg | ||
| 863 | - | (Sign "—") :: l -> create_or_beg i [Sign "—"] l poss_s_beg | ||
| 864 | - | (Sign "‘") :: l -> create_sign_token poss_s_beg i [Sign "‘"] l (Interp "‘") | ||
| 865 | - | (Sign "´") :: l -> create_sign_token poss_s_beg i [Sign "´"] l (Symbol "’") | ||
| 866 | - | (Sign "`") :: (Sign "`") :: l -> | ||
| 867 | - let t,i = create_empty_sign_token i [Sign "`";Sign "`"] in | ||
| 868 | - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"}],i,l,poss_s_beg | ||
| 869 | - | (Sign "`") :: l -> create_sign_token poss_s_beg i [Sign "`"] l (Symbol "’") | ||
| 870 | - | (Sign "·") :: l -> create_sign_token poss_s_beg i [Sign "·"] l (Interp "·") | ||
| 871 | - | (Sign "•") :: l -> create_sign_token poss_s_beg i [Sign "•"] l (Interp "•") | ||
| 872 | - | (Sign "¨") :: l -> create_sign_token poss_s_beg i [Sign "¨"] l (Interp "¨") | ||
| 873 | - | (Sign "~") :: l -> | ||
| 874 | - let t,i = create_empty_sign_token i [Sign "~"] in | ||
| 875 | - Variant[Token{t with token=Symbol "~"};Token{t with token=make_lemma ("około","prep:gen")}],i,l,false | ||
| 876 | - | (Sign "{") :: l -> | ||
| 877 | - let t,i = create_empty_sign_token i [Sign "{"] in | ||
| 878 | - Variant[Token{t with token=Symbol "{"};Token{t with token=Interp "{"}],i,l,poss_s_beg | ||
| 879 | - | (Sign "}") :: l -> | ||
| 880 | - let t,i = create_empty_sign_token i [Sign "}"] in | ||
| 881 | - Variant[Token{t with token=Symbol "}"};Token{t with token=Interp "}"}],i,l,poss_s_beg | ||
| 882 | - | (Sign "#") :: l -> create_sign_token poss_s_beg i [Sign ""] l (Symbol "") | ||
| 883 | - | (Sign "^") :: (Sign "^") :: l -> create_sign_token poss_s_beg i [Sign "^";Sign "^"] l (make_lemma ("^^","sinterj")) | ||
| 884 | - | (Sign "^") :: l -> create_sign_token poss_s_beg i [Sign "^"] l (Symbol "^") | ||
| 885 | - | (Sign "|") :: l -> create_sign_token poss_s_beg i [Sign "|"] l (Symbol "|") | ||
| 886 | - | (Sign "&") :: l -> create_sign_token poss_s_beg i [Sign "&"] l (Symbol "&") | ||
| 887 | - | (Sign "=") :: l -> create_sign_token poss_s_beg i [Sign "="] l (Symbol "=") | ||
| 888 | - | (Sign "/") :: l -> | ||
| 889 | - let t,i = create_empty_sign_token i [Sign "/"] in | ||
| 890 | - Variant[Token{t with token=Symbol "/"};Token{t with token=make_lemma ("na","prep:acc")}],i,l,false | ||
| 891 | - | (Sign "_") :: l -> create_sign_token poss_s_beg i [Sign "_"] l (Symbol "_") | ||
| 892 | - | (Sign "@") :: l -> create_sign_token poss_s_beg i [Sign "@"] l (Symbol "@") | ||
| 893 | - | (Sign "×") :: l -> create_sign_token poss_s_beg i [Sign "×"] l (Symbol "×") | ||
| 894 | - | (Sign "%") :: l -> | ||
| 895 | - let t,i = create_empty_sign_token i [Sign "%"] in | ||
| 896 | - Variant[Token{t with token=Symbol "%"};Token{t with token=make_lemma ("procent","subst:_:_:m3")}],i,l,false | ||
| 897 | - | (Sign "$") :: l -> | ||
| 898 | - let t,i = create_empty_sign_token i [Sign "$"] in | ||
| 899 | - Variant[Token{t with token=Symbol "$"};Token{t with token=make_lemma ("dolar","subst:_:_:m2")}],i,l,false | ||
| 900 | - | (Sign "€") :: l -> create_sign_token poss_s_beg i [Sign "€"] l (make_lemma ("euro","subst:_:_:n2")) | ||
| 901 | - | (Sign "²") :: l -> create_sign_token poss_s_beg i [Sign "²"] l (Symbol "²") | ||
| 902 | - | (Sign "°") :: l -> create_sign_token poss_s_beg i [Sign "°"] l (make_lemma ("stopień","subst:_:_:m3")) | ||
| 903 | - | (Sign "§") :: l -> create_sign_token false i [Sign "§"] l (make_lemma ("paragraf","subst:_:_:m3")) | ||
| 904 | - | (Sign s) :: l -> print_endline ("recognize_sign_group: " ^ s); create_sign_token poss_s_beg i [Sign s] l (Symbol s) | ||
| 905 | - | l -> failwith "recognize_sign_group" | ||
| 906 | - | ||
| 907 | -(* FIXME: "„Szpak” frunie." trzeba przenieść <sentence> przed „, ale zostawić po „s. *) | |
| 908 | - | ||
| 909 | -let rec group_chars poss_s_beg i rev = function | ||
| 910 | - [] -> List.rev ((Token{empty_token with beg=i;len=factor;next=i+factor;token=Interp "</query>"}) :: rev) | ||
| 911 | - | (Digit s) :: l -> let x,l = group_digits [] ((Digit s) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_digits poss_s_beg i x) :: rev) l | ||
| 912 | - | (Sign s) :: l -> let x,i,l,poss_s_beg = recognize_sign_group poss_s_beg i ((Sign s) :: l) in group_chars poss_s_beg i (x :: rev) l | ||
| 913 | - | (Capital(s,t)) :: l -> let x,l = group_letters [] ((Capital(s,t)) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_letters poss_s_beg i x) :: rev) l | ||
| 914 | - | (ForeignCapital(s,t)) :: l -> let x,l = group_letters [] ((ForeignCapital(s,t)) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_letters poss_s_beg i x) :: rev) l | ||
| 915 | - | (Small s) :: l -> let x,l = group_letters [] ((Small s) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_letters poss_s_beg i x) :: rev) l | ||
| 916 | - | (ForeignSmall s) :: l -> let x,l = group_letters [] ((ForeignSmall s) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_letters poss_s_beg i x) :: rev) l | ||
| 917 | - | (Other(s,x)) :: l -> | ||
| 918 | - let x,l = group_others [] ((Other(s,x)) :: l) in | ||
| 919 | - group_chars false (i + Xlist.size x * factor) | ||
| 920 | - ((Token{empty_token with orth=String.concat "" x;beg=i;len=Xlist.size x * factor;next=i+factor;token=Other(String.concat "" x)}) :: rev) l | ||
| 921 | - | ||
| 922 | -let tokenize l = | ||
| 923 | - (Token{empty_token with beg=0;len=factor;next=factor;token=Interp "<query>"}) :: (group_chars true factor [] l) |
tokenizer/eniam-tokenizer-1.0/README deleted
| 1 | -ENIAMtokenizer Version 1.0 : | ||
| 2 | ------------------------ | ||
| 3 | - | ||
| 4 | -ENIAMtokenizer is a library that provides a tokenizer for Polish. | ||
| 5 | - | ||
| 6 | -Install | ||
| 7 | -------- | ||
| 8 | - | ||
| 9 | -ENIAMtokenizer requires OCaml version 4.02.3 compiler | ||
| 10 | -together with Xlib library version 3.1 or later. | ||
| 11 | - | ||
| 12 | -In order to install type: | ||
| 13 | - | ||
| 14 | -make install | ||
| 15 | - | ||
| 16 | -By default, ENIAMtokenizer is installed in the 'ocamlc -where'/eniam directory. | |
| 17 | -You can change it by editing the Makefile. | |
| 18 | - | ||
| 19 | -In order to test library type: | ||
| 20 | -make test | ||
| 21 | -./test | ||
| 22 | - | ||
| 23 | -Credits | ||
| 24 | -------- | ||
| 25 | -Copyright © 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | ||
| 26 | -Copyright © 2016 Institute of Computer Science Polish Academy of Sciences | ||
| 27 | - | ||
| 28 | -The parser uses the following licensed resources: | ||
| 29 | - | ||
| 30 | -SGJP: Grammatical Dictionary of Polish, version 20151020 | ||
| 31 | -Copyright © 2007–2015 Zygmunt Saloni, Włodzimierz Gruszczyński, Marcin | ||
| 32 | -Woliński, Robert Wołosz, Danuta Skowrońska | ||
| 33 | -http://sgjp.pl | ||
| 34 | - | ||
| 35 | -Licence | ||
| 36 | -------- | ||
| 37 | - | ||
| 38 | -This library is free software: you can redistribute it and/or modify | ||
| 39 | -it under the terms of the GNU Lesser General Public License as published by | ||
| 40 | -the Free Software Foundation, either version 3 of the License, or | ||
| 41 | -(at your option) any later version. | ||
| 42 | - | ||
| 43 | -This library is distributed in the hope that it will be useful, | ||
| 44 | -but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 45 | -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 46 | -GNU Lesser General Public License for more details. | |
| 47 | - | ||
| 48 | -You should have received a copy of the GNU Lesser General Public License | ||
| 49 | -along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 50 | - |
tokenizer/eniam-tokenizer-1.0/config-tokenizer deleted
tokenizer/eniam-tokenizer-1.0/lgpl-3.0.txt deleted
| 1 | - GNU LESSER GENERAL PUBLIC LICENSE | ||
| 2 | - Version 3, 29 June 2007 | ||
| 3 | - | ||
| 4 | - Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> | ||
| 5 | - Everyone is permitted to copy and distribute verbatim copies | ||
| 6 | - of this license document, but changing it is not allowed. | ||
| 7 | - | ||
| 8 | - | ||
| 9 | - This version of the GNU Lesser General Public License incorporates | ||
| 10 | -the terms and conditions of version 3 of the GNU General Public | ||
| 11 | -License, supplemented by the additional permissions listed below. | ||
| 12 | - | ||
| 13 | - 0. Additional Definitions. | ||
| 14 | - | ||
| 15 | - As used herein, "this License" refers to version 3 of the GNU Lesser | ||
| 16 | -General Public License, and the "GNU GPL" refers to version 3 of the GNU | ||
| 17 | -General Public License. | ||
| 18 | - | ||
| 19 | - "The Library" refers to a covered work governed by this License, | ||
| 20 | -other than an Application or a Combined Work as defined below. | ||
| 21 | - | ||
| 22 | - An "Application" is any work that makes use of an interface provided | ||
| 23 | -by the Library, but which is not otherwise based on the Library. | ||
| 24 | -Defining a subclass of a class defined by the Library is deemed a mode | ||
| 25 | -of using an interface provided by the Library. | ||
| 26 | - | ||
| 27 | - A "Combined Work" is a work produced by combining or linking an | ||
| 28 | -Application with the Library. The particular version of the Library | ||
| 29 | -with which the Combined Work was made is also called the "Linked | ||
| 30 | -Version". | ||
| 31 | - | ||
| 32 | - The "Minimal Corresponding Source" for a Combined Work means the | ||
| 33 | -Corresponding Source for the Combined Work, excluding any source code | ||
| 34 | -for portions of the Combined Work that, considered in isolation, are | ||
| 35 | -based on the Application, and not on the Linked Version. | ||
| 36 | - | ||
| 37 | - The "Corresponding Application Code" for a Combined Work means the | ||
| 38 | -object code and/or source code for the Application, including any data | ||
| 39 | -and utility programs needed for reproducing the Combined Work from the | ||
| 40 | -Application, but excluding the System Libraries of the Combined Work. | ||
| 41 | - | ||
| 42 | - 1. Exception to Section 3 of the GNU GPL. | ||
| 43 | - | ||
| 44 | - You may convey a covered work under sections 3 and 4 of this License | ||
| 45 | -without being bound by section 3 of the GNU GPL. | ||
| 46 | - | ||
| 47 | - 2. Conveying Modified Versions. | ||
| 48 | - | ||
| 49 | - If you modify a copy of the Library, and, in your modifications, a | ||
| 50 | -facility refers to a function or data to be supplied by an Application | ||
| 51 | -that uses the facility (other than as an argument passed when the | ||
| 52 | -facility is invoked), then you may convey a copy of the modified | ||
| 53 | -version: | ||
| 54 | - | ||
| 55 | - a) under this License, provided that you make a good faith effort to | ||
| 56 | - ensure that, in the event an Application does not supply the | ||
| 57 | - function or data, the facility still operates, and performs | ||
| 58 | - whatever part of its purpose remains meaningful, or | ||
| 59 | - | ||
| 60 | - b) under the GNU GPL, with none of the additional permissions of | ||
| 61 | - this License applicable to that copy. | ||
| 62 | - | ||
| 63 | - 3. Object Code Incorporating Material from Library Header Files. | ||
| 64 | - | ||
| 65 | - The object code form of an Application may incorporate material from | ||
| 66 | -a header file that is part of the Library. You may convey such object | ||
| 67 | -code under terms of your choice, provided that, if the incorporated | ||
| 68 | -material is not limited to numerical parameters, data structure | ||
| 69 | -layouts and accessors, or small macros, inline functions and templates | ||
| 70 | -(ten or fewer lines in length), you do both of the following: | ||
| 71 | - | ||
| 72 | - a) Give prominent notice with each copy of the object code that the | ||
| 73 | - Library is used in it and that the Library and its use are | ||
| 74 | - covered by this License. | ||
| 75 | - | ||
| 76 | - b) Accompany the object code with a copy of the GNU GPL and this license | ||
| 77 | - document. | ||
| 78 | - | ||
| 79 | - 4. Combined Works. | ||
| 80 | - | ||
| 81 | - You may convey a Combined Work under terms of your choice that, | ||
| 82 | -taken together, effectively do not restrict modification of the | ||
| 83 | -portions of the Library contained in the Combined Work and reverse | ||
| 84 | -engineering for debugging such modifications, if you also do each of | ||
| 85 | -the following: | ||
| 86 | - | ||
| 87 | - a) Give prominent notice with each copy of the Combined Work that | ||
| 88 | - the Library is used in it and that the Library and its use are | ||
| 89 | - covered by this License. | ||
| 90 | - | ||
| 91 | - b) Accompany the Combined Work with a copy of the GNU GPL and this license | ||
| 92 | - document. | ||
| 93 | - | ||
| 94 | - c) For a Combined Work that displays copyright notices during | ||
| 95 | - execution, include the copyright notice for the Library among | ||
| 96 | - these notices, as well as a reference directing the user to the | ||
| 97 | - copies of the GNU GPL and this license document. | ||
| 98 | - | ||
| 99 | - d) Do one of the following: | ||
| 100 | - | ||
| 101 | - 0) Convey the Minimal Corresponding Source under the terms of this | ||
| 102 | - License, and the Corresponding Application Code in a form | ||
| 103 | - suitable for, and under terms that permit, the user to | ||
| 104 | - recombine or relink the Application with a modified version of | ||
| 105 | - the Linked Version to produce a modified Combined Work, in the | ||
| 106 | - manner specified by section 6 of the GNU GPL for conveying | ||
| 107 | - Corresponding Source. | ||
| 108 | - | ||
| 109 | - 1) Use a suitable shared library mechanism for linking with the | ||
| 110 | - Library. A suitable mechanism is one that (a) uses at run time | ||
| 111 | - a copy of the Library already present on the user's computer | ||
| 112 | - system, and (b) will operate properly with a modified version | ||
| 113 | - of the Library that is interface-compatible with the Linked | ||
| 114 | - Version. | ||
| 115 | - | ||
| 116 | - e) Provide Installation Information, but only if you would otherwise | ||
| 117 | - be required to provide such information under section 6 of the | ||
| 118 | - GNU GPL, and only to the extent that such information is | ||
| 119 | - necessary to install and execute a modified version of the | ||
| 120 | - Combined Work produced by recombining or relinking the | ||
| 121 | - Application with a modified version of the Linked Version. (If | ||
| 122 | - you use option 4d0, the Installation Information must accompany | ||
| 123 | - the Minimal Corresponding Source and Corresponding Application | ||
| 124 | - Code. If you use option 4d1, you must provide the Installation | ||
| 125 | - Information in the manner specified by section 6 of the GNU GPL | ||
| 126 | - for conveying Corresponding Source.) | ||
| 127 | - | ||
| 128 | - 5. Combined Libraries. | ||
| 129 | - | ||
| 130 | - You may place library facilities that are a work based on the | ||
| 131 | -Library side by side in a single library together with other library | ||
| 132 | -facilities that are not Applications and are not covered by this | ||
| 133 | -License, and convey such a combined library under terms of your | ||
| 134 | -choice, if you do both of the following: | ||
| 135 | - | ||
| 136 | - a) Accompany the combined library with a copy of the same work based | ||
| 137 | - on the Library, uncombined with any other library facilities, | ||
| 138 | - conveyed under the terms of this License. | ||
| 139 | - | ||
| 140 | - b) Give prominent notice with the combined library that part of it | ||
| 141 | - is a work based on the Library, and explaining where to find the | ||
| 142 | - accompanying uncombined form of the same work. | ||
| 143 | - | ||
| 144 | - 6. Revised Versions of the GNU Lesser General Public License. | ||
| 145 | - | ||
| 146 | - The Free Software Foundation may publish revised and/or new versions | ||
| 147 | -of the GNU Lesser General Public License from time to time. Such new | ||
| 148 | -versions will be similar in spirit to the present version, but may | ||
| 149 | -differ in detail to address new problems or concerns. | ||
| 150 | - | ||
| 151 | - Each version is given a distinguishing version number. If the | ||
| 152 | -Library as you received it specifies that a certain numbered version | ||
| 153 | -of the GNU Lesser General Public License "or any later version" | ||
| 154 | -applies to it, you have the option of following the terms and | ||
| 155 | -conditions either of that published version or of any later version | ||
| 156 | -published by the Free Software Foundation. If the Library as you | ||
| 157 | -received it does not specify a version number of the GNU Lesser | ||
| 158 | -General Public License, you may choose any version of the GNU Lesser | ||
| 159 | -General Public License ever published by the Free Software Foundation. | ||
| 160 | - | ||
| 161 | - If the Library as you received it specifies that a proxy can decide | ||
| 162 | -whether future versions of the GNU Lesser General Public License shall | ||
| 163 | -apply, that proxy's public statement of acceptance of any version is | ||
| 164 | -permanent authorization for you to choose that version for the | ||
| 165 | -Library. |
tokenizer/eniam-tokenizer-1.0/makefile deleted
| 1 | -OCAMLC=ocamlc | ||
| 2 | -OCAMLOPT=ocamlopt | ||
| 3 | -OCAMLDEP=ocamldep | ||
| 4 | -INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam | ||
| 5 | -OCAMLFLAGS=$(INCLUDES) -g | ||
| 6 | -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa | ||
| 7 | -INSTALLDIR=`ocamlc -where`/eniam | ||
| 8 | - | ||
| 9 | -SOURCES= ENIAMtokenizerTypes.ml ENIAMtokens.ml ENIAMacronyms.ml ENIAMpatterns.ml ENIAMtokenizer.ml | ||
| 10 | - | ||
| 11 | -all: eniam-tokenizer.cma eniam-tokenizer.cmxa | ||
| 12 | - | ||
| 13 | -install: all | ||
| 14 | - mkdir -p $(INSTALLDIR) | ||
| 15 | - cp eniam-tokenizer.cmxa eniam-tokenizer.a eniam-tokenizer.cma config-tokenizer $(INSTALLDIR) | ||
| 16 | - cp ENIAMtokenizerTypes.cmi ENIAMtokens.cmi ENIAMacronyms.cmi ENIAMpatterns.cmi ENIAMtokenizer.cmi $(INSTALLDIR) | ||
| 17 | - cp ENIAMtokenizerTypes.cmx ENIAMtokens.cmx ENIAMacronyms.cmx ENIAMpatterns.cmx ENIAMtokenizer.cmx $(INSTALLDIR) | ||
| 18 | - mkdir -p /usr/share/eniam/resources/SGJP | ||
| 19 | - cp resources/SGJP/* /usr/share/eniam/resources/SGJP | ||
| 20 | - | ||
| 21 | -eniam-tokenizer.cma: $(SOURCES) | ||
| 22 | - ocamlc -linkall -a -o eniam-tokenizer.cma $(OCAMLFLAGS) $^ | ||
| 23 | - | ||
| 24 | -eniam-tokenizer.cmxa: $(SOURCES) | ||
| 25 | - ocamlopt -linkall -a -o eniam-tokenizer.cmxa $(INCLUDES) $^ | ||
| 26 | - | ||
| 27 | -test: test.ml | ||
| 28 | - $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml | ||
| 29 | - | ||
| 30 | -.SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx | ||
| 31 | - | ||
| 32 | -.mll.ml: | ||
| 33 | - ocamllex $< | ||
| 34 | - | ||
| 35 | -.mly.mli: | ||
| 36 | - ocamlyacc $< | ||
| 37 | - | ||
| 38 | -.mly.ml: | ||
| 39 | - ocamlyacc $< | ||
| 40 | - | ||
| 41 | -.ml.cmo: | ||
| 42 | - $(OCAMLC) $(OCAMLFLAGS) -c $< | ||
| 43 | - | ||
| 44 | -.mli.cmi: | ||
| 45 | - $(OCAMLC) $(OCAMLFALGS) -c $< | ||
| 46 | - | ||
| 47 | -.ml.cmx: | ||
| 48 | - $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $< | ||
| 49 | - | ||
| 50 | -clean: | ||
| 51 | - rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a test |
tokenizer/eniam-tokenizer-1.0/test.ml deleted
| 1 | -(* | ||
| 2 | - * ENIAMtokenizer, a tokenizer for Polish | ||
| 3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | ||
| 4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | ||
| 5 | - * | ||
| 6 | - * This library is free software: you can redistribute it and/or modify | ||
| 7 | - * it under the terms of the GNU Lesser General Public License as published by | ||
| 8 | - * the Free Software Foundation, either version 3 of the License, or | ||
| 9 | - * (at your option) any later version. | ||
| 10 | - * | ||
| 11 | - * This library is distributed in the hope that it will be useful, | ||
| 12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 14 | - * GNU General Public License for more details. | ||
| 15 | - * | ||
| 16 | - * You should have received a copy of the GNU Lesser General Public License | ||
| 17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 18 | - *) | ||
| 19 | - | ||
| 20 | - | ||
| 21 | -let test_strings = [ | ||
| 22 | -(* "a gdybym miałem"; | ||
| 23 | - "A Gdy Miałem"; | ||
| 24 | - "GDY MIAŁEM"; | ||
| 25 | - "I II III IV V VI VII VIII IX X MCXIV MXC"; | ||
| 26 | - "Kiedy Piotr Prabucki, przewodniczący Komisji Budżetu PeKaO"; | ||
| 27 | - "25 idzie 20."; | ||
| 28 | - "Kot. Kot. kot."; | ||
| 29 | - "25."; | ||
| 30 | - "25.888.231"; | ||
| 31 | - "Ala 25.888.231.111 ma."; | ||
| 32 | - "Ala 25.888.031,011."; | ||
| 33 | - "Ala -25.888.031,011."; | ||
| 34 | - "Ala -25 ."; | ||
| 35 | - "Ala -1° C 3° ciepła 20—30°C od 180° do 260°C około 6° poniżej horyzontu."; | ||
| 36 | - "Ala 22-25 ."; | ||
| 37 | - "Ala 22.5.2000-25.5.2001 ."; | ||
| 38 | - "Szpak frunie.";*) | ||
| 39 | - "Kot miauczy."; | ||
| 40 | -(* "Np. Ala.";*) | ||
| 41 | - "w. dom."; | ||
| 42 | - "tzn."; | ||
| 43 | - "c.d.n."; | ||
| 44 | -(* "Arabia Saudyjska biegnie."; | ||
| 45 | - "Cauchy'ego ONZ-owska biegnie.";*) | ||
| 46 | - "TE-cie E-e."; | ||
| 47 | - "MS-DOS-owska CI-cie KRRi-cie UJ-ocie UJ-OCIE."; | ||
| 48 | - "rock'n'rollowy d’Alembertowi staro-cerkiewno-słowiańskimi"; | ||
| 49 | -(* "Tom idzie.";*) | ||
| 50 | - "Miałem miał."; | ||
| 51 | -(* "Szpak śpiewa."; | ||
| 52 | - "Ala ma kota."; | ||
| 53 | - "Ale mają kota:"*) | ||
| 54 | - ] | ||
| 55 | - | ||
| 56 | -let _ = | ||
| 57 | - print_endline "Testy wbudowane"; | ||
| 58 | - Xlist.iter test_strings (fun s -> | ||
| 59 | - print_endline ("\nTEST: " ^ s); | ||
| 60 | - let tokens = ENIAMtokenizer.parse s in | ||
| 61 | - (* print_endline (ENIAMtokenizer.xml_of tokens); *) | ||
| 62 | - Xlist.iter tokens (fun token -> print_endline (ENIAMtokenizer.string_of 0 token))); | ||
| 63 | - print_endline "Testy użytkownika."; | ||
| 64 | - print_endline "Wpisz tekst i naciśnij ENTER, pusty tekst kończy."; | ||
| 65 | - let s = ref (read_line ()) in | ||
| 66 | - while !s <> "" do | ||
| 67 | - let tokens = ENIAMtokenizer.parse !s in | ||
| 68 | - (* print_endline (ENIAMtokenizer.xml_of tokens); *) | ||
| 69 | - Xlist.iter tokens (fun token -> print_endline (ENIAMtokenizer.string_of 0 token)); | ||
| 70 | - print_endline "Wpisz tekst i naciśnij ENTER, pusty tekst kończy."; | ||
| 71 | - s := read_line () | ||
| 72 | - done; | ||
| 73 | - () |
tokenizer/makefile
| @@ -12,11 +12,13 @@ all: eniam-tokenizer.cma eniam-tokenizer.cmxa | @@ -12,11 +12,13 @@ all: eniam-tokenizer.cma eniam-tokenizer.cmxa | ||
| 12 | 12 | ||
| 13 | install: all | 13 | install: all |
| 14 | mkdir -p $(INSTALLDIR) | 14 | mkdir -p $(INSTALLDIR) |
| 15 | - cp eniam-tokenizer.cmxa eniam-tokenizer.a eniam-tokenizer.cma config-tokenizer $(INSTALLDIR) | 15 | + cp eniam-tokenizer.cmxa eniam-tokenizer.a eniam-tokenizer.cma $(INSTALLDIR) |
| 16 | cp ENIAMtokenizerTypes.cmi ENIAMtokens.cmi ENIAMacronyms.cmi ENIAMpatterns.cmi ENIAMtokenizer.cmi $(INSTALLDIR) | 16 | cp ENIAMtokenizerTypes.cmi ENIAMtokens.cmi ENIAMacronyms.cmi ENIAMpatterns.cmi ENIAMtokenizer.cmi $(INSTALLDIR) |
| 17 | cp ENIAMtokenizerTypes.cmx ENIAMtokens.cmx ENIAMacronyms.cmx ENIAMpatterns.cmx ENIAMtokenizer.cmx $(INSTALLDIR) | 17 | cp ENIAMtokenizerTypes.cmx ENIAMtokens.cmx ENIAMacronyms.cmx ENIAMpatterns.cmx ENIAMtokenizer.cmx $(INSTALLDIR) |
| 18 | - mkdir -p /usr/share/eniam/resources/SGJP | ||
| 19 | - cp resources/SGJP/* /usr/share/eniam/resources/SGJP | 18 | + mkdir -p /usr/share/eniam/tokenizer |
| 19 | + cp resources/mte_20151215.tab /usr/share/eniam/tokenizer/mte_20151215.tab | ||
| 20 | + cp resources/README /usr/share/eniam/tokenizer/README | ||
| 21 | + ln -s /usr/share/eniam/tokenizer/mte_20151215.tab /usr/share/eniam/tokenizer/mte.tab | ||
| 20 | 22 | ||
| 21 | eniam-tokenizer.cma: $(SOURCES) | 23 | eniam-tokenizer.cma: $(SOURCES) |
| 22 | ocamlc -linkall -a -o eniam-tokenizer.cma $(OCAMLFLAGS) $^ | 24 | ocamlc -linkall -a -o eniam-tokenizer.cma $(OCAMLFLAGS) $^ |
tokenizer/resources/SGJP/README renamed to tokenizer/resources/README
tokenizer/resources/SGJP/mte_20151215.tab renamed to tokenizer/resources/mte_20151215.tab