diff --git a/tokenizer/ENIAMtokenizerTypes.ml b/tokenizer/ENIAMtokenizerTypes.ml index e007915..3212bea 100644 --- a/tokenizer/ENIAMtokenizerTypes.ml +++ b/tokenizer/ENIAMtokenizerTypes.ml @@ -64,10 +64,8 @@ type pat = L | CL | D of string | C of string | S of string | RD of string | O o let empty_token = { orth="";corr_orth="";beg=0;len=0;next=0; token=Symbol ""; attrs=[]} -let config = - try File.load_attr_val_pairs "config-tokenizer" - with _ -> (print_endline "ENIAMtokenizer config file not found"; []) +let resource_path = + try Sys.getenv "ENIAM_RESOURCE_PATH" + with Not_found -> "/usr/share/eniam" -let mte_filename = - try Xlist.assoc config "MTE_FILENAME" - with Not_found -> (print_endline "ENIAMtokenizer MTE_FILENAME config variable undefined"; "") +let mte_filename = resource_path ^ "/tokenizer/mte.tab" diff --git a/tokenizer/README b/tokenizer/README index 393363d..a6ab537 100644 --- a/tokenizer/README +++ b/tokenizer/README @@ -6,7 +6,7 @@ ENIAMtokenizer is a library that provides a tokenizer for Polish. Install ------- -ENIAMtokenizer requires OCaml version 4.02.3 compiler +ENIAMtokenizer requires OCaml version 4.02.3 compiler together with Xlib library version 3.1 or later. In order to install type: @@ -20,6 +20,10 @@ In order to test library type: make test ./test +By default ENIAMtokenizer looks for resources in the /usr/share/eniam directory. +However, this behaviour may be changed by setting and exporting the ENIAM_RESOURCE_PATH +environment variable. + Credits ------- Copyright © 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> @@ -47,4 +51,3 @@ GNU General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. 
- diff --git a/tokenizer/config-tokenizer b/tokenizer/config-tokenizer deleted file mode 100644 index 5c6adc2..0000000 --- a/tokenizer/config-tokenizer +++ /dev/null @@ -1,2 +0,0 @@ -# Localization of definitions of multi-token-expressions -MTE_FILENAME=/usr/share/eniam/resources/SGJP/mte_20151215.tab diff --git a/tokenizer/eniam-tokenizer-1.0.tar.bz2 b/tokenizer/eniam-tokenizer-1.0.tar.bz2 index 72cb36e..9c91389 100644 --- a/tokenizer/eniam-tokenizer-1.0.tar.bz2 +++ b/tokenizer/eniam-tokenizer-1.0.tar.bz2 diff --git a/tokenizer/eniam-tokenizer-1.0/ENIAMacronyms.ml b/tokenizer/eniam-tokenizer-1.0/ENIAMacronyms.ml deleted file mode 100644 index 250ae33..0000000 --- a/tokenizer/eniam-tokenizer-1.0/ENIAMacronyms.ml +++ /dev/null @@ -1,866 +0,0 @@ -(* - * ENIAMtokenizer, a tokenizer for Polish - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences - * - * This library is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- *) - -open ENIAMtokenizerTypes - -let mte_patterns = - let lines = try File.load_lines mte_filename - with _ -> (print_endline ("ENIAMtokenizer mte file " ^ mte_filename ^ " not found"); []) in - let l = List.rev (Xlist.rev_map lines (fun line -> - match Str.split (Str.regexp "\t") line with - [orths; lemma; interp] -> Str.split (Str.regexp " ") orths, lemma, interp - | _ -> failwith ("mte_patterns: " ^ line))) in - List.rev (Xlist.rev_map l (fun (orths,lemma,interp) -> - Xlist.map orths (fun orth -> O orth), (fun (_:token_record list) -> ENIAMtokens.make_lemma (lemma,interp)))) - - -let compose_lemma t lemma_suf interp = - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ lemma_suf, interp) - -let compose_lemma3 t1 t2 t3 lemma_suf interp = - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t1.token ^ ENIAMtokens.get_orth t2.token ^ ENIAMtokens.get_orth t3.token ^ lemma_suf, interp) - -let concat_orths l = - String.concat "" (Xlist.map l (fun t -> t.orth)) - -let ct l lemma interp = - let beg = (List.hd l).beg in - let t = List.hd (List.rev l) in - let len = t.beg + t.len - beg in - Token{empty_token with - orth=concat_orths l; - beg=beg; - len=len; - next=t.next; - token=ENIAMtokens.make_lemma (lemma,interp); - attrs=ENIAMtokens.merge_attrs l} - -let rec get_orth_prefix i l = - if i = 0 then "",l else - match l with - c :: l -> let s,l = get_orth_prefix (i-1) l in c ^ s, l - | [] -> failwith "get_orth_prefix" - -let make_sub_tokens t l = - let n = Xlist.fold l 0 (fun n (i,_,_) -> n + i) in - let orth = Xunicode.utf8_chars_of_utf8_string t.orth in - if Xlist.size orth <> n then failwith "make_sub_tokens: invalid orth length" else - let l,_,_,_ = Xlist.fold l ([],t.beg,t.len,orth) (fun (l,beg,remaining_len,orth) (i,lemma,interp) -> - let orth,remaining_orth = get_orth_prefix i orth in - let len = if beg mod factor = 0 then i * factor else ((i-1) * factor) + (beg mod factor) in - if remaining_len = 0 then failwith "make_sub_tokens: invalid remaining_len" else - 
let len = if len > remaining_len then remaining_len else len in - Token{empty_token with - orth=orth; - beg=beg; - len=len; - next=beg+len; - token=ENIAMtokens.make_lemma (lemma,interp); - attrs=t.attrs} :: l, - beg+len, remaining_len-len, remaining_orth) in - l - -let st t l = - let l = make_sub_tokens t l in - match l with - Token s :: l -> List.rev (Token{s with next=t.next} :: l) - | _ -> failwith "st" - -let std t d l = - let l = make_sub_tokens t l in - match l with - Token s :: l -> List.rev (Token{s with orth=s.orth^d.orth; len=d.beg+d.len-s.beg; next=d.next} :: l) - | _ -> failwith "std" - -let acronym_patterns = [ - [L; S "-"; O "owscy"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owska"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owski"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owski"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskich"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskich"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskich"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:n1.n2:pos" | _ 
-> failwith "acronym_patterns"); - [L; S "-"; O "owskie"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskiego"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskiego"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskiej"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskiej"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskiej"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskiemu"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskim"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskim"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskim"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskimi"], (function [x;_;_] -> compose_lemma x "-owski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owsko"], (function [x;_;_] -> compose_lemma x "-owski" "adja" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owsko"], (function [x;_;_] -> compose_lemma x "-owsko" "adv:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owsku"], (function [x;_;_] -> compose_lemma x "-owski" "adjp" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owską"], (function [x;_;_] -> compose_lemma x "-owski" 
"adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owską"], (function [x;_;_] -> compose_lemma x "-owski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wscy"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wska"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wski"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wski"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wskich"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wskich"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wskich"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wskie"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wskiego"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wskiego"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wskiej"], 
(function [x;_;_] -> compose_lemma x "-wski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wskiej"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wskiej"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wskiemu"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wskim"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wskim"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wskim"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wskimi"], (function [x;_;_] -> compose_lemma x "-wski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wsko"], (function [x;_;_] -> compose_lemma x "-wski" "adja" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wsko"], (function [x;_;_] -> compose_lemma x "-wsko" "adv:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wsku"], (function [x;_;_] -> compose_lemma x "-wski" "adjp" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wską"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wską"], (function [x;_;_] -> compose_lemma x "-wski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owa"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x 
"’owy" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owe"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owego"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owego"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owej"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owej"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owej"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owemu"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owo"], (function [x;_;_] -> compose_lemma x "’owo" "adv:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owo"], (function [x;_;_] -> compose_lemma x "’owy" "adja" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owy"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owy"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owych"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owych"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith 
"acronym_patterns"); - [L; S "’"; O "owych"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owym"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owym"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owym"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owymi"], (function [x;_;_] -> compose_lemma x "’owy" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "ową"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "ową"], (function [x;_;_] -> compose_lemma x "’owy" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owscy"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owska"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owski"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owski"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owskich"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owskich"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owskich"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owskie"], (function 
[x;_;_] -> compose_lemma x "’owski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owskie"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owskiego"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owskiego"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owskiej"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owskiej"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owskiej"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owskiemu"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owskim"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owskim"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owskim"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owskimi"], (function [x;_;_] -> compose_lemma x "’owski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owsko"], (function [x;_;_] -> compose_lemma x "’owski" 
"adja" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owsko"], (function [x;_;_] -> compose_lemma x "’owsko" "adv:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owsku"], (function [x;_;_] -> compose_lemma x "’owski" "adjp" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owską"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owską"], (function [x;_;_] -> compose_lemma x "’owski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:n2" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "ach"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:loc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma 
x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:n2" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "ami"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:inst:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "T" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "T" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "TA" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "cie"], (function [x;_;_] -> compose_lemma x "TA" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "e"], (function [x;_;_] -> compose_lemma x 
"A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "e"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ecie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ecie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:n2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "etach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "etami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "etem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "etom"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "etowi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "etu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ety"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ety"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ety"], (function [x;_;_] -> 
compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "etów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:gen:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "i"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x 
"" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "ie"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "iem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "iem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "o"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:voc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ocie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ocie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:n2" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "om"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:dat:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "otach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "otami"], (function [x;_;_] -> 
compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "otem"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "otom"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "otowi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "otu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "oty"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "oty"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "oty"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "otów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:n2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "u"], (function 
[x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:n2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:n2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:n2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" 
"subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:n2" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "y"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:n2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ze"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:n2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "zie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "zie"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" 
"subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:n2" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "ą"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:inst:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; O "ę"], (function [x;_;_] -> compose_lemma x "A" "subst:sg:acc:f" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "a"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "ach"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:loc:p3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "ami"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:inst:p3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" 
"depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "e"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "ego"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "ego"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "em"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "emu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "go"], (function [x;_;_] -> compose_lemma x "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "go"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "i"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "i"], (function [x;_;_] -> compose_lemma x "" 
"subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "m"], (function [x;_;_] -> compose_lemma x "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "m"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "mu"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "om"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:dat:p3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owi"], (function [x;_;_] -> compose_lemma x "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owie"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" 
"subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "u"], (function [x;_;_] -> compose_lemma x "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "y"], (function [x;_;_] -> compose_lemma x "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "ów"], (function [x;_;_] -> compose_lemma x "s" "subst:pl:gen:p3" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "ista"], (function [x;_;_] -> compose_lemma x "-ista" 
"subst:sg:nom:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "istach"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "istami"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "isto"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "istom"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "isty"], (function [x;_;_] -> compose_lemma x "-ista" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "isty"], (function [x;_;_] -> compose_lemma x "-ista" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "isty"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "istów"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "istów"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "istą"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "istę"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "iści"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "iści"], (function [x;_;_] -> compose_lemma x "-ista" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "iście"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "iście"], (function [x;_;_] -> compose_lemma x "-ista" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owca"], (function [x;_;_] -> compose_lemma x "-owiec" 
"subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owca"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owcach"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owcami"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owce"], (function [x;_;_] -> compose_lemma x "-owiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owce"], (function [x;_;_] -> compose_lemma x "-owiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owcem"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owcom"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owcowi"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owcu"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owcu"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owcy"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owcy"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owcze"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owców"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owców"], (function [x;_;_] -> compose_lemma x "-owiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owiec"], (function [x;_;_] -> 
compose_lemma x "-owiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:gen:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskości"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:voc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskościach"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:loc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskościami"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:inst:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskościom"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:pl:dat:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskością"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:inst:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskość"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:acc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "owskość"], (function [x;_;_] -> compose_lemma x "-owskość" "subst:sg:nom:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wca"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:acc:m1" | _ 
-> failwith "acronym_patterns"); - [L; S "-"; O "wca"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wcach"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wcami"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wce"], (function [x;_;_] -> compose_lemma x "-wiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wce"], (function [x;_;_] -> compose_lemma x "-wiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wcem"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wcom"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wcowi"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wcu"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wcu"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wcy"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wcy"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wców"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wców"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; O "wiec"], (function [x;_;_] -> compose_lemma x "-wiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:acc:f" | _ -> failwith 
"acronym_patterns"); - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:gen:f" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owości"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:voc:f" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owościach"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:loc:f" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owościami"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:inst:f" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owościom"], (function [x;_;_] -> compose_lemma x "’owość" "subst:pl:dat:f" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owością"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:inst:f" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owość"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:acc:f" | _ -> failwith "acronym_patterns"); - [L; S "’"; O "owość"], (function [x;_;_] -> compose_lemma x "’owość" "subst:sg:nom:f" | _ -> failwith "acronym_patterns"); - - [L; S "-"; L; S "-"; O "owscy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owska"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:nom.voc:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O 
"owski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:gen:f:pos" | _ -> 
failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskiemu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskimi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adja" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owsko" "adv:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owsku"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adjp" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wscy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:nom.voc:m1.p1:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wska"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:nom.voc:f:pos" | _ -> failwith 
"acronym_patterns"); - [L; S "-"; L; S "-"; O "wski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:m3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wski"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:nom.voc:m1.m2.m3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:acc:m1.p1:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wskich"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wskie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:nom.voc:n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:m1.m2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wskiego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:gen:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:dat:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" 
"adj:sg:gen:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wskiej"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:loc:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wskiemu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:dat:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:inst:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wskim"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:loc:m1.m2.m3.n1.n2:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wskimi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adja" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wsko"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wsko" "adv:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wsku"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adjp" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:acc:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wską"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wski" "adj:sg:inst:f:pos" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; 
O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:n2" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:loc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:n2" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:inst:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "T" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "T" 
"subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "TA" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "cie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "TA" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "e"], (function 
[x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ecie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ecie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:n2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "etach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "etami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "etem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "etom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "etowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "etu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ety"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ety"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> 
failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ety"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "etów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:gen:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "i"], (function [x;y;z;_;_] -> 
compose_lemma3 x y z "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "ie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "iem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "iem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "o"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:voc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ocie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ocie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); - 
[L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:n2" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:dat:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "otach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "otami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "otem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "otom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "otowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "otu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "oty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "oty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "oty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "otów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x 
y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:n2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:n2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "u"], 
(function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:n2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:n2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:n2" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); - 
[CL; S "-"; CL; S "-"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:n2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:n2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "zie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "zie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:n2" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "ą"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" 
"subst:sg:inst:f" | _ -> failwith "acronym_patterns"); - [CL; S "-"; CL; S "-"; O "ę"], (function [x;y;z;_;_] -> compose_lemma3 x y z "A" "subst:sg:acc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "ista"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "istach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "istami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "isto"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "istom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "isty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "isty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "isty"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "istów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "istów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "istą"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "istę"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "iści"], (function [x;y;z;_;_] -> compose_lemma3 x y z 
"-ista" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "iści"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "iście"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "iście"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-ista" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owcach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owcami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owcem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owcom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owcowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owcu"], (function [x;y;z;_;_] 
-> compose_lemma3 x y z "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owcze"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owiec"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:acc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:gen:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:nom:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:voc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:dat:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:gen:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:loc:f" | _ -> failwith "acronym_patterns"); 
- [L; S "-"; L; S "-"; O "owskości"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:voc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskościach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:loc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskościami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:inst:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskościom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:pl:dat:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskością"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:inst:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskość"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:acc:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "owskość"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-owskość" "subst:sg:nom:f" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wca"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wcach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wcami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wce"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wcem"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" 
"subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wcom"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wcowi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wcu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wcy"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wców"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "-"; O "wiec"], (function [x;y;z;_;_] -> compose_lemma3 x y z "-wiec" "subst:sg:nom:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "a"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith 
"acronym_patterns"); - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "ach"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:loc:p3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:inst:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "ami"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:inst:p3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> 
compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "e"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "ego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "ego"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "em"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "emu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "go"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "go"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "i"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "m"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:inst:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "m"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "mu"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; 
L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:dat:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "om"], (function [x;y;z;_;_] -> compose_lemma3 x y z "s" "subst:pl:dat:p3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "owi"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:dat:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "owie"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:loc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m2" | _ -> 
failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "u"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:sg:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "depr:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:nom:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "y"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:voc:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:acc:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m1" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m2" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z "" "subst:pl:gen:m3" | _ -> failwith "acronym_patterns"); - [L; S "-"; L; S "’"; O "ów"], (function [x;y;z;_;_] -> compose_lemma3 x y z 
"s" "subst:pl:gen:p3" | _ -> failwith "acronym_patterns"); - ] - -let name_patterns = [ - [O "O"; S "’"; L], (function [x;y;z] -> compose_lemma3 x y z "" "subst:_:_:_" | _ -> failwith "name_patterns"); - [O "d"; S "’"; L], (function [x;y;z] -> compose_lemma3 x y z "" "subst:_:_:_" | _ -> failwith "name_patterns"); - [O "l"; S "’"; L], (function [x;y;z] -> compose_lemma3 x y z "" "subst:_:_:_" | _ -> failwith "name_patterns"); - [L; S "’"; O "s"], (function [x;y;z] -> compose_lemma3 x y z "" "subst:_:_:_" | _ -> failwith "name_patterns"); - [L; S "’"; O "sa"], (function [x;_;_] -> compose_lemma x "’s" "subst:sg:gen.acc:_" | _ -> failwith "name_patterns"); - ] - -let abr_patterns = [ - [O "b"; S "."; O "u"; S "."], (function [a;b;c;d] -> [ct [a;b] "bez" "prep:gen:nwok"; ct [c;d] "uwaga" "subst:pl:gen:f"] | _ -> failwith "abr_patterns"); - [O "b"; S "."; O "zm"; S "."], (function [a;b;c;d] -> [ct [a;b] "bez" "prep:gen:nwok"; ct [c;d] "zmiana" "subst:pl:gen:f"] | _ -> failwith "abr_patterns"); - [O "blm"], (function [a] -> st a [1,"bez","prep:gen:nwok";1,"liczba","subst:sg:gen:f";1,"mnogi","adj:sg:gen:f:pos"] | _ -> failwith "abr_patterns"); - [O "blp"], (function [a] -> st a [1,"bez","prep:gen:nwok";1,"liczba","subst:sg:gen:f";1,"pojedynczy","adj:sg:gen:f:pos"] | _ -> failwith "abr_patterns"); - [O "błp"; S "."], (function [a;b] -> std a b [2,"błogosławiony","adj:sg:gen:f:pos";1,"pamięć","subst:sg:gen:f"] | _ -> failwith "abr_patterns"); - [O "bm"], (function [a] -> st a [1,"bieżący","adj:sg:$C:m3:pos";1,"miesiąc","subst:sg:$C:m3"] | _ -> failwith "abr_patterns"); - [O "bm"; S "."], (function [a;b] -> std a b [1,"bieżący","adj:sg:$C:m3:pos";1,"miesiąc","subst:sg:$C:m3"] | _ -> failwith "abr_patterns"); - [O "bp"; S "."], (function [a;b] -> std a b [1,"błogosławiony","adj:sg:gen:f:pos";1,"pamięć","subst:sg:gen:f"] | _ -> failwith "abr_patterns"); - [O "br"], (function [a] -> st a [1,"bieżący","adj:sg:$C:m3:pos";1,"rok","subst:sg:$C:m3"] | _ -> failwith "abr_patterns"); 
- [O "br"; S "."], (function [a;b] -> std a b [1,"bieżący","adj:sg:$C:m3:pos";1,"rok","subst:sg:$C:m3"] | _ -> failwith "abr_patterns"); - [O "c"; S "."; O "d"; S "."; O "n"; S "."], (function [a;b;c;d;e;f] -> [ct [a;b] "ciąg" "subst:sg:nom:m3"; ct [c;d] "daleki" "adj:sg:nom:m3:com"; ct [e;f] "nastąpić" "fin:sg:ter:perf"] | _ -> failwith "abr_patterns"); - [O "ccm"], (function [a] -> st a [1,"sześcienny","adj:_:$C:m3:pos";2,"centymetr","subst:_:$C:m3"] | _ -> failwith "abr_patterns"); - [O "cd"; S "."], (function [a;b] -> std a b [1,"ciąg","subst:sg:nom:m3";1,"daleki","adj:sg:nom:m3:com"] | _ -> failwith "abr_patterns"); - [O "cdn"; S "."], (function [a;b] -> std a b [1,"ciąg","subst:sg:nom:m3";1,"daleki","adj:sg:nom:m3:com";1,"nastąpić","fin:sg:ter:perf"] | _ -> failwith "abr_patterns"); - [O "cm"; O "3"], (function [a;b] -> [ct [a] "centymetr" "subst:_:$C:m3"; ct [b] "sześcienny" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns"); - [O "dcn"; S "."], (function [a;b] -> std a b [1,"daleki","adj:sg:nom:m3:com";1,"ciąg","subst:sg:nom:m3";1,"nastąpić","fin:sg:ter:perf"] | _ -> failwith "abr_patterns"); - [O "dm"; O "3"], (function [a;b] -> [ct [a] "decymetr" "subst:_:$C:m3"; ct [b] "sześcienny" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns"); - [O "ds"; S "."], (function [a;b] -> std a b [1,"do","prep:gen";1,"sprawa","subst:pl:gen:f"] | _ -> failwith "abr_patterns"); - [O "d"; O "/"; O "s"], (function [a;b;c] -> [ct [a;b] "do" "prep:gen"; ct [c] "sprawa" "subst:pl:gen:f"] | _ -> failwith "abr_patterns"); - [O "itd"; S "."], (function [a;b] -> std a b [1,"i","conj";1,"tak","adv:pos";1,"daleko","adv:com"] | _ -> failwith "abr_patterns"); - [O "itede"; S "."], (function [a;b] -> std a b [1,"i","conj";2,"tak","adv:pos";2,"daleko","adv:com"] | _ -> failwith "abr_patterns"); - [O "itp"; S "."], (function [a;b] -> std a b [1,"i","conj";1,"tym","adv";1,"podobny","adj:pl:nom:_:pos"] | _ -> failwith "abr_patterns"); - [O "jw"; S "."], (function [a;b] -> std a b 
[1,"jak","adv:pos";1,"wysoko","adv:com"] | _ -> failwith "abr_patterns"); - [O "JWP"], (function [a] -> st a [1,"jaśnie","adv:pos";1,"wielmożny","adj:_:$C:m1:pos";1,"pan","subst:_:$C:m1"] | _ -> failwith "abr_patterns"); - [O "JWP"], (function [a] -> st a [1,"jaśnie","adv:pos";1,"wielmożny","adj:_:$C:f:pos";1,"pani","subst:_:$C:f"] | _ -> failwith "abr_patterns"); - [O "km"; S "."; O "2"], (function [a;b;c] -> [ct [a;b] "kilometr" "subst:_:$C:m3"; ct [c] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns"); - [O "km"; O "2"], (function [a;b] -> [ct [a] "kilometr" "subst:_:$C:m3"; ct [b] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns"); - [O "km"; O "²"], (function [a;b] -> [ct [a] "kilometr" "subst:_:$C:m3"; ct [b] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns"); - [O "lm"; S "."], (function [a;b] -> std a b [1,"liczba","subst:sg:$C:f";1,"mnogi","adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns"); - [O "lp"; S "."], (function [a;b] -> std a b [1,"liczba","subst:sg:$C:f";1,"pojedynczy","adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns"); - [O "m"; S "."; O "in"; S "."], (function [a;b;c;d] -> [ct [a;b] "między" "prep:inst"; ct [c;d] "inny" "adj:pl:inst:_:pos"] | _ -> failwith "abr_patterns"); - [O "m"; S "."; O "in"], (function [a;b;c] -> [ct [a;b] "między" "prep:inst"; ct [c] "inny" "adj:pl:inst:_:pos"] | _ -> failwith "abr_patterns"); - [O "m"; S "."; O "inn"; S "."], (function [a;b;c;d] -> [ct [a;b] "między" "prep:inst"; ct [c;d] "inny" "adj:pl:inst:_:pos"] | _ -> failwith "abr_patterns"); - [O "m"; S "."; O "st"; S "."], (function [a;b;c;d] -> [ct [a;b] "miasto" "subst:_:$C:n2"; ct [c;d] "stołeczny" "adj:_:$C:n2:pos"] | _ -> failwith "abr_patterns"); - [O "m"; O "^"; O "2"], (function [a;b;c] -> [ct [a] "metr" "subst:_:$C:m3"; ct [b;c] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns"); - [O "m"; O "2"], (function [a;b] -> [ct [a] "metr" "subst:_:$C:m3"; ct [b] "kwadratowy" "adj:_:$C:m3:pos"] | _ -> failwith 
"abr_patterns"); - [O "m"; O "3"], (function [a;b] -> [ct [a] "metr" "subst:_:$C:m3"; ct [b] "sześcienny" "adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns"); - [O "min"; S "."], (function [a;b] -> std a b [1,"między","prep:inst";2,"inny","adj:pl:inst:_:pos"] | _ -> failwith "abr_patterns"); - [O "mkw"; S "."], (function [a;b] -> std a b [1,"metr","subst:_:$C:m3";2,"kwadratowy","adj:_:$C:m3:pos"] | _ -> failwith "abr_patterns"); - [O "n"; S "."; O "e"; S "."], (function [a;b;c;d] -> [ct [a;b] "nasz" "adj:sg:gen:f:pos"; ct [c;d] "era" "subst:sg:gen:f"] | _ -> failwith "abr_patterns"); - [O "n"; S "."; O "p"; S "."; O "m"; S "."], (function [a;b;c;d;e;f] -> [ct [a;b] "nad" "prep:inst"; ct [c;d] "poziom" "subst:sg:inst:m3"; ct [e;f] "morze" "subst:sg:gen:n2"] | _ -> failwith "abr_patterns"); - [O "np"; S "."], (function [a;b] -> std a b [1,"na","prep:acc";1,"przykład","subst:sg:acc:m3"] | _ -> failwith "abr_patterns"); - [O "nt"; S "."], (function [a;b] -> std a b [1,"na","prep:acc";1,"temat","subst:sg:acc:m3"] | _ -> failwith "abr_patterns"); - [O "NTG"], (function [a] -> st a [1,"nie","qub";1,"ta","adj:sg:nom:f:pos";1,"grupa","subst:sg:nom:f"] | _ -> failwith "abr_patterns"); - [O "o"; S "."; O "o"; S "."], (function [a;b;c;d] -> [ct [a;b] "ograniczony" "adj:sg:$C:f:pos"; ct [c;d] "odpowiedzialność" "subst:sg:$C:f"] | _ -> failwith "abr_patterns"); - [O "p"; S "."; O "n"; S "."; O "e"; S "."], (function [a;b;c;d;e;f] -> [ct [a;b] "przed" "prep:inst"; ct [c;d] "nasz" "adj:sg:inst:f:pos"; ct [e;f] "era" "subst:sg:inst:f"] | _ -> failwith "abr_patterns"); - [O "p"; S "."; O "o"; S "."], (function [a;b;c;d] -> [ct [a;b] "pełniący" "pact:_:_:m1.m2.m3:imperf:aff"; ct [c;d] "obowiązek" "subst:pl:acc:m3"] | _ -> failwith "abr_patterns"); - [O "p"; S "."; O "p"; S "."; O "m"; S "."], (function [a;b;c;d;e;f] -> [ct [a;b] "pod" "prep:inst"; ct [c;d] "poziom" "subst:sg:inst:m3"; ct [e;f] "morze" "subst:sg:gen:n2"] | _ -> failwith "abr_patterns"); - [O "p"; S "."; O "t"; S "."], 
(function [a;b;c;d] -> [ct [a;b] "pod" "prep:inst:nwokc"; ct [c;d] "tytuł" "subst:sg:inst:m3"] | _ -> failwith "abr_patterns"); - [O "pn"; S "."], (function [a;b] -> std a b [1,"pod","prep:inst";1,"nazwa","subst:sg:inst:f"] | _ -> failwith "abr_patterns"); - [O "pne"; S "."], (function [a;b] -> std a b [1,"przed","prep:inst";1,"nasz","adj:sg:inst:f:pos";1,"era","subst:sg:inst:f"] | _ -> failwith "abr_patterns"); - [O "pt"; S "."], (function [a;b] -> std a b [1,"pod","prep:inst";1,"tytuł","subst:sg:inst:m3"] | _ -> failwith "abr_patterns"); - [O "PW"], (function [a] -> st a [1,"prywatny","adj:_:$C:f:pos";1,"wiadomość","subst:_:$C:f"] | _ -> failwith "abr_patterns"); - [O "pw"; S "."], (function [a;b] -> std a b [1,"pod","prep:inst";1,"wezwanie","subst:sg:inst:n2"] | _ -> failwith "abr_patterns"); -(* [O "S"; S "."; O "A"; S "."], (function [a;b;c;d] -> [ct [a;b] "spółka" "subst:sg:$C:f"; ct [c;d] "akcyjny" "adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns"); - [O "s"; S "."; O "c"; S "."], (function [a;b;c;d] -> [ct [a;b] "spółka" "subst:sg:$C:f"; ct [c;d] "cywilny" "adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns");*) -(* [O "SA"], (function [a] -> st a [1,"spółka","subst:sg:$C:f";1,"akcyjny","adj:sg:$C:f:pos"] | _ -> failwith "abr_patterns"); *) - [O "ś"; S "."; O "p"; S "."], (function [a;b;c;d] -> [ct [a;b] "święty" "adj:sg:gen:f:pos"; ct [c;d] "pamięć" "subst:sg:gen:f"] | _ -> failwith "abr_patterns"); - [O "śp"; S "."], (function [a;b] -> std a b [1,"święty","adj:sg:gen:f:pos";1,"pamięć","subst:sg:gen:f"] | _ -> failwith "abr_patterns"); - [O "tgz"; S "."], (function [a;b] -> std a b [2,"tak","adv";1,"zwać","ppas:_:_:_:_:aff"] | _ -> failwith "abr_patterns"); - [O "tj"; S "."], (function [a;b] -> std a b [1,"to","subst:sg:nom:n2";1,"być","fin:sg:ter:imperf"] | _ -> failwith "abr_patterns"); - [O "tzn"; S "."], (function [a;b] -> std a b [1,"to","subst:sg:nom:n2";2,"znaczyć","fin:sg:ter:imperf"] | _ -> failwith "abr_patterns"); - [O "tzw"; S "."], (function 
[a;b] -> std a b [1,"tak","adv:pos";2,"zwać","ppas:_:_:_:imperf:aff"] | _ -> failwith "abr_patterns"); - [O "ub"; S "."; O "r"; S "."], (function [a;b;c;d] -> [ct [a;b] "ubiegły" "adj:sg:$C:m3:pos"; ct [c;d] "rok" "subst:sg:$C:m3"] | _ -> failwith "abr_patterns"); - [O "w"; S "."; O "w"; S "."], (function [a;b;c;d] -> [ct [a;b] "wysoko" "adv:com"; ct [c;d] "wymienić" "ppas:_:_:_:perf:aff"] | _ -> failwith "abr_patterns"); - [O "w"; O "/"; O "m"], (function [a;b;c] -> [ct [a;b] "w" "prep:loc"; ct [c] "miejsce" "subst:_:loc:m3"] | _ -> failwith "abr_patterns"); - [O "w"; O "/"; O "w"], (function [a;b;c] -> [ct [a;b] "wysoko" "adv:com"; ct [c] "wymienić" "ppas:_:_:_:perf:aff"] | _ -> failwith "abr_patterns"); - [O "ws"; S "."], (function [a;b] -> std a b [1,"w","prep:loc:nwok";1,"sprawa","subst:sg:loc:f"] | _ -> failwith "abr_patterns"); - [O "ww"; S "."], (function [a;b] -> std a b [1,"wysoko","adv:com";1,"wymieniony","ppas:_:_:_:perf:aff"] | _ -> failwith "abr_patterns"); - ] diff --git a/tokenizer/eniam-tokenizer-1.0/ENIAMpatterns.ml b/tokenizer/eniam-tokenizer-1.0/ENIAMpatterns.ml deleted file mode 100644 index fa09369..0000000 --- a/tokenizer/eniam-tokenizer-1.0/ENIAMpatterns.ml +++ /dev/null @@ -1,415 +0,0 @@ -(* - * ENIAMtokenizer, a tokenizer for Polish - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences - * - * This library is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - *) - -open Xstd -open Printf -open ENIAMtokenizerTypes - -let rec flatten_tokens rev_variants = function - | [] -> rev_variants - | Token t :: l -> flatten_tokens (Xlist.map rev_variants (fun rev_variant -> Token t :: rev_variant)) l - | Seq seq :: l -> flatten_tokens rev_variants (seq @ l) - | Variant variants :: l -> flatten_tokens (List.flatten (Xlist.map variants (fun variant -> flatten_tokens rev_variants [variant]))) l - -let rec normalize_tokens rev = function - [] -> List.rev rev - | Token t :: l -> normalize_tokens (Token t :: rev) l - | Seq seq :: l -> normalize_tokens rev (seq @ l) - | Variant[t] :: l -> normalize_tokens rev (t :: l) - | Variant variants :: l -> - let variants = flatten_tokens [[]] [Variant variants] in - let variants = Xlist.map variants (fun rev_seq -> - match List.rev rev_seq with - [] -> failwith "normalize_tokens" - | [t] -> t - | seq -> Seq seq) in - let t = match variants with - [] -> failwith "normalize_tokens" - | [t] -> t - | variants -> Variant variants in - normalize_tokens (t :: rev) l - -let concat_orths l = - String.concat "" (Xlist.map l (fun t -> t.orth)) - -let concat_orths2 l = - String.concat "" (Xlist.map l (fun t -> ENIAMtokens.get_orth t.token)) - -let concat_intnum = function - [{token=Dig(v4,_)};_;{token=Dig(v3,_)};_;{token=Dig(v2,_)};_;{token=Dig(v1,_)}] -> v4^v3^v2^v1 - | [{token=Dig(v3,_)};_;{token=Dig(v2,_)};_;{token=Dig(v1,_)}] -> v3^v2^v1 - | [{token=Dig(v2,_)};_;{token=Dig(v1,_)}] -> v2^v1 - | [{token=Dig(v1,_)}] -> v1 - | _ -> failwith "concat_intnum" - -let dig_value t = - match t.token with - Dig(v,_) -> v - | _ -> failwith "dig_value" - -(* FIXME: problem z ordnum - wyklucza year co stanowi problem na końcu zdania *) -let digit_patterns1 = [ (* FIXME: problem z nadmiarowymi interpretacjami - trzeba uwzględnić w preprocesingu brak spacji - albo w 
dezambiguacji *) - [D "dig"; S "."; D "dig"; S "."; D "dig"; S "."; D "dig"; S "."; D "dig"], (fun tokens -> Proper(concat_orths tokens,"obj-id",[[]],["obj-id"])); - [D "dig"; S "."; D "dig"; S "."; D "dig"; S "."; D "dig"], (fun tokens -> Proper(concat_orths tokens,"obj-id",[[]],["obj-id"])); - [D "dig"; S "."; D "dig"; S "."; D "dig"], (fun tokens -> Proper(concat_orths tokens,"obj-id",[[]],["obj-id"])); - [D "dig"; S "."; D "dig"], (fun tokens -> Proper(concat_orths tokens,"obj-id",[[]],["obj-id"])); -(* [D "dig"], "obj-id"; *) - [D "pref3dig"; S "."; D "3dig"; S "."; D "3dig"; S "."; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum")); - [D "pref3dig"; S "."; D "3dig"; S "."; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum")); - [D "pref3dig"; S "."; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum")); - [D "pref3dig"; S " "; D "3dig"; S " "; D "3dig"; S " "; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum")); - [D "pref3dig"; S " "; D "3dig"; S " "; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum")); - [D "pref3dig"; S " "; D "3dig"], (fun tokens -> Dig(concat_intnum tokens,"intnum")); - [D "intnum"; S "."], (function [token;_] -> Dig(concat_intnum [token],"ordnum") | _ -> failwith "digit_patterns1"); (* FIXME: to nie powinno wykluczać innych interpretacji *) - [D "day"; S "."; D "month"; S "."; D "year"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns2"); - [D "day"; S "."; RD "month"; S "."; D "year"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns3"); - [D "day"; S " "; RD "month"; S " "; D "year"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns3"); - [D "day"; S "."; D "month"; S "."; D "2dig"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith 
"digit_patterns2"); - [D "day"; S "."; RD "month"; S "."; D "2dig"], (function [day;_;month;_;year] -> Compound("date",[day.token;month.token;year.token]) | _ -> failwith "digit_patterns3"); - [D "day"; S "."; D "month"; S "."], (function [day;_;month;_] -> Compound("day-month",[day.token;month.token]) | _ -> failwith "digit_patterns4"); - [D "hour"; S "."; D "minute"], (function [hour;_;minute] -> Compound("hour-minute",[hour.token;minute.token]) | _ -> failwith "digit_patterns5"); - [D "hour"; S ":"; D "minute"], (function [hour;_;minute] -> Compound("hour-minute",[hour.token;minute.token]) | _ -> failwith "digit_patterns6"); - [D "intnum"; S ":"; D "intnum"], (function [x;_;y] -> Compound("match-result",[x.token;y.token]) | _ -> failwith "digit_patterns7"); - ] (* bez 1 i *2 *3 *4 mamy rec *) (* w morfeuszu zawsze num:pl?*) - -let digit_patterns2 = [ - [D "intnum"; S ","; D "dig"], (function [x;_;y] -> Dig(dig_value x ^ "," ^ dig_value y,"realnum") | _ -> failwith "digit_patterns8"); -(* [S "-"; D "intnum"; S ","; D "dig"], (function [_;x;_;y] -> Dig("-" ^ dig_value x ^ "," ^ dig_value y,"realnum") | _ -> failwith "digit_patterns9"); - [S "-"; D "intnum"], (function [_;x] -> Dig("-" ^ dig_value x,"realnum") | _ -> failwith "digit_patterns10");*) - [S "’"; D "2dig"], (function [_;x] -> Dig("’" ^ dig_value x,"year") | _ -> failwith "digit_patterns12"); -(* [D "intnum"], "realnum"; *) - ] - -let compose_latek_lemma t interp = - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ "-latek", interp) - -let compose_latka_lemma t interp = - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ "-latka", interp) - -let compose_latek_int_lemma t t2 interp = - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ "-" ^ ENIAMtokens.get_orth t2.token ^ "-latek", interp) - -let compose_latka_int_lemma t t2 interp = - ENIAMtokens.make_lemma (ENIAMtokens.get_orth t.token ^ "-" ^ ENIAMtokens.get_orth t2.token ^ "-latka", interp) - -let digit_patterns3 = [ - [S "-"; D 
"intnum"], (function [_;x] -> Dig("-" ^ dig_value x,"intnum") | _ -> failwith "digit_patterns10"); - [S "-"; D "realnum"], (function [_;x] -> Dig("-" ^ dig_value x,"realnum") | _ -> failwith "digit_patterns10"); - [D "intnum"; S "-"; D "intnum"], (function [x;_;y] -> Compound("intnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns11"); - [D "realnum"; S "-"; D "realnum"], (function [x;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *) - [D "intnum"; S "-"; D "realnum"], (function [x;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *) - [D "realnum"; S "-"; D "intnum"], (function [x;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *) - [C "date"; S "-"; C "date"], (function [x;_;y] -> Compound("date-interval",[x.token;y.token]) | _ -> failwith "digit_patterns13"); - [C "day-month"; S "-"; C "day-month"], (function [x;_;y] -> Compound("day-month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns14"); - [D "day"; S "-"; D "day"], (function [x;_;y] -> Compound("day-interval",[x.token;y.token]) | _ -> failwith "digit_patterns15"); - [D "month"; S "-"; D "month"], (function [x;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16"); - [RD "month"; S "-"; RD "month"], (function [x;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns17"); - [D "year"; S "-"; D "year"], (function [x;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16"); - [D "year"; S "-"; D "2dig"], (function [x;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16"); - [C "hour-minute"; S "-"; C "hour-minute"], (function [x;_;y] -> Compound("hour-minute-interval",[x.token;y.token]) | _ -> failwith 
"digit_patterns18"); - [D "hour"; S "-"; D "hour"], (function [x;_;y] -> Compound("hour-interval",[x.token;y.token]) | _ -> failwith "digit_patterns19"); - [D "minute"; S "-"; D "minute"], (function [x;_;y] -> Compound("minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns20"); - [RD "roman"; S "-"; RD "roman"], (function [x;_;y] -> Compound("roman-interval",[x.token;y.token]) | _ -> failwith "digit_patterns21"); - [D "intnum"; S " "; S "-"; S " "; D "intnum"], (function [x;_;_;_;y] -> Compound("intnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns11"); - [D "realnum"; S " "; S "-"; S " "; D "realnum"], (function [x;_;_;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *) - [D "intnum"; S " "; S "-"; S " "; D "realnum"], (function [x;_;_;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *) - [D "realnum"; S " "; S "-"; S " "; D "intnum"], (function [x;_;_;_;y] -> Compound("realnum-interval",[x.token;y.token]) | _ -> failwith "digit_patterns12"); (* FIXME: konflikt z liczbami ujemnymi *) - [C "date"; S " "; S "-"; S " "; C "date"], (function [x;_;_;_;y] -> Compound("date-interval",[x.token;y.token]) | _ -> failwith "digit_patterns13"); - [C "day-month"; S " "; S "-"; S " "; C "day-month"], (function [x;_;_;_;y] -> Compound("day-month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns14"); - [D "day"; S " "; S "-"; S " "; D "day"], (function [x;_;_;_;y] -> Compound("day-interval",[x.token;y.token]) | _ -> failwith "digit_patterns15"); - [D "month"; S " "; S "-"; S " "; D "month"], (function [x;_;_;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16"); - [RD "month"; S " "; S "-"; S " "; RD "month"], (function [x;_;_;_;y] -> Compound("month-interval",[x.token;y.token]) | _ -> failwith "digit_patterns17"); - [D "year"; S " "; S "-"; S " 
"; D "year"], (function [x;_;_;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16"); - [D "year"; S " "; S "-"; S " "; D "2dig"], (function [x;_;_;_;y] -> Compound("year-interval",[x.token;y.token]) | _ -> failwith "digit_patterns16"); - [C "hour-minute"; S " "; S "-"; S " "; C "hour-minute"], (function [x;_;_;_;y] -> Compound("hour-minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns18"); - [D "hour"; S " "; S "-"; S " "; D "hour"], (function [x;_;_;_;y] -> Compound("hour-interval",[x.token;y.token]) | _ -> failwith "digit_patterns19"); - [D "minute"; S " "; S "-"; S " "; D "minute"], (function [x;_;_;_;y] -> Compound("minute-interval",[x.token;y.token]) | _ -> failwith "digit_patterns20"); - [RD "roman"; S " "; S "-"; S " "; RD "roman"], (function [x;_;_;_;y] -> Compound("roman-interval",[x.token;y.token]) | _ -> failwith "digit_patterns21"); - [D "intnum"; S "-"; O "latek"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:nom:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latka"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:gen.acc:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latkowi"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:dat:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latkiem"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:inst:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latku"], (function [x;_;_] -> compose_latek_lemma x "subst:sg:loc.voc:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latkowie"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:nom.voc:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latków"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:gen.acc:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latkom"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:dat:m1" | _ -> failwith "digit_patterns22"); - [D 
"intnum"; S "-"; O "latkami"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:inst:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latkach"], (function [x;_;_] -> compose_latek_lemma x "subst:pl:loc:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latka"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:nom:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latki"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:gen:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latce"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:dat.loc:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latkę"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:acc:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latką"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:inst:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latko"], (function [x;_;_] -> compose_latka_lemma x "subst:sg:voc:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latki"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:nom.acc.voc:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latek"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:gen:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latkom"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:dat:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latkami"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:inst:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; O "latkach"], (function [x;_;_] -> compose_latka_lemma x "subst:pl:loc:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latek"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:nom:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latka"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y 
"subst:sg:gen.acc:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkowi"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:dat:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkiem"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:inst:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latku"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:sg:loc.voc:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkowie"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:nom.voc:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latków"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:gen.acc:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkom"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:dat:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkami"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:inst:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkach"], (function [x;_;y;_;_] -> compose_latek_int_lemma x y "subst:pl:loc:m1" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latka"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:nom:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latki"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:gen:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latce"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:dat.loc:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkę"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:acc:f" | _ -> failwith 
"digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latką"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:inst:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latko"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:sg:voc:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latki"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:nom.acc.voc:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latek"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:gen:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkom"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:dat:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkami"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:inst:f" | _ -> failwith "digit_patterns22"); - [D "intnum"; S "-"; D "intnum"; S "-"; O "latkach"], (function [x;_;y;_;_] -> compose_latka_int_lemma x y "subst:pl:loc:f" | _ -> failwith "digit_patterns22"); - ] - -let url_patterns1 = [ - [L; S "."; O "pl"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; O "pl"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; O "pl"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; L; S "."; O "pl"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "pl"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "-"; L; S "."; O "pl"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; O "uk"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; O "uk"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; O "uk"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; L; S "."; O "uk"], (function l -> 
Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "uk"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "-"; L; S "."; O "uk"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; O "cz"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; O "cz"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; O "cz"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; L; S "."; O "cz"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "cz"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "-"; L; S "."; O "cz"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; O "eu"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; O "eu"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; O "eu"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; L; S "."; O "eu"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "eu"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "-"; L; S "."; O "eu"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; O "org"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; O "org"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; O "org"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; L; S "."; O "org"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "org"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "-"; L; S "."; O "org"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; O "com"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; O "com"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; O "com"], (function l -> Dig(concat_orths2 
l,"url")); - [L; S "."; L; S "."; L; S "."; L; S "."; O "com"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "com"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "-"; L; S "."; O "com"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; O "net"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; O "net"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; O "net"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; L; S "."; O "net"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "net"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "-"; L; S "."; O "net"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; O "gov"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; O "gov"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; O "gov"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; L; S "."; O "gov"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "."; L; S "."; L; S "."; L; S "."; O "gov"], (function l -> Dig(concat_orths2 l,"url")); - [L; S "."; L; S "-"; L; S "."; O "gov"], (function l -> Dig(concat_orths2 l,"url")); - ] - -let url_patterns2 = [ - [L; S "@"; D "url"], (function l -> Dig(concat_orths2 l,"email")); - [O "http"; S ":"; S "/"; S "/"; D "url"], (function l -> Dig(concat_orths2 l,"email")); - ] - -let url_patterns3 = [ - [D "url"; S "/"], (function l -> Dig(concat_orths2 l,"url")); - [D "url"; S "/"; L], (function l -> Dig(concat_orths2 l,"url")); - [D "url"; S "/"; L; S "."; L], (function l -> Dig(concat_orths2 l,"url")); - ] - -type matching = { - prefix: tokens list; - matched: token_record list; - suffix: tokens list; - pattern: pat list; - command: token_record list -> token; - command_abr: token_record list -> tokens list; - } - -let 
execute_command matching = - let l = List.rev matching.matched in - let len = Xlist.fold l 0 (fun len t -> t.len + len) in - Seq((List.rev matching.prefix) @ [Token{empty_token with - orth=concat_orths l; - beg=(List.hd l).beg; - len=len; - next=(List.hd l).beg+len; - token=matching.command l; - (*weight=0.;*) (* FIXME: dodać wagi do konkretnych reguł i uwzględnić wagi maczowanych tokenów *) - attrs=ENIAMtokens.merge_attrs l}] @ matching.suffix) - -let execute_abr_command matching = - let l = List.rev matching.matched in - Seq((List.rev matching.prefix) @ (matching.command_abr l) @ matching.suffix) - -let match_token = function - D cat, Dig(_,cat2) -> cat = cat2 - | C s, Compound(s2,_) -> s = s2 - | S s, Symbol s2 -> s = s2 - | RD cat, RomanDig(_,cat2) -> cat = cat2 - | O pat, Dig(s,"dig") -> pat = s - | O pat, Symbol s -> pat = s - | O pat, SmallLetter orth -> pat = orth - | O pat, CapLetter(orth,lc) -> pat = orth - | O pat, AllSmall orth -> pat = orth - | O pat, AllCap(orth,lc,lc2) -> pat = orth - | O pat, FirstCap(orth,lc,_,_) -> pat = orth - | O pat, SomeCap orth -> pat = orth - | L, SmallLetter _ -> true - | L, CapLetter _ -> true - | L, AllSmall _ -> true - | L, AllCap _ -> true - | L, FirstCap _ -> true - | L, SomeCap _ -> true - | CL, CapLetter _ -> true - | CL, AllCap _ -> true - | CL, SomeCap _ -> true - | _ -> false - -let rec find_first_token matching pat = function - Token t -> if match_token (pat,t.token) then [{matching with matched = t :: matching.matched}] else [] - | Seq l -> Xlist.map (find_first_token matching pat (List.hd (List.rev l))) (fun matching -> {matching with prefix = matching.prefix @ (List.tl (List.rev l))}) - | Variant l -> List.flatten (Xlist.map l (find_first_token matching pat)) - -let rec find_middle_token matching pat = function - Token t -> if match_token (pat,t.token) then [{matching with matched = t :: matching.matched}] else [] - | Seq _ -> [] - | Variant l -> List.flatten (Xlist.map l (find_middle_token matching pat)) - 
-let rec find_last_token matching pat = function - Token t -> if match_token (pat,t.token) then [{matching with matched = t :: matching.matched}] else [] - | Seq l -> Xlist.map (find_last_token matching pat (List.hd l)) (fun matching -> {matching with suffix = matching.suffix @ (List.tl l)}) - | Variant l -> List.flatten (Xlist.map l (find_last_token matching pat)) - -let rec find_pattern_tail matchings = function - [] -> raise Not_found - | token :: l -> - let found,finished = Xlist.fold matchings ([],[]) (fun (found,finished) matching -> - match matching.pattern with - [pat] -> found, (find_last_token {matching with pattern=[]} pat token) @ finished - | pat :: pattern -> (find_middle_token {matching with pattern=pattern} pat token) @ found, finished - | _ -> failwith "find_pattern: ni") in - (try - if found = [] then raise Not_found else - find_pattern_tail found l - with Not_found -> - let finished = List.flatten (Xlist.map finished (fun matching -> try [execute_command matching] with Not_found -> [])) in - if finished = [] then raise Not_found else Variant finished,l) - -(* wzorce nie mogą mieć długości 1 *) -let rec find_pattern matchings rev = function - token :: l -> - let found = Xlist.fold matchings [] (fun found matching -> - match matching.pattern with - pat :: pattern -> (find_first_token {matching with pattern=pattern} pat token) @ found - | [] -> failwith "find_pattern: empty pattern") in - if found = [] then find_pattern matchings (token :: rev) l else - (try - let token,l = find_pattern_tail found l in - find_pattern matchings (token :: rev) l - with Not_found -> find_pattern matchings (token :: rev) l) - | [] -> List.rev rev - -let find_patterns patterns tokens = - find_pattern (Xlist.map patterns (fun (pattern,command) -> - {prefix=[]; matched=[]; suffix=[]; pattern=pattern; command=command; command_abr=(fun _ -> [])})) [] tokens - -let rec find_abr_pattern_tail matchings = function - [] -> raise Not_found - | token :: l -> - let found,finished = 
Xlist.fold matchings ([],[]) (fun (found,finished) matching -> - match matching.pattern with - [pat] -> found, (find_last_token {matching with pattern=[]} pat token) @ finished - | pat :: pattern -> (find_middle_token {matching with pattern=pattern} pat token) @ found, finished - | [] -> found, matching :: finished) in - (try - if found = [] then raise Not_found else - find_abr_pattern_tail found l - with Not_found -> - let finished = List.flatten (Xlist.map finished (fun matching -> try [execute_abr_command matching] with Not_found -> [])) in - if finished = [] then raise Not_found else Variant finished,l) - -let rec find_abr_pattern matchings rev = function - token :: l -> - let found = Xlist.fold matchings [] (fun found matching -> - match matching.pattern with - pat :: pattern -> (find_first_token {matching with pattern=pattern} pat token) @ found - | [] -> failwith "find_abr_pattern: empty pattern") in - if found = [] then find_abr_pattern matchings (token :: rev) l else - (try - let token,l = find_abr_pattern_tail found l in - find_abr_pattern matchings (token :: rev) l - with Not_found -> find_abr_pattern matchings (token :: rev) l) - | [] -> List.rev rev - -let find_abr_patterns patterns tokens = - find_abr_pattern (Xlist.map patterns (fun (pattern,command) -> - {prefix=[]; matched=[]; suffix=[]; pattern=pattern; command=(fun _ -> Symbol ""); command_abr=command})) [] tokens - -let find_replacement_patterns tokens = - let tokens = find_patterns digit_patterns1 tokens in - let tokens = normalize_tokens [] tokens in - let tokens = find_patterns digit_patterns2 tokens in - let tokens = normalize_tokens [] tokens in - let tokens = find_patterns digit_patterns3 tokens in - let tokens = normalize_tokens [] tokens in - let tokens = find_patterns ENIAMacronyms.acronym_patterns tokens in - let tokens = normalize_tokens [] tokens in - let tokens = find_patterns ENIAMacronyms.mte_patterns tokens in - let tokens = normalize_tokens [] tokens in -(* Xlist.iter tokens 
(fun t -> print_endline (ENIAMtokens.string_of_tokens 0 t)); *) - let tokens = find_patterns ENIAMacronyms.name_patterns tokens in -(* Xlist.iter tokens (fun t -> print_endline (ENIAMtokens.string_of_tokens 0 t)); *) - let tokens = normalize_tokens [] tokens in - let tokens = find_patterns url_patterns1 tokens in - let tokens = normalize_tokens [] tokens in - let tokens = find_patterns url_patterns2 tokens in - let tokens = normalize_tokens [] tokens in - let tokens = find_patterns url_patterns3 tokens in - let tokens = normalize_tokens [] tokens in -(* Xlist.iter tokens (fun t -> print_endline (ENIAMtokens.string_of_tokens 0 t)); *) - tokens - -let rec set_next_id n = function - Token t -> Token{t with next=n} - | Seq l -> - (match List.rev l with - t :: l -> Seq(List.rev ((set_next_id n t) :: l)) - | [] -> failwith "set_next_id n") - | Variant l -> Variant(Xlist.map l (set_next_id n)) - -let rec remove_spaces rev = function - [] -> List.rev rev - | x :: Token{token=Symbol " "; next=n} :: l -> remove_spaces ((set_next_id n x) :: rev) l - | Token{token=Symbol " "} :: l -> remove_spaces rev l - | x :: l -> remove_spaces (x :: rev) l diff --git a/tokenizer/eniam-tokenizer-1.0/ENIAMtokenizer.ml b/tokenizer/eniam-tokenizer-1.0/ENIAMtokenizer.ml deleted file mode 100644 index c8dcd84..0000000 --- a/tokenizer/eniam-tokenizer-1.0/ENIAMtokenizer.ml +++ /dev/null @@ -1,34 +0,0 @@ -(* - * ENIAMtokenizer, a tokenizer for Polish - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences - * - * This library is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - *) - -open Xstd -open ENIAMtokenizerTypes - -let string_of = - ENIAMtokens.string_of_tokens - -let parse query = - let l = Xunicode.classified_chars_of_utf8_string query in - let l = ENIAMtokens.tokenize l in - let l = ENIAMpatterns.normalize_tokens [] l in - let l = ENIAMpatterns.find_replacement_patterns l in - let l = ENIAMpatterns.remove_spaces [] l in - let l = ENIAMpatterns.find_abr_patterns ENIAMacronyms.abr_patterns l in - let l = ENIAMpatterns.normalize_tokens [] l in - l diff --git a/tokenizer/eniam-tokenizer-1.0/ENIAMtokenizerTypes.ml b/tokenizer/eniam-tokenizer-1.0/ENIAMtokenizerTypes.ml deleted file mode 100644 index e007915..0000000 --- a/tokenizer/eniam-tokenizer-1.0/ENIAMtokenizerTypes.ml +++ /dev/null @@ -1,73 +0,0 @@ -(* - * ENIAMtokenizer, a tokenizer for Polish - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences - * - * This library is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. 
If not, see <http://www.gnu.org/licenses/>. - *) - -open Xstd - -(* Długość pojedynczego znaku w tekście *) -let factor = 100 - -type token = - SmallLetter of string (* orth *) - | CapLetter of string * string (* orth * lowercase *) - | AllSmall of string (* orth *) - | AllCap of string * string * string (* orth * lowercase * all lowercase *) - | FirstCap of string * string * string * string (* orth * all lowercase * first letter uppercase * first letter lowercase *) - | SomeCap of string (* orth *) - | RomanDig of string * string (* value * cat *) - | Interp of string (* orth *) - | Symbol of string (* orth *) - | Dig of string * string (* value * cat *) - | Other of string (* orth *) - | Lemma of string * string * string list list list (* lemma * cat * interp *) - | Proper of string * string * string list list list * string list (* lemma * cat * interp * senses *) -(* | Sense of string * string * string list list list * (string * string * string list) list (* lemma * cat * interp * senses *) *) - | Compound of string * token list (* sense * components *) - (* | Tokens of string * int list (*cat * token id list *) *) - -(* Tekst reprezentuję jako zbiór obiektów typu token_record zawierających - informacje o poszczególnych tokenach *) -and token_record = { - orth: string; (* sekwencja znaków pierwotnego tekstu składająca się na token *) - corr_orth: string; (* sekwencja znaków pierwotnego tekstu składająca się na token z poprawionymi błędami *) - beg: int; (* pozycja początkowa tokenu względem początku akapitu *) - len: int; (* długość tokenu *) - next: int; (* pozycja początkowa następnego tokenu względem początku akapitu *) - token: token; (* treść tokenu *) - attrs: string list; (* dodatkowe atrybuty *) - } - -(* Tokeny umieszczone są w strukturze danych umożliwiającej efektywne wyszukiwanie ich sekwencji, - struktura danych sama z siebie nie wnosi informacji *) -type tokens = - | Token of token_record - | Variant of tokens list - | Seq of tokens list - -type 
pat = L | CL | D of string | C of string | S of string | RD of string | O of string - -let empty_token = { - orth="";corr_orth="";beg=0;len=0;next=0; token=Symbol ""; attrs=[]} - -let config = - try File.load_attr_val_pairs "config-tokenizer" - with _ -> (print_endline "ENIAMtokenizer config file not found"; []) - -let mte_filename = - try Xlist.assoc config "MTE_FILENAME" - with Not_found -> (print_endline "ENIAMtokenizer MTE_FILENAME config variable undefined"; "") diff --git a/tokenizer/eniam-tokenizer-1.0/ENIAMtokens.ml b/tokenizer/eniam-tokenizer-1.0/ENIAMtokens.ml deleted file mode 100644 index 45b30d2..0000000 --- a/tokenizer/eniam-tokenizer-1.0/ENIAMtokens.ml +++ /dev/null @@ -1,923 +0,0 @@ -(* - * ENIAMtokenizer, a tokenizer for Polish - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences - * - * This library is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - *) - -open Printf -open ENIAMtokenizerTypes -open Xstd -open Xunicode - -let string_of_interps interps = - String.concat "|" (Xlist.map interps (fun interp -> - (String.concat ":" (Xlist.map interp (fun interp2 -> - (String.concat "." 
interp2)))))) - -let rec string_of_token = function - SmallLetter orth -> sprintf "SmallLetter(%s)" orth - | CapLetter(orth,lc) -> sprintf "CapLetter(%s,%s)" orth lc - | AllSmall orth -> sprintf "AllSmall(%s)" orth - | AllCap(orth,lc,lc2) -> sprintf "AllCap(%s,%s,%s)" orth lc lc2 - | FirstCap(orth,lc,cl,ll) -> sprintf "FirstCap(%s,%s,%s,%s)" orth lc cl ll - | SomeCap orth -> sprintf "SomeCap(%s)" orth - | RomanDig(v,t) -> sprintf "RomanDig(%s,%s)" v t - | Interp orth -> sprintf "Interp(%s)" orth - | Symbol orth -> sprintf "Symbol(%s)" orth - | Dig(v,t) -> sprintf "Dig(%s,%s)" v t - | Other orth -> sprintf "Other(%s)" orth - | Lemma(lemma,cat,interps) -> sprintf "Lemma(%s,%s,%s)" lemma cat (string_of_interps interps) - | Proper(lemma,cat,interps,senses) -> sprintf "Proper(%s,%s,%s,%s)" lemma cat (string_of_interps interps) (String.concat "|" senses) -(* | Sense(lemma,cat,interps,senses) -> sprintf "Sense(%s,%s,%s,%s)" lemma cat (string_of_interps interps) - (String.concat "|" (Xlist.map senses (fun (_,v,_) -> v)))*) - | Compound(sense,l) -> sprintf "Compound(%s,[%s])" sense (String.concat ";" (Xlist.map l string_of_token)) - (* | Tokens(cat,l) -> sprintf "Tokens(%s,%s)" cat (String.concat ";" (Xlist.map l string_of_int)) *) - -let rec spaces i = - if i = 0 then "" else " " ^ spaces (i-1) - -let rec string_of_tokens i = function - Token t -> sprintf "%s{orth=%s;beg=%d;len=%d;next=%d;token=%s;attrs=[%s]}" (spaces i) t.orth t.beg t.len t.next (string_of_token t.token) - (String.concat ";" t.attrs) - | Variant l -> sprintf "%sVariant[\n%s]" (spaces i) (String.concat ";\n" (Xlist.map l (string_of_tokens (i+1)))) - | Seq l -> sprintf "%sSeq[\n%s]" (spaces i) (String.concat ";\n" (Xlist.map l (string_of_tokens (i+1)))) - -let rec string_of_token_simple = function - SmallLetter orth -> "SmallLetter" - | CapLetter(orth,lc) -> "CapLetter" - | AllSmall orth -> "AllSmall" - | AllCap(orth,lc,lc2) -> "AllCap" - | FirstCap(orth,lc,_,_) -> "FirstCap" - | SomeCap orth -> "SomeCap" - 
| RomanDig(v,t) -> "RomanDig" - | Interp orth -> sprintf "Interp(%s)" orth - | Symbol orth -> sprintf "Symbol(%s)" orth - | Dig(v,t) -> "Dig" - | Other orth -> sprintf "Other(%s)" orth - | Lemma(lemma,cat,interp) -> "Lemma" - | Proper(lemma,cat,interp,sense) -> "Proper" -(* | Sense(lemma,cat,interp,sense) -> "Sense" *) - | Compound(sense,l) -> sprintf "Compound" - (* | Tokens _ -> sprintf "Tokens" *) - -let rec string_of_tokens_simple = function - Token t -> string_of_token_simple t.token - | Variant l -> sprintf "Variant[%s]" (String.concat ";" (Xlist.map l string_of_tokens_simple)) - | Seq l -> sprintf "Seq[%s]" (String.concat ";" (Xlist.map l string_of_tokens_simple)) - -let get_orth = function - SmallLetter orth -> orth - | CapLetter(orth,lc) -> orth - | AllSmall orth -> orth - | AllCap(orth,lc,lc2) -> orth - | FirstCap(orth,lc,_,_) -> orth - | SomeCap orth -> orth - | Symbol orth -> orth - | Dig(v,_) -> v - | Other orth -> orth - | _ -> ""(*failwith "get_orth"*) - - -let months = StringSet.of_list ["1"; "2"; "3"; "4"; "5"; "6"; "7"; "8"; "9"; "01"; "02"; "03"; "04"; "05"; "06"; "07"; "08"; "09"; "10"; "11"; "12"] -let hours = StringSet.of_list ["0"; "1"; "2"; "3"; "4"; "5"; "6"; "7"; "8"; "9"; "00"; "01"; "02"; "03"; "04"; "05"; "06"; "07"; "08"; "09"; - "10"; "11"; "12"; "13"; "14"; "15"; "16"; "17"; "18"; "19"; "20"; "21"; "22"; "23"; "24"] -let days = StringSet.of_list ["1"; "2"; "3"; "4"; "5"; "6"; "7"; "8"; "9"; "01"; "02"; "03"; "04"; "05"; "06"; "07"; "08"; "09"; - "10"; "11"; "12"; "13"; "14"; "15"; "16"; "17"; "18"; "19"; "20"; "21"; "22"; "23"; "24"; "25"; "26"; "27"; "28"; "29"; "30"; "31"] -let romanmonths = StringSet.of_list ["I"; "II"; "III"; "IV"; "V"; "VI"; "VII"; "VIII"; "IX"; "X"; "XI"; "XII"] - - -let s_beg i = Token{empty_token with beg=i;len=1;next=i+1; token=Interp "<sentence>"} -let c_beg i = Token{empty_token with beg=i;len=1;next=i+1; token=Interp "<clause>"} - -let dig_token orth i digs token = - Token{empty_token with 
orth=orth;beg=i;len=Xlist.size digs * factor;next=i+Xlist.size digs * factor; token=token; attrs=["maybe cs"]} - -let sc_dig_token orth i digs token = - Seq[s_beg i;c_beg (i+1);Token{empty_token with orth=orth;beg=i+2;len=Xlist.size digs * factor - 2;next=i+Xlist.size digs * factor; token=token; attrs=["maybe cs"]}] - -let dig_tokens orth poss_s_beg i digs v cat = - if poss_s_beg then - [dig_token orth i digs (Dig(v,cat)); - sc_dig_token orth i digs (Dig(v,cat))] - else - [dig_token orth i digs (Dig(v,cat))] - -let merge_digits poss_s_beg i digs = - let orth = String.concat "" digs in - let t = dig_tokens orth poss_s_beg i digs in - let v = try string_of_int (int_of_string orth) with _ -> failwith "merge_digits" in - let variants = - (t orth "dig") @ - [Token{empty_token with orth=orth;beg=i;len=Xlist.size digs * factor;next=i+Xlist.size digs * factor; token=Proper(orth,"obj-id",[[]],["obj-id"]); attrs=["maybe cs"]}] @ - (if digs = ["0"] || List.hd digs <> "0" then (t orth "intnum")(* @ (t orth "realnum")*) else []) @ - (if List.hd digs <> "0" then (t v "year") else []) @ - (if StringSet.mem months orth then (t v "month") else []) @ - (if StringSet.mem hours orth then (t v "hour") else []) @ - (if StringSet.mem days orth then (t v "day") else []) @ - (if Xlist.size digs = 2 && List.hd digs < "6" then (t v "minute") else []) @ - (if Xlist.size digs = 3 then (t orth "3dig") else []) @ - (if Xlist.size digs = 2 then (t orth "2dig") else []) @ - (if Xlist.size digs <= 3 && List.hd digs <> "0" then (t orth "pref3dig") else []) in -(* let t = dig_token orth i digs in - let sc_t = sc_dig_token orth i digs in - let v = try int_of_string orth with _ -> failwith "merge_digits" in - let variants = - [t (Dig(v,"dig"));sc_t (Dig(v,"dig"))] @ - (if digs = ["0"] || List.hd digs <> "0" then [t (Dig(v,"intnum"));sc_t (Dig(v,"intnum"))] else []) @ - (if List.hd digs <> "0" then [t (Dig(v,"year"));sc_t (Dig(v,"year"))] else []) @ - (if StringSet.mem months orth then [t 
(Dig(v,"month"));sc_t (Dig(v,"month"))] else []) @ - (if StringSet.mem hours orth then [t (Dig(v,"hour"));sc_t (Dig(v,"hour"))] else []) @ - (if StringSet.mem days orth then [t (Dig(v,"day"));sc_t (Dig(v,"day"))] else []) @ - (if Xlist.size digs = 2 && List.hd digs < "6" then [t (Dig(v,"minute"));sc_t (Dig(v,"minute"))] else []) @ - (if Xlist.size digs = 3 then [t (Dig(v,"3dig"));sc_t (Dig(v,"3dig"))] else []) @ - (if Xlist.size digs <= 3 && List.hd digs <> "0" then [t (Dig(v,"pref3dig"));sc_t (Dig(v,"pref3dig"))] else []) in*) - Variant variants - -let recognize_roman_I v = function - Capital("I",_) :: Capital("I",_) :: Capital("I",_) :: [] -> v+3,false - | Capital("I",_) :: Capital("I",_) :: [] -> v+2,false - | Capital("I",_) :: [] -> v+1,false - | [] -> v,false - | Capital("I",_) :: Capital("I",_) :: Capital("I",_) :: Small("w") :: [] -> v+3,true - | Capital("I",_) :: Capital("I",_) :: Small("w") :: [] -> v+2,true - | Capital("I",_) :: Small("w") :: [] -> v+1,true - | Small("w") :: [] -> v,true - | _ -> 0,false - -let recognize_roman_V v = function - Capital("I",_) :: ForeignCapital("V",_) :: [] -> v+4,false - | ForeignCapital("V",_) :: l -> recognize_roman_I (v+5) l - | Capital("I",_) :: ForeignCapital("X",_) :: [] -> v+9,false - | Capital("I",_) :: ForeignCapital("V",_) :: Small("w") :: [] -> v+4,true - | Capital("I",_) :: ForeignCapital("X",_) :: Small("w") :: [] -> v+9,true - | l -> recognize_roman_I v l - -let recognize_roman_X v = function - | ForeignCapital("X",_) :: ForeignCapital("X",_) :: ForeignCapital("X",_) :: l -> recognize_roman_V (v+30) l - | ForeignCapital("X",_) :: ForeignCapital("X",_) :: l -> recognize_roman_V (v+20) l - | ForeignCapital("X",_) :: l -> recognize_roman_V (v+10) l - | l -> recognize_roman_V v l - -let recognize_roman_L v = function - ForeignCapital("X",_) :: Capital("L",_) :: l -> recognize_roman_V (v+40) l - | Capital("L",_) :: l -> recognize_roman_X (v+50) l - | ForeignCapital("X",_) :: Capital("C",_) :: l -> 
recognize_roman_V (v+90) l - | l -> recognize_roman_X v l - -let recognize_roman_C v = function - | Capital("C",_) :: Capital("C",_) :: Capital("C",_) :: l -> recognize_roman_L (v+300) l - | Capital("C",_) :: Capital("C",_) :: l -> recognize_roman_L (v+200) l - | Capital("C",_) :: l -> recognize_roman_L (v+100) l - | l -> recognize_roman_L v l - -let recognize_roman_D v = function - Capital("C",_) :: Capital("D",_) :: l -> recognize_roman_L (v+400) l - | Capital("D",_) :: l -> recognize_roman_C (v+500) l - | Capital("C",_) :: Capital("M",_) :: l -> recognize_roman_L (v+900) l - | l -> recognize_roman_C v l - -let recognize_roman_M v = function - | Capital("M",_) :: Capital("M",_) :: Capital("M",_) :: l -> recognize_roman_D (v+3000) l - | Capital("M",_) :: Capital("M",_) :: l -> recognize_roman_D (v+2000) l - | Capital("M",_) :: l -> recognize_roman_D (v+1000) l - | l -> recognize_roman_D v l - -let rec merge l = - String.concat "" (Xlist.map l (function - Capital(s,t) -> s - | ForeignCapital(s,t) -> s - | Small s -> s - | ForeignSmall s -> s - | _ -> failwith "merge")) - -let lowercase_first = function - [] -> [] - | Capital(s,t) :: l -> Small t :: l - | ForeignCapital(s,t) :: l -> ForeignSmall t :: l - | Small s :: l -> Small s :: l - | ForeignSmall s :: l -> ForeignSmall s :: l - | _ -> failwith "lowercase_first" - -let rec lowercase_all = function - [] -> [] - | Capital(s,t) :: l -> Small t :: lowercase_all l - | ForeignCapital(s,t) :: l -> ForeignSmall t :: lowercase_all l - | Small s :: l -> Small s :: lowercase_all l - | ForeignSmall s :: l -> ForeignSmall s :: lowercase_all l - | _ -> failwith "lowercase_all" - -let lowercase_rest = function - [] -> [] - | x :: l -> x :: lowercase_all l - -let first_capital = function - Capital _ :: _ -> true - | ForeignCapital _ :: _ -> true - | Small _ :: _ -> false - | ForeignSmall _ :: _ -> false - | _ -> failwith "first_capital" - -let rec all_capital = function - Capital _ :: l -> all_capital l - | ForeignCapital _ :: 
l -> all_capital l - | Small _ :: l -> false - | ForeignSmall _ :: l -> false - | [] -> true - | _ -> failwith "first_capital" - -let rec all_small = function - Capital _ :: l -> false - | ForeignCapital _ :: l -> false - | Small _ :: l -> all_small l - | ForeignSmall _ :: l -> all_small l - | [] -> true - | _ -> failwith "first_capital" - -let rest_capital = function - [] -> failwith "rest_capital" - | _ :: l -> all_capital l - -let rest_small = function - [] -> failwith "rest_small" - | _ :: l -> all_small l - -let get_first_cap = function - | Capital(s,t) :: l -> s - | ForeignCapital(s,t) :: l -> s - | _ -> failwith "get_first_cap" - -let get_first_lower = function - | Capital(s,t) :: l -> t - | ForeignCapital(s,t) :: l -> t - | _ -> failwith "get_first_lower" - -(*let cs_weight = -1. -let sc_cap_weight = -0.3*) - -let is_add_attr_token = function - SmallLetter _ -> true - | CapLetter _ -> true - | AllSmall _ -> true - | AllCap _ -> true - | FirstCap _ -> true - | SomeCap _ -> true - | _ -> false - -let rec add_attr s = function - Token t -> if is_add_attr_token t.token then Token{t with attrs=s :: t.attrs} else Token t - | Variant l -> Variant(Xlist.map l (add_attr s)) - | Seq l -> Seq(Xlist.map l (add_attr s)) - -let recognize_stem poss_s_beg has_sufix i letters = - let orth = merge letters in - let t = {empty_token with orth=orth;beg=i;len=Xlist.size letters * factor;next=i+Xlist.size letters * factor} in - let t = if poss_s_beg then - if Xlist.size letters = 1 then - if first_capital letters then Variant[ - Token{t with token=SmallLetter(merge (lowercase_first letters)); (*weight=cs_weight;*) attrs="cs" :: t.attrs}; - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=SmallLetter(merge (lowercase_first letters)); attrs="maybe cs" :: t.attrs}]; - Token{t with token=CapLetter(orth,merge (lowercase_first letters)); attrs="maybe cs" :: t.attrs}; - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=CapLetter(orth,merge 
(lowercase_first letters)); (*weight=sc_cap_weight;*) attrs="maybe cs" :: t.attrs}]] - else Token{t with token=SmallLetter orth} - else - if first_capital letters then - if rest_small letters then Variant[ - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=AllSmall(merge (lowercase_first letters))}]; - Token{t with token=FirstCap(orth,merge (lowercase_first letters),get_first_cap letters,get_first_lower letters)}; - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=FirstCap(orth,merge (lowercase_first letters),get_first_cap letters,get_first_lower letters); (*weight=sc_cap_weight*)}]] - else if rest_capital letters then Variant([ - Token{t with token=AllSmall(merge (lowercase_all letters)); (*weight=cs_weight;*) attrs="cs" :: t.attrs}; - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=AllSmall(merge (lowercase_all letters)); (*weight=cs_weight;*) attrs="cs" :: t.attrs}]; - Token{t with token=FirstCap(merge (lowercase_rest letters),merge (lowercase_all letters),get_first_cap letters,get_first_lower letters); (*weight=cs_weight;*) attrs="cs" :: t.attrs}; - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=FirstCap(merge (lowercase_rest letters),merge (lowercase_all letters),get_first_cap letters,get_first_lower letters); (*weight=cs_weight+.sc_cap_weight;*) attrs="cs" :: t.attrs}]] @ - (if has_sufix then [] else [ - Token{t with token=AllCap(orth,merge (lowercase_rest letters),merge (lowercase_all letters)); attrs="maybe cs" :: t.attrs}; - Seq[s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=AllCap(orth,merge (lowercase_rest letters),merge (lowercase_all letters)); attrs="maybe cs" :: t.attrs}]])) - else Token{t with token=SomeCap orth} - else - if rest_small letters then Token{t with token=AllSmall orth} - else Token{t with token=SomeCap orth} - else - if Xlist.size letters = 1 then - if first_capital letters then Variant[ - Token{t with token=SmallLetter orth; 
(*weight=cs_weight;*) attrs="cs" :: t.attrs}; - Token{t with token=CapLetter(orth,merge (lowercase_first letters)); attrs="maybe cs" :: t.attrs}] - else Token{t with token=SmallLetter orth} - else - if first_capital letters then - if rest_small letters then - Token{t with token=FirstCap(orth,merge (lowercase_first letters),get_first_cap letters,get_first_lower letters)} - else if rest_capital letters then Variant([ - Token{t with token=AllSmall(merge (lowercase_all letters)); (*weight=cs_weight;*) attrs="cs" :: t.attrs}; - Token{t with token=FirstCap(merge (lowercase_rest letters),merge (lowercase_all letters),get_first_cap letters,get_first_lower letters); (*weight=cs_weight;*) attrs="cs" :: t.attrs}] @ - (if has_sufix then [] else [ - Token{t with token=AllCap(orth,merge (lowercase_rest letters),merge (lowercase_all letters)); attrs="maybe cs" :: t.attrs}])) - else Token{t with token=SomeCap orth} - else - if rest_small letters then Token{t with token=AllSmall orth} - else Token{t with token=SomeCap orth} in - if has_sufix then add_attr "required validated lemmatization" t else t - -let parse_postags s = - List.map (fun s -> - match List.map (fun t -> Str.split (Str.regexp "\\.") t) (Str.split (Str.regexp ":") s) with - [pos] :: tags -> pos, tags - | _ -> failwith ("parse_postags: " ^ s)) (Str.split (Str.regexp "|") s) - -let make_lemma (lemma,interp) = - match parse_postags interp with - [pos,tags] -> Lemma(lemma,pos,[tags]) - | _ -> failwith "make_lemma" - -let merge_attrs l = -(* print_endline (String.concat " " (Xlist.map l (fun token -> "[" ^ token.orth ^ " " ^ String.concat ";" token.attrs ^ "]"))); *) - let len = Xlist.size l in - let attrs = Xlist.fold l StringQMap.empty (fun attrs token -> - Xlist.fold token.attrs attrs StringQMap.add) in - let n_cs = try StringQMap.find attrs "cs" with Not_found -> 0 in - let n_maybe_cs = try StringQMap.find attrs "maybe cs" with Not_found -> 0 in - let new_attrs = - (if n_cs > 0 then - if n_cs + n_maybe_cs = len then 
["cs"] else raise Not_found - else - if n_maybe_cs = len then ["maybe cs"] else []) @ - (StringQMap.fold attrs [] (fun attrs attr _ -> if attr = "cs" || attr = "maybe cs" then attrs else attr :: attrs)) in -(* print_endline (String.concat " " new_attrs); *) - new_attrs - -let suffix_lemmata = Xlist.fold [ - "em",make_lemma ("być","aglt:sg:pri:imperf:wok"); - "eś",make_lemma ("być","aglt:sg:sec:imperf:wok"); - "eście",make_lemma ("być","aglt:pl:sec:imperf:wok"); - "eśmy",make_lemma ("być","aglt:pl:pri:imperf:wok"); - "m",make_lemma ("być","aglt:sg:pri:imperf:nwok"); - "ś",make_lemma ("być","aglt:sg:sec:imperf:nwok"); - "ście",make_lemma ("być","aglt:pl:sec:imperf:nwok"); - "śmy",make_lemma ("być","aglt:pl:pri:imperf:nwok"); - "by",make_lemma ("by","qub"); - ] StringMap.empty (fun map (suf,lemma) -> StringMap.add map suf lemma) - -let recognize_suffix i letters = - let orth = merge letters in - let t = {empty_token with orth=orth;beg=i;len=Xlist.size letters * factor;next=i+Xlist.size letters * factor} in - if all_capital letters then Token{t with token=StringMap.find suffix_lemmata (merge (lowercase_all letters)); (*weight=cs_weight;*) attrs="cs" :: t.attrs} - else if all_small letters then Token{t with token=StringMap.find suffix_lemmata orth} - else raise Not_found - -let recognize_romandig poss_s_beg i letters = - let roman,w = recognize_roman_M 0 letters in - if roman > 0 then - let letters,w = if w then let l = List.rev letters in List.rev (List.tl l), [List.hd l] else letters,[] in - let orth = merge letters in - let roman = string_of_int roman in - let t = {empty_token with orth=orth;beg=i;len=Xlist.size letters * factor;next=i+Xlist.size letters * factor} in - let w = if w = [] then [] else - let beg = i + Xlist.size letters * factor in - [Variant[Token{empty_token with orth=merge w; beg=beg; len=factor; next=beg+factor; token=SmallLetter(merge w)}; - Token{empty_token with orth=merge w; beg=beg; len=factor; next=beg+factor; token=make_lemma 
("wiek","subst:sg:_:m3")}]] in - if StringSet.mem romanmonths orth then [ - Seq(Token{t with token=RomanDig(roman,"roman"); attrs="maybe cs" :: t.attrs}::w); - Seq(Token{t with token=RomanDig(roman,"month"); attrs="maybe cs" :: t.attrs}::w)] @ - (if poss_s_beg then [ - Seq([s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=RomanDig(roman,"roman"); attrs="maybe cs" :: t.attrs}]@w); - Seq([s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=RomanDig(roman,"month"); attrs="maybe cs" :: t.attrs}]@w); - ] else []) - else [ - Seq(Token{t with token=RomanDig(roman,"roman"); attrs="maybe cs" :: t.attrs}::w)] @ - (if poss_s_beg then [ - Seq([s_beg i;c_beg (i+1);Token{t with beg=t.beg+2; len=t.len-2; token=RomanDig(roman,"roman"); attrs="maybe cs" :: t.attrs}]@w); - ] else []) - else [] - -let sufixes1 = Xlist.map [ - ["m"]; - ["e"; "m"]; - ["ś"]; - ["e"; "ś"]; - ["ś"; "m"; "y"]; - ["e"; "ś"; "m"; "y"]; - ["ś"; "c"; "i"; "e"]; - ["e"; "ś"; "c"; "i"; "e"]; - ] List.rev - -let sufixes2 = Xlist.map [ - ["b"; "y"]; - ] List.rev - -let rec find_suffix rev = function - _, [] -> raise Not_found - | [], l -> rev, l - | s :: pat, Capital(c,t) :: l -> if s = t then find_suffix (Capital(c,t) :: rev) (pat,l) else raise Not_found - | s :: pat, Small t :: l -> if s = t then find_suffix (Small t :: rev) (pat,l) else raise Not_found - | _,_ -> raise Not_found - -let find_suffixes2 sufixes letters sufs = - Xlist.fold sufixes [] (fun l suf -> - try - let suf,rev_stem = find_suffix [] (suf,letters) in - (rev_stem,suf :: sufs) :: l - with Not_found -> l) - -let find_suffixes i letters = - let letters = List.rev letters in - let l = (letters,[]) :: find_suffixes2 sufixes1 letters [] in - let l = Xlist.fold l l (fun l (letters,sufs) -> - (find_suffixes2 sufixes2 letters sufs) @ l) in - Xlist.map l (fun (rev_stem, sufs) -> - List.rev (fst (Xlist.fold (List.rev rev_stem :: sufs) ([],i) (fun (seq,i) letters -> - (letters,i) :: seq, i + factor * Xlist.size letters)))) - 
-let merge_letters poss_s_beg i letters = - let l = find_suffixes i letters in - let roman = recognize_romandig poss_s_beg i letters in - let variants = Xlist.fold l roman (fun variants -> function - [] -> failwith "merge_letters" - | [stem,i] -> (recognize_stem poss_s_beg false i stem) :: variants - | (stem,i) :: suffixes -> - (try (Seq((recognize_stem poss_s_beg true i stem) :: Xlist.map suffixes (fun (suf,i) -> recognize_suffix i suf))) :: variants - with Not_found -> variants)) in - Variant variants - -let rec group_digits rev = function - [] -> List.rev rev, [] - | Digit s :: l -> group_digits (s :: rev) l - | x :: l -> List.rev rev, x :: l - -let rec group_letters rev = function - [] -> List.rev rev, [] - | Capital(s,t) :: l -> group_letters ((Capital(s,t)) :: rev) l - | ForeignCapital(s,t) :: l -> group_letters ((ForeignCapital(s,t)) :: rev) l - | Small s :: l -> group_letters ((Small s) :: rev) l - | ForeignSmall s :: l -> group_letters ((ForeignSmall s) :: rev) l - | x :: l -> List.rev rev, x :: l - -let rec group_others rev = function - [] -> List.rev rev, [] - | Other(s,_) :: l -> group_others (s :: rev) l - | x :: l -> List.rev rev, x :: l - -let create_sign_token poss_s_beg i signs l token = - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in - let len = Xlist.size signs * factor in - Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=token; attrs=["maybe cs"]},i+len,l,poss_s_beg - -let create_empty_sign_token i signs = - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in - let len = Xlist.size signs * factor in - {empty_token with orth=orth;beg=i;len=len;next=i+len; attrs=["maybe cs"]},i+len - -let create_sentence_seq i signs l lemma = - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in - let len = Xlist.size signs * factor in - Seq[Token{empty_token with 
beg=i;len=20;next=i+20;token=Interp "</clause>"}; - Token{empty_token with orth=orth;beg=i+20;len=len-30;next=i+len-10;token=make_lemma (lemma,"sinterj")}; - Token{empty_token with beg=i+len-10;len=10;next=i+len;token=Interp "</sentence>"}] - -let create_sentence_seq_hapl i signs l lemma = - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in - let len = Xlist.size signs * factor in - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Symbol "."; attrs=["maybe cs"]}; - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</clause>"}; - Token{empty_token with orth=orth;beg=i+20;len=len-30;next=i+len-10;token=make_lemma (lemma,"sinterj")}; - Token{empty_token with beg=i+len-10;len=10;next=i+len;token=Interp "</sentence>"}] - -let create_sentence_seq_q i signs l lemma = - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in - let len = Xlist.size signs * factor in - Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "?"}; - Token{empty_token with beg=i+20;len=10;next=i+30;token=Interp "</clause>"}; - Token{empty_token with orth=orth;beg=i+30;len=len-40;next=i+len-10;token=make_lemma (lemma,"sinterj")}; - Token{empty_token with beg=i+len-10;len=10;next=i+len;token=Interp "</sentence>"}] - -let create_sentence_seq_hapl_q i signs l lemma = - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in - let len = Xlist.size signs * factor in - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Symbol "."; attrs=["maybe cs"]}; - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "?"}; - Token{empty_token with beg=i+20;len=10;next=i+30;token=Interp "</clause>"}; - Token{empty_token with orth=orth;beg=i+30;len=len-40;next=i+len-10;token=make_lemma (lemma,"sinterj")}; - Token{empty_token with beg=i+len-10;len=10;next=i+len;token=Interp "</sentence>"}] - -let create_or_beg i 
signs l poss_s_beg = - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in - let len = Xlist.size signs * factor in - Variant[ - Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=Symbol "-"; attrs=["maybe cs"]}; - Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=Interp "-"; attrs=["maybe cs"]}; (* hyphen *) - Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=Interp "<or>"}; - (* Seq[Token{empty_token with beg=i; len=20; next=i+20; token=Interp "<sentence>"}; - Token{empty_token with orth=orth;beg=i+20; len=len-20;next=i+len; token=Interp "<or>"}]; *) - Seq[Token{empty_token with beg=i; len=21; next=i+21; token=Interp "</clause>"}; - Token{empty_token with beg=i+21; len=20; next=i+41; token=Interp "</sentence>"}; - Token{empty_token with orth=orth;beg=i+41; len=len-59;next=i+len-20;token=Interp "</or>"}; - Token{empty_token with beg=i+len-20;len=20; next=i+len; token=Interp "<clause>"}]; - Seq[Token{empty_token with orth=orth;beg=i; len=len-22;next=i+len-22;token=Interp "</or>"}; - Token{empty_token with beg=i+len-22;len=22; next=i+len; token=Interp "<clause>"}]; - ],i+len,l,poss_s_beg - -let create_or_beg2 i signs l poss_s_beg = - let orth = String.concat "" (Xlist.map signs (function Sign s -> s | _ -> failwith "create_sign_token")) in - let len = Xlist.size signs * factor in - Variant[ - Token{empty_token with orth=orth;beg=i;len=len;next=i+len;token=Interp "<or>"}; - (* Seq[Token{empty_token with beg=i; len=20; next=i+20; token=Interp "<sentence>"}; - Token{empty_token with orth=orth;beg=i+20; len=len-20;next=i+len; token=Interp "<or>"}]; *) - Seq[Token{empty_token with beg=i; len=21; next=i+21; token=Interp "</clause>"}; - Token{empty_token with beg=i+21; len=20; next=i+41; token=Interp "</sentence>"}; - Token{empty_token with orth=orth;beg=i+41; len=len-59;next=i+len-20;token=Interp "</or>"}; - Token{empty_token with beg=i+len-20;len=20; next=i+len; token=Interp 
"<clause>"}]; - Seq[Token{empty_token with orth=orth;beg=i; len=len-22;next=i+len-22;token=Interp "</or>"}; - Token{empty_token with beg=i+len-22;len=22; next=i+len; token=Interp "<clause>"}]; - ],i+len,l,poss_s_beg - -let is_dot_sentence_end_marker = function - [] -> true - | [Sign " "] -> true - | [Sign ""] -> true - | [Sign " "] -> true - | [Sign "\""] -> true - | [Sign "»"] -> true - | [Sign "”"] -> true - | _ -> false - -let not_dot_sentence_end_marker = function - Sign " " :: Small _ :: _ -> true - | Sign "" :: Small _ :: _ -> true - | Sign " " :: Small _ :: _ -> true - | Sign "," :: _ -> true - | Sign ":" :: _ -> true - | Sign "?" :: _ -> true - | Sign "!" :: _ -> true - | Small _ :: _ -> true - | ForeignSmall _ :: _ -> true - | Capital _ :: _ -> true - | ForeignCapital _ :: _ -> true - | Digit _ :: _ -> true - | _ -> false - -let is_comma_digit_marker = function - Digit _ :: l -> true - | _ -> false - -let is_colon_sentence_end_marker = function - [] -> true - | [Sign " "] -> true - | [Sign ""] -> true - | [Sign " "] -> true - | _ -> false - -let is_colon_symbol = function - Digit _ :: _ -> true - | Sign "/" :: _ -> true - | _ -> false - -let is_multidot_sentence_end_marker = function - [] -> true - | [Sign " "] -> true - | [Sign ""] -> true - | [Sign " "] -> true - | [Sign "\""] -> true - | [Sign "»"] -> true - | [Sign "”"] -> true -(* | "\"" :: l -> true - | "»" :: l -> true - | "”" :: l -> true - | "“" :: l -> true - | " " :: "-" :: l -> true - | " " :: "–" :: l -> true - | " " :: "—" :: l -> true - | ")" :: l -> true - | "]" :: l -> true*) - | _ -> false - -let create_quot_digit_token i signs l = - let t,i2 = create_empty_sign_token i signs in - Variant[ - Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"}; - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}; - Token{t with beg=t.beg+factor; next=t.next+factor;token=Interp "”s"}]; - Seq[Token{t with token=Interp "”"}; - 
Token{empty_token with beg=i2;len=20;next=i2+20;token=Interp "</clause>"}; - Token{empty_token with orth=".";beg=i2+20;len=factor-20;next=i2+factor;token=Interp "</sentence>"}]; - ],i2+factor,l,true - -let rec recognize_sign_group poss_s_beg i = function - | (Sign " ") :: l -> create_sign_token poss_s_beg i [Sign " "] l (Symbol " ") - | (Sign "") :: l -> create_sign_token poss_s_beg i [Sign ""] l (Symbol " ") - | (Sign " ") :: l -> create_sign_token poss_s_beg i [Sign " "] l (Symbol " ") - | (Sign "\"") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "\""] l - | (Sign "\"") :: l -> - let t,i = create_empty_sign_token i [Sign "\""] in - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"};Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg - | (Sign "˝") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "˝"] l - | (Sign "˝") :: l -> - let t,i = create_empty_sign_token i [Sign "˝"] in - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"};Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg - | (Sign "„") :: l -> - let t,i = create_empty_sign_token i [Sign "„"] in - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"}],i,l,poss_s_beg - | (Sign "”") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "”"] l - | (Sign "”") :: l -> - let t,i = create_empty_sign_token i [Sign "”"] in - Variant[Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg - | (Sign "“") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "“"] l - | (Sign "“") :: l -> - let t,i = create_empty_sign_token i [Sign "“"] in - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"};Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg - | (Sign ",") :: (Sign ",") :: l -> - let t,i = create_empty_sign_token i [Sign ",";Sign ","] in - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"}],i,l,poss_s_beg - 
| (Sign ",") :: l -> - let t,i2 = create_empty_sign_token i [Sign ","] in - if is_comma_digit_marker l then - Token{t with token=Symbol ","},i2,l,false - else - Variant[Token{t with token=Interp ","}; - Seq[Token{empty_token with orth=",";beg=i;len=factor/2;next=i+factor/2;token=Interp "</clause>"}; - Token{empty_token with beg=i+factor/2;len=factor-(factor/2);next=i+factor;token=Interp "<clause>"}]],i2,l,false - | (Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: []) l (make_lemma ("(…)","sinterj")) - | (Sign "(") :: (Sign "?") :: (Sign "!") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "?") :: (Sign "!") :: (Sign ")") :: []) l (make_lemma ("(?!)","sinterj")) - | (Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign ".") :: (Sign ".") :: (Sign ")") :: []) l (make_lemma ("(…)","sinterj")) - | (Sign "(") :: (Sign "+") :: (Sign "+") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "+") :: (Sign "+") :: (Sign ")") :: []) l (make_lemma ("(++)","sinterj")) - | (Sign "(") :: (Sign "-") :: (Sign "-") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "-") :: (Sign "-") :: (Sign ")") :: []) l (make_lemma ("(--)","symbol")) - | (Sign "(") :: (Sign "…") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "…") :: (Sign ")") :: []) l (make_lemma ("(…)","sinterj")) - | (Sign "(") :: (Sign "?") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "?") :: (Sign ")") :: []) l (make_lemma ("(?)","sinterj")) - | (Sign "(") :: (Sign "+") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "+") :: (Sign ")") :: []) l (make_lemma ("(+)","symbol")) - | (Sign "(") :: (Sign "!") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "!") :: (Sign 
")") :: []) l (make_lemma ("(!)","sinterj")) - | (Sign "(") :: (Sign "-") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "-") :: (Sign ")") :: []) l (make_lemma ("(-)","symbol")) - | (Sign "(") :: (Sign "*") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign "(") :: (Sign "*") :: (Sign ")") :: []) l (make_lemma ("(*)","symbol")) - | (Sign "(") :: l -> create_sign_token poss_s_beg i [Sign "("] l (Interp "(") - | (Sign ":") :: (Sign "(") :: (Sign "(") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "(") :: (Sign "(") :: (Sign "(") :: []) l (make_lemma (":(((","sinterj")) - | (Sign ":") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "(") :: []) l (make_lemma (":(","sinterj")) - | (Sign ":") :: (Sign "-") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign "(") :: []) l (make_lemma (":-(","sinterj")) - | (Sign ";") :: (Sign "(") :: (Sign "(") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "(") :: (Sign "(") :: []) l (make_lemma (";((","sinterj")) - | (Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";-))))","sinterj")) - | (Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":-))))","sinterj")) - | (Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":-)))","sinterj")) - | (Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l 
(make_lemma (";-)))","sinterj")) - | (Sign ";") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";)))","sinterj")) - | (Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":-))","sinterj")) - | (Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";-))","sinterj")) - | (Sign ":") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign ")") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":)))","sinterj")) - | (Sign ":") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (":))","sinterj")) - | (Sign ";") :: (Sign ")") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign ")") :: (Sign ")") :: []) l (make_lemma (";))","sinterj")) - | (Sign ";") :: (Sign "-") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "-") :: (Sign ")") :: []) l (make_lemma (";-)","sinterj")) - | (Sign ":") :: (Sign "|") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "|") :: []) l (make_lemma (":|","sinterj")) - | (Sign ":") :: (Sign "\\") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "\\") :: []) l (make_lemma (":\\","sinterj")) - | (Sign ":") :: (Sign "-") :: (Sign "/") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign "-") :: (Sign "/") :: []) l (make_lemma (":-/","sinterj")) - | (Sign ":") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ":") :: (Sign ")") :: []) l (make_lemma (":)","sinterj")) - | (Sign ";") :: (Sign ")") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign ")") :: []) l (make_lemma (";)","sinterj")) - | (Sign ")") :: l 
-> create_sign_token poss_s_beg i [Sign ")"] l (Interp ")") - | (Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: []) l (make_lemma ("(…)","sinterj")) - | (Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign ".") :: (Sign ".") :: (Sign "]") :: []) l (make_lemma ("(…)","sinterj")) - | (Sign "[") :: (Sign "+") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign "+") :: (Sign "]") :: []) l (make_lemma ("[+]","symbol")) - | (Sign "[") :: (Sign "-") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign "-") :: (Sign "]") :: []) l (make_lemma ("[-]","symbol")) - | (Sign "[") :: (Sign "?") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign "[") :: (Sign "?") :: (Sign "]") :: []) l (make_lemma ("[?]","sinterj")) - | (Sign ":") :: (Sign "]") :: l -> - let t,i2 = create_empty_sign_token i [Sign ":";Sign "]"] in - Variant[Token{t with token=make_lemma (":]","sinterj")}; - Seq[Token{empty_token with orth=":";beg=i;len=factor;next=i+factor;token=Interp ":"; attrs=["maybe cs"]}; - Token{empty_token with orth="]";beg=i+factor;len=factor;next=i+2*factor;token=Interp "]"; attrs=["maybe cs"]}]],i2,l,false - | (Sign ";") :: (Sign "]") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "]") :: []) l (make_lemma (";]","sinterj")) - | (Sign "]") :: l -> create_sign_token poss_s_beg i [Sign "]"] l (Interp "]") - | (Sign "[") :: l -> create_sign_token poss_s_beg i [Sign "["] l (Interp "[") - | (Sign ":") :: l -> - if is_colon_symbol l then - Token{empty_token with orth=":";beg=i;len=factor;next=i+factor;token=Symbol ":"; attrs=["maybe cs"]},i+factor,l,false - else - Variant[ - Seq[Token{empty_token with beg=i;len=11;next=i+11;token=Interp "</clause>"}; (* wyliczenie*) - Token{empty_token with 
orth=":";beg=i+11;len=factor-11;next=i+factor;token=Interp "<clause>"}]; - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"}; - Token{empty_token with orth=":";beg=i+10;len=factor-30;next=i+factor-20;token=Interp ":"}; (* mowa zależna, koniec zdania *) - Token{empty_token with beg=i+factor-20;len=20;next=i+factor;token=Interp "</sentence>"}]; - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"}; - Token{empty_token with orth=":";beg=i+10;len=factor-40;next=i+factor-30;token=Interp ":"}; (* po ':' zdanie z małej litery *) - Token{empty_token with beg=i+factor-30;len=10;next=i+factor-20;token=Interp "</sentence>"}; - Token{empty_token with beg=i+factor-20;len=10;next=i+factor-10;token=Interp "<sentence>"}; - Token{empty_token with beg=i+factor-10;len=10;next=i+factor;token=Interp "<clause>"}]; - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"}; - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</sentence>"}; - Token{empty_token with orth=":";beg=i+20;len=factor-20;next=i+factor;token=Interp ":s"}]; (* speaker *) - ],i+factor,l,true -(* if is_colon_sentence_end_marker l then - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"}; - Token{empty_token with orth=":";beg=i+10;len=10;next=i+20;token=Interp ":"}; - Token{empty_token with beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}],i+factor,l,true - else - else - Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Interp "</clause>"}; - Token{empty_token with orth=":";beg=i+10;len=10;next=i+20;token=Interp ""}; - Token{empty_token with beg=i+20;len=factor-20;next=i+factor;token=Interp "<clause>"}],i+factor,l,false*) - | (Sign "'") :: (Sign "'") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "'";Sign "'"] l - | (Sign "'") :: (Sign "'") :: l -> - let t,i = create_empty_sign_token i [Sign "”"] in - Variant[Token{t with token=Interp "”"};Token{t with token=Interp 
"”s"}],i,l,poss_s_beg - | (Sign "'") :: l -> create_sign_token poss_s_beg i [Sign "'"] l (Symbol "’") - | (Sign "’") :: (Sign "’") :: (Sign ".") :: l -> create_quot_digit_token i [Sign "’";Sign "’"] l - | (Sign "’") :: (Sign "’") :: l -> - let t,i = create_empty_sign_token i [Sign "”"] in - Variant[Token{t with token=Interp "”"};Token{t with token=Interp "”s"}],i,l,poss_s_beg - | (Sign "’") :: l -> create_sign_token poss_s_beg i [Sign "’"] l (Symbol "’") - | (Sign ";") :: (Sign "*") :: l -> create_sign_token poss_s_beg i ((Sign ";") :: (Sign "*") :: []) l (make_lemma (";*","sinterj")) - | (Sign ";") :: l -> - Variant[Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"}; - Token{empty_token with orth=";";beg=i+20;len=20;next=i+40;token=Interp "</sentence>"}; - Token{empty_token with beg=i+40;len=20;next=i+60;token=Interp "<sentence>"}; - Token{empty_token with beg=i+60;len=factor-60;next=i+factor;token=Interp "<clause>"}]; - Token{empty_token with orth=";";beg=i;len=factor;next=i+factor;token=Interp ";"; attrs=["maybe cs"]}],i+factor,l,false - | (Sign "?") :: (Sign "!") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "?!...",i+5*factor,l,true - | (Sign "?") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> - create_sentence_seq_q i ((Sign "?") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "?...",i+4*factor,l,true - | (Sign "?") :: (Sign "?") :: (Sign "?") :: (Sign "?") :: l -> - create_sentence_seq_q i ((Sign "?") :: (Sign "?") :: (Sign "?") :: (Sign "?") :: []) l "????",i+4*factor,l,true - | (Sign "?") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: l -> - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: []) l "?!!!",i+4*factor,l,true - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "?") :: l -> - Variant[create_sentence_seq_hapl_q i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "?") :: []) l "…?"; - 
create_sentence_seq_q i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "?") :: []) l "…?"],i+4*factor,l,true - | (Sign "?") :: (Sign "!") :: (Sign "?") :: l -> - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: (Sign "?") :: []) l "?!?",i+3*factor,l,true - | (Sign "?") :: (Sign "?") :: (Sign "?") :: l -> - create_sentence_seq_q i ((Sign "?") :: (Sign "?") :: (Sign "?") :: []) l "???",i+3*factor,l,true - | (Sign "?") :: (Sign "!") :: l -> - create_sentence_seq_q i ((Sign "?") :: (Sign "!") :: []) l "?!",i+2*factor,l,true - | (Sign "?") :: (Sign "?") :: l -> - create_sentence_seq_q i ((Sign "?") :: (Sign "?") :: []) l "??",i+2*factor,l,true -(* | (Sign "?") :: (Sign ".") :: l -> *) - | (Sign "!") :: (Sign "?") :: l -> - create_sentence_seq_q i ((Sign "!") :: (Sign "?") :: []) l "!?",i+2*factor,l,true - | (Sign "?") :: (Sign "…") :: l -> - create_sentence_seq_q i ((Sign "?") :: (Sign "…") :: []) l "?…",i+2*factor,l,true - | (Sign "…") :: (Sign "?") :: l -> - Variant[create_sentence_seq_hapl_q i ((Sign "…") :: (Sign "?") :: []) l "…?"; - create_sentence_seq_q i ((Sign "…") :: (Sign "?") :: []) l "…?"],i+2*factor,l,true - | (Sign "?") :: l -> - create_sentence_seq_q i ((Sign "?") :: []) l "?",i+factor,l,true - | (Sign "!") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: l -> - create_sentence_seq i ((Sign "!") :: (Sign "!") :: (Sign "!") :: (Sign "!") :: []) l "!!!!",i+4*factor,l,true - | (Sign "!") :: (Sign "!") :: (Sign "!") :: l -> - create_sentence_seq i ((Sign "!") :: (Sign "!") :: (Sign "!") :: []) l "!!!",i+3*factor,l,true - | (Sign "!") :: (Sign "!") :: l -> - create_sentence_seq i ((Sign "!") :: (Sign "!") :: []) l "!!",i+2*factor,l,true - | (Sign "!") :: l -> - create_sentence_seq i ((Sign "!") :: []) l "!",i+factor,l,true - | (Sign "…") :: l -> - if is_multidot_sentence_end_marker l then - Variant[create_sentence_seq_hapl i ((Sign "…") :: []) l "…"; - create_sentence_seq i ((Sign "…") :: []) l "…"],i+factor,l,true - else - Variant[create_sentence_seq_hapl 
i ((Sign "…") :: []) l "…"; - create_sentence_seq i ((Sign "…") :: []) l "…"; - Token{empty_token with orth="…";beg=i;len=factor;next=i+factor;token=make_lemma ("…","sinterj"); attrs=["maybe cs"]}],i+factor,l,true - | (Sign "/") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "/") :: l -> create_sign_token poss_s_beg i ((Sign "/") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign "/") :: []) l (make_lemma ("(…)","sinterj")) - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> (* Różne natęrzenia wielokropka i wypunktowania *) - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"; - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+8*factor,l,true - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"; - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+7*factor,l,true - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"; - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+6*factor,l,true - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"; - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") 
:: []) l "……"],i+5*factor,l,true - | (Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"; - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "……"],i+4*factor,l,true - | (Sign ".") :: (Sign ".") :: (Sign ".") :: l -> - if is_multidot_sentence_end_marker l then - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"; - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"],i+3*factor,l,true - else - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"; - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"; - Token{empty_token with orth="...";beg=i;len=3*factor;next=i+3*factor;token=make_lemma ("…","sinterj"); attrs=["maybe cs"]}],i+3*factor,l,true - | (Sign ".") :: (Sign ".") :: l -> - if is_multidot_sentence_end_marker l then - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: []) l "…"; - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"],i+2*factor,l,true - else - Variant[create_sentence_seq_hapl i ((Sign ".") :: (Sign ".") :: []) l "…"; - create_sentence_seq i ((Sign ".") :: (Sign ".") :: (Sign ".") :: []) l "…"; - Token{empty_token with orth="..";beg=i;len=2*factor;next=i+2*factor;token=make_lemma ("…","sinterj"); attrs=["maybe cs"]}],i+2*factor,l,true - | (Sign ".") :: l -> - if is_dot_sentence_end_marker l then - Variant[Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Symbol "."; attrs=["maybe cs"]}; - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</clause>"}; - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}]; - Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"}; - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp 
"</sentence>"}]],i+factor,l,true - else if not_dot_sentence_end_marker l then - Token{empty_token with orth=".";beg=i;len=factor;next=i+factor;token=Symbol "."; attrs=["maybe cs"]},i+factor,l,false - else - Variant[Seq[Token{empty_token with beg=i;len=10;next=i+10;token=Symbol "."; attrs=["maybe cs"]}; - Token{empty_token with beg=i+10;len=10;next=i+20;token=Interp "</clause>"}; - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}]; - Seq[Token{empty_token with beg=i;len=20;next=i+20;token=Interp "</clause>"}; - Token{empty_token with orth=".";beg=i+20;len=factor-20;next=i+factor;token=Interp "</sentence>"}]; - Token{empty_token with orth=".";beg=i;len=factor;next=i+factor;token=Symbol "."; attrs=["maybe cs"]}],i+factor,l,true - | (Sign "*") :: (Sign "*") :: (Sign "*") :: (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*";Sign "*";Sign "*";Sign "*"] l (Interp "*****") (* zastępniki liter *) - | (Sign "*") :: (Sign "*") :: (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*";Sign "*";Sign "*"] l (Interp "****") - | (Sign "*") :: (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*";Sign "*"] l (Interp "***") - | (Sign "*") :: (Sign "*") :: l -> create_sign_token poss_s_beg i [Sign "*";Sign "*"] l (Interp "**") - | (Sign "*") :: l -> (* Interp zastępnik liter i cudzysłów, symbol listy *) - let t,i = create_empty_sign_token i [Sign "*"] in - Variant[Token{t with token=Interp "*"};Token{t with token=Symbol "*"}],i,l,poss_s_beg - | (Sign "+") :: l -> create_sign_token poss_s_beg i [Sign "+"] l (Symbol "+") - | (Sign "«") :: l -> - let t,i = create_empty_sign_token i [Sign "«"] in - Variant[Token{t with token=Interp "«"};Token{t with token=Interp "«s"}],i,l,poss_s_beg - | (Sign "»") :: l -> - let t,i = create_empty_sign_token i [Sign "»"] in - Variant[Token{t with token=Interp "»"};Token{t with token=Interp "»s"}],i,l,poss_s_beg - | 
(Sign "<") :: (Sign "<") :: l -> create_sign_token poss_s_beg i [Sign "<";Sign "<"] l (Interp "«") (* prawy cudzysłów *) - | (Sign "<") :: l -> (* prawy cudzysłów i element wzoru matematycznego *) - let t,i = create_empty_sign_token i [Sign "<"] in - Variant[Token{t with token=Interp "«"};Token{t with token=Symbol "<"}],i,l,poss_s_beg - | (Sign ">") :: (Sign ">") :: l -> create_sign_token poss_s_beg i [Sign ">";Sign ">"] l (Interp "»") (* lewy cudzysłów *) - | (Sign ">") :: l -> create_sign_token poss_s_beg i [Sign ">"] l (Symbol ">") - | (Sign "-") :: (Sign "-") :: (Sign "-") :: l -> create_or_beg2 i [Sign "-";Sign "-";Sign "-"] l poss_s_beg - | (Sign "-") :: (Sign "-") :: l -> create_or_beg2 i [Sign "-";Sign "-"] l poss_s_beg - | (Sign "-") :: l -> create_or_beg i [Sign "-"] l poss_s_beg - | (Sign "‐") :: l -> create_or_beg i [Sign "‐"] l poss_s_beg - | (Sign "‑") :: l -> create_or_beg i [Sign "‑"] l poss_s_beg - | (Sign "‒") :: l -> create_or_beg i [Sign "‒"] l poss_s_beg - | (Sign "−") :: l -> create_or_beg i [Sign "−"] l poss_s_beg - | (Sign "–") :: l -> create_or_beg i [Sign "–"] l poss_s_beg - | (Sign "—") :: l -> create_or_beg i [Sign "—"] l poss_s_beg - | (Sign "‘") :: l -> create_sign_token poss_s_beg i [Sign "‘"] l (Interp "‘") - | (Sign "´") :: l -> create_sign_token poss_s_beg i [Sign "´"] l (Symbol "’") - | (Sign "`") :: (Sign "`") :: l -> - let t,i = create_empty_sign_token i [Sign "`";Sign "`"] in - Variant[Token{t with token=Interp "„"};Token{t with token=Interp "„s"}],i,l,poss_s_beg - | (Sign "`") :: l -> create_sign_token poss_s_beg i [Sign "`"] l (Symbol "’") - | (Sign "·") :: l -> create_sign_token poss_s_beg i [Sign "·"] l (Interp "·") - | (Sign "•") :: l -> create_sign_token poss_s_beg i [Sign "•"] l (Interp "•") - | (Sign "¨") :: l -> create_sign_token poss_s_beg i [Sign "¨"] l (Interp "¨") - | (Sign "~") :: l -> - let t,i = create_empty_sign_token i [Sign "~"] in - Variant[Token{t with token=Symbol "~"};Token{t with token=make_lemma 
("około","prep:gen")}],i,l,false - | (Sign "{") :: l -> - let t,i = create_empty_sign_token i [Sign "{"] in - Variant[Token{t with token=Symbol "{"};Token{t with token=Interp "{"}],i,l,poss_s_beg - | (Sign "}") :: l -> - let t,i = create_empty_sign_token i [Sign "}"] in - Variant[Token{t with token=Symbol "}"};Token{t with token=Interp "}"}],i,l,poss_s_beg - | (Sign "#") :: l -> create_sign_token poss_s_beg i [Sign ""] l (Symbol "") - | (Sign "^") :: (Sign "^") :: l -> create_sign_token poss_s_beg i [Sign "^";Sign "^"] l (make_lemma ("^^","sinterj")) - | (Sign "^") :: l -> create_sign_token poss_s_beg i [Sign "^"] l (Symbol "^") - | (Sign "|") :: l -> create_sign_token poss_s_beg i [Sign "|"] l (Symbol "|") - | (Sign "&") :: l -> create_sign_token poss_s_beg i [Sign "&"] l (Symbol "&") - | (Sign "=") :: l -> create_sign_token poss_s_beg i [Sign "="] l (Symbol "=") - | (Sign "/") :: l -> - let t,i = create_empty_sign_token i [Sign "/"] in - Variant[Token{t with token=Symbol "/"};Token{t with token=make_lemma ("na","prep:acc")}],i,l,false - | (Sign "_") :: l -> create_sign_token poss_s_beg i [Sign "_"] l (Symbol "_") - | (Sign "@") :: l -> create_sign_token poss_s_beg i [Sign "@"] l (Symbol "@") - | (Sign "×") :: l -> create_sign_token poss_s_beg i [Sign "×"] l (Symbol "×") - | (Sign "%") :: l -> - let t,i = create_empty_sign_token i [Sign "%"] in - Variant[Token{t with token=Symbol "%"};Token{t with token=make_lemma ("procent","subst:_:_:m3")}],i,l,false - | (Sign "$") :: l -> - let t,i = create_empty_sign_token i [Sign "$"] in - Variant[Token{t with token=Symbol "$"};Token{t with token=make_lemma ("dolar","subst:_:_:m2")}],i,l,false - | (Sign "€") :: l -> create_sign_token poss_s_beg i [Sign "€"] l (make_lemma ("euro","subst:_:_:n2")) - | (Sign "²") :: l -> create_sign_token poss_s_beg i [Sign "²"] l (Symbol "²") - | (Sign "°") :: l -> create_sign_token poss_s_beg i [Sign "°"] l (make_lemma ("stopień","subst:_:_:m3")) - | (Sign "§") :: l -> create_sign_token false 
i [Sign "§"] l (make_lemma ("paragraf","subst:_:_:m3")) - | (Sign s) :: l -> print_endline ("recognize_sign_group: " ^ s); create_sign_token poss_s_beg i [Sign s] l (Symbol s) - | l -> failwith "recognize_sign_group" - -(* FIXME: "„Szpak” frunie." trzeba przenie przenieść <sentence> przed „, ale zostawić po „s. *) - -let rec group_chars poss_s_beg i rev = function - [] -> List.rev ((Token{empty_token with beg=i;len=factor;next=i+factor;token=Interp "</query>"}) :: rev) - | (Digit s) :: l -> let x,l = group_digits [] ((Digit s) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_digits poss_s_beg i x) :: rev) l - | (Sign s) :: l -> let x,i,l,poss_s_beg = recognize_sign_group poss_s_beg i ((Sign s) :: l) in group_chars poss_s_beg i (x :: rev) l - | (Capital(s,t)) :: l -> let x,l = group_letters [] ((Capital(s,t)) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_letters poss_s_beg i x) :: rev) l - | (ForeignCapital(s,t)) :: l -> let x,l = group_letters [] ((ForeignCapital(s,t)) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_letters poss_s_beg i x) :: rev) l - | (Small s) :: l -> let x,l = group_letters [] ((Small s) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_letters poss_s_beg i x) :: rev) l - | (ForeignSmall s) :: l -> let x,l = group_letters [] ((ForeignSmall s) :: l) in group_chars false (i + Xlist.size x * factor) ((merge_letters poss_s_beg i x) :: rev) l - | (Other(s,x)) :: l -> - let x,l = group_others [] ((Other(s,x)) :: l) in - group_chars false (i + Xlist.size x * factor) - ((Token{empty_token with orth=String.concat "" x;beg=i;len=Xlist.size x * factor;next=i+factor;token=Other(String.concat "" x)}) :: rev) l - -let tokenize l = - (Token{empty_token with beg=0;len=factor;next=factor;token=Interp "<query>"}) :: (group_chars true factor [] l) diff --git a/tokenizer/eniam-tokenizer-1.0/README b/tokenizer/eniam-tokenizer-1.0/README deleted file mode 100644 index 393363d..0000000 --- 
a/tokenizer/eniam-tokenizer-1.0/README +++ /dev/null @@ -1,50 +0,0 @@ -ENIAMtokenizer Version 1.0 : ------------------------ - -ENIAMtokenizer is a library that provides a tokenizer for Polish. - -Install -------- - -ENIAMtokenizer requires OCaml version 4.02.3 compiler -together with Xlib library version 3.1 or later. - -In order to install type: - -make install - -by default, ENIAMtokenizer is installed in the 'ocamlc -where'/eniam directory. -you can change it by editing the Makefile. - -In order to test library type: -make test -./test - -Credits -------- -Copyright © 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> -Copyright © 2016 Institute of Computer Science Polish Academy of Sciences - -The parser uses the following licensed resources: - -SGJP: Grammatical Dictionary of Polish, version 20151020 -Copyright © 2007–2015 Zygmunt Saloni, Włodzimierz Gruszczyński, Marcin -Woliński, Robert Wołosz, Danuta Skowrońska -http://sgjp.pl - -Licence -------- - -This library is free software: you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. 
- diff --git a/tokenizer/eniam-tokenizer-1.0/config-tokenizer b/tokenizer/eniam-tokenizer-1.0/config-tokenizer deleted file mode 100644 index 5c6adc2..0000000 --- a/tokenizer/eniam-tokenizer-1.0/config-tokenizer +++ /dev/null @@ -1,2 +0,0 @@ -# Localization of definitions of multi-token-expressions -MTE_FILENAME=/usr/share/eniam/resources/SGJP/mte_20151215.tab diff --git a/tokenizer/eniam-tokenizer-1.0/lgpl-3.0.txt b/tokenizer/eniam-tokenizer-1.0/lgpl-3.0.txt deleted file mode 100644 index 65c5ca8..0000000 --- a/tokenizer/eniam-tokenizer-1.0/lgpl-3.0.txt +++ /dev/null @@ -1,165 +0,0 @@ - GNU LESSER GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - - This version of the GNU Lesser General Public License incorporates -the terms and conditions of version 3 of the GNU General Public -License, supplemented by the additional permissions listed below. - - 0. Additional Definitions. - - As used herein, "this License" refers to version 3 of the GNU Lesser -General Public License, and the "GNU GPL" refers to version 3 of the GNU -General Public License. - - "The Library" refers to a covered work governed by this License, -other than an Application or a Combined Work as defined below. - - An "Application" is any work that makes use of an interface provided -by the Library, but which is not otherwise based on the Library. -Defining a subclass of a class defined by the Library is deemed a mode -of using an interface provided by the Library. - - A "Combined Work" is a work produced by combining or linking an -Application with the Library. The particular version of the Library -with which the Combined Work was made is also called the "Linked -Version". 
- - The "Minimal Corresponding Source" for a Combined Work means the -Corresponding Source for the Combined Work, excluding any source code -for portions of the Combined Work that, considered in isolation, are -based on the Application, and not on the Linked Version. - - The "Corresponding Application Code" for a Combined Work means the -object code and/or source code for the Application, including any data -and utility programs needed for reproducing the Combined Work from the -Application, but excluding the System Libraries of the Combined Work. - - 1. Exception to Section 3 of the GNU GPL. - - You may convey a covered work under sections 3 and 4 of this License -without being bound by section 3 of the GNU GPL. - - 2. Conveying Modified Versions. - - If you modify a copy of the Library, and, in your modifications, a -facility refers to a function or data to be supplied by an Application -that uses the facility (other than as an argument passed when the -facility is invoked), then you may convey a copy of the modified -version: - - a) under this License, provided that you make a good faith effort to - ensure that, in the event an Application does not supply the - function or data, the facility still operates, and performs - whatever part of its purpose remains meaningful, or - - b) under the GNU GPL, with none of the additional permissions of - this License applicable to that copy. - - 3. Object Code Incorporating Material from Library Header Files. - - The object code form of an Application may incorporate material from -a header file that is part of the Library. 
You may convey such object -code under terms of your choice, provided that, if the incorporated -material is not limited to numerical parameters, data structure -layouts and accessors, or small macros, inline functions and templates -(ten or fewer lines in length), you do both of the following: - - a) Give prominent notice with each copy of the object code that the - Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the object code with a copy of the GNU GPL and this license - document. - - 4. Combined Works. - - You may convey a Combined Work under terms of your choice that, -taken together, effectively do not restrict modification of the -portions of the Library contained in the Combined Work and reverse -engineering for debugging such modifications, if you also do each of -the following: - - a) Give prominent notice with each copy of the Combined Work that - the Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the Combined Work with a copy of the GNU GPL and this license - document. - - c) For a Combined Work that displays copyright notices during - execution, include the copyright notice for the Library among - these notices, as well as a reference directing the user to the - copies of the GNU GPL and this license document. - - d) Do one of the following: - - 0) Convey the Minimal Corresponding Source under the terms of this - License, and the Corresponding Application Code in a form - suitable for, and under terms that permit, the user to - recombine or relink the Application with a modified version of - the Linked Version to produce a modified Combined Work, in the - manner specified by section 6 of the GNU GPL for conveying - Corresponding Source. - - 1) Use a suitable shared library mechanism for linking with the - Library. 
A suitable mechanism is one that (a) uses at run time - a copy of the Library already present on the user's computer - system, and (b) will operate properly with a modified version - of the Library that is interface-compatible with the Linked - Version. - - e) Provide Installation Information, but only if you would otherwise - be required to provide such information under section 6 of the - GNU GPL, and only to the extent that such information is - necessary to install and execute a modified version of the - Combined Work produced by recombining or relinking the - Application with a modified version of the Linked Version. (If - you use option 4d0, the Installation Information must accompany - the Minimal Corresponding Source and Corresponding Application - Code. If you use option 4d1, you must provide the Installation - Information in the manner specified by section 6 of the GNU GPL - for conveying Corresponding Source.) - - 5. Combined Libraries. - - You may place library facilities that are a work based on the -Library side by side in a single library together with other library -facilities that are not Applications and are not covered by this -License, and convey such a combined library under terms of your -choice, if you do both of the following: - - a) Accompany the combined library with a copy of the same work based - on the Library, uncombined with any other library facilities, - conveyed under the terms of this License. - - b) Give prominent notice with the combined library that part of it - is a work based on the Library, and explaining where to find the - accompanying uncombined form of the same work. - - 6. Revised Versions of the GNU Lesser General Public License. - - The Free Software Foundation may publish revised and/or new versions -of the GNU Lesser General Public License from time to time. Such new -versions will be similar in spirit to the present version, but may -differ in detail to address new problems or concerns. 
- - Each version is given a distinguishing version number. If the -Library as you received it specifies that a certain numbered version -of the GNU Lesser General Public License "or any later version" -applies to it, you have the option of following the terms and -conditions either of that published version or of any later version -published by the Free Software Foundation. If the Library as you -received it does not specify a version number of the GNU Lesser -General Public License, you may choose any version of the GNU Lesser -General Public License ever published by the Free Software Foundation. - - If the Library as you received it specifies that a proxy can decide -whether future versions of the GNU Lesser General Public License shall -apply, that proxy's public statement of acceptance of any version is -permanent authorization for you to choose that version for the -Library. diff --git a/tokenizer/eniam-tokenizer-1.0/makefile b/tokenizer/eniam-tokenizer-1.0/makefile deleted file mode 100755 index a444d38..0000000 --- a/tokenizer/eniam-tokenizer-1.0/makefile +++ /dev/null @@ -1,51 +0,0 @@ -OCAMLC=ocamlc -OCAMLOPT=ocamlopt -OCAMLDEP=ocamldep -INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam -OCAMLFLAGS=$(INCLUDES) -g -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa -INSTALLDIR=`ocamlc -where`/eniam - -SOURCES= ENIAMtokenizerTypes.ml ENIAMtokens.ml ENIAMacronyms.ml ENIAMpatterns.ml ENIAMtokenizer.ml - -all: eniam-tokenizer.cma eniam-tokenizer.cmxa - -install: all - mkdir -p $(INSTALLDIR) - cp eniam-tokenizer.cmxa eniam-tokenizer.a eniam-tokenizer.cma config-tokenizer $(INSTALLDIR) - cp ENIAMtokenizerTypes.cmi ENIAMtokens.cmi ENIAMacronyms.cmi ENIAMpatterns.cmi ENIAMtokenizer.cmi $(INSTALLDIR) - cp ENIAMtokenizerTypes.cmx ENIAMtokens.cmx ENIAMacronyms.cmx ENIAMpatterns.cmx ENIAMtokenizer.cmx $(INSTALLDIR) - mkdir -p /usr/share/eniam/resources/SGJP - cp resources/SGJP/* 
/usr/share/eniam/resources/SGJP - -eniam-tokenizer.cma: $(SOURCES) - ocamlc -linkall -a -o eniam-tokenizer.cma $(OCAMLFLAGS) $^ - -eniam-tokenizer.cmxa: $(SOURCES) - ocamlopt -linkall -a -o eniam-tokenizer.cmxa $(INCLUDES) $^ - -test: test.ml - $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml - -.SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx - -.mll.ml: - ocamllex $< - -.mly.mli: - ocamlyacc $< - -.mly.ml: - ocamlyacc $< - -.ml.cmo: - $(OCAMLC) $(OCAMLFLAGS) -c $< - -.mli.cmi: - $(OCAMLC) $(OCAMLFALGS) -c $< - -.ml.cmx: - $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $< - -clean: - rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a test diff --git a/tokenizer/eniam-tokenizer-1.0/test.ml b/tokenizer/eniam-tokenizer-1.0/test.ml deleted file mode 100644 index a98b001..0000000 --- a/tokenizer/eniam-tokenizer-1.0/test.ml +++ /dev/null @@ -1,73 +0,0 @@ -(* - * ENIAMtokenizer, a tokenizer for Polish - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences - * - * This library is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - *) - - -let test_strings = [ -(* "a gdybym miałem"; - "A Gdy Miałem"; - "GDY MIAŁEM"; - "I II III IV V VI VII VIII IX X MCXIV MXC"; - "Kiedy Piotr Prabucki, przewodniczący Komisji Budżetu PeKaO"; - "25 idzie 20."; - "Kot. Kot. 
kot."; - "25."; - "25.888.231"; - "Ala 25.888.231.111 ma."; - "Ala 25.888.031,011."; - "Ala -25.888.031,011."; - "Ala -25 ."; - "Ala -1° C 3° ciepła 20—30°C od 180° do 260°C około 6° poniżej horyzontu."; - "Ala 22-25 ."; - "Ala 22.5.2000-25.5.2001 ."; - "Szpak frunie.";*) - "Kot miauczy."; -(* "Np. Ala.";*) - "w. dom."; - "tzn."; - "c.d.n."; -(* "Arabia Saudyjska biegnie."; - "Cauchy'ego ONZ-owska biegnie.";*) - "TE-cie E-e."; - "MS-DOS-owska CI-cie KRRi-cie UJ-ocie UJ-OCIE."; - "rock'n'rollowy d’Alembertowi staro-cerkiewno-słowiańskimi"; -(* "Tom idzie.";*) - "Miałem miał."; -(* "Szpak śpiewa."; - "Ala ma kota."; - "Ale mają kota:"*) - ] - -let _ = - print_endline "Testy wbudowane"; - Xlist.iter test_strings (fun s -> - print_endline ("\nTEST: " ^ s); - let tokens = ENIAMtokenizer.parse s in - (* print_endline (ENIAMtokenizer.xml_of tokens); *) - Xlist.iter tokens (fun token -> print_endline (ENIAMtokenizer.string_of 0 token))); - print_endline "Testy użytkownika."; - print_endline "Wpisz tekst i naciśnij ENTER, pusty tekst kończy."; - let s = ref (read_line ()) in - while !s <> "" do - let tokens = ENIAMtokenizer.parse !s in - (* print_endline (ENIAMtokenizer.xml_of tokens); *) - Xlist.iter tokens (fun token -> print_endline (ENIAMtokenizer.string_of 0 token)); - print_endline "Wpisz tekst i naciśnij ENTER, pusty tekst kończy."; - s := read_line () - done; - () diff --git a/tokenizer/makefile b/tokenizer/makefile index a444d38..5d0f6ff 100755 --- a/tokenizer/makefile +++ b/tokenizer/makefile @@ -12,11 +12,13 @@ all: eniam-tokenizer.cma eniam-tokenizer.cmxa install: all mkdir -p $(INSTALLDIR) - cp eniam-tokenizer.cmxa eniam-tokenizer.a eniam-tokenizer.cma config-tokenizer $(INSTALLDIR) + cp eniam-tokenizer.cmxa eniam-tokenizer.a eniam-tokenizer.cma $(INSTALLDIR) cp ENIAMtokenizerTypes.cmi ENIAMtokens.cmi ENIAMacronyms.cmi ENIAMpatterns.cmi ENIAMtokenizer.cmi $(INSTALLDIR) cp ENIAMtokenizerTypes.cmx ENIAMtokens.cmx ENIAMacronyms.cmx ENIAMpatterns.cmx 
ENIAMtokenizer.cmx $(INSTALLDIR) - mkdir -p /usr/share/eniam/resources/SGJP - cp resources/SGJP/* /usr/share/eniam/resources/SGJP + mkdir -p /usr/share/eniam/tokenizer + cp resources/mte_20151215.tab /usr/share/eniam/tokenizer/mte_20151215.tab + cp resources/README /usr/share/eniam/tokenizer/README + ln -s /usr/share/eniam/tokenizer/mte_20151215.tab /usr/share/eniam/tokenizer/mte.tab eniam-tokenizer.cma: $(SOURCES) ocamlc -linkall -a -o eniam-tokenizer.cma $(OCAMLFLAGS) $^ diff --git a/tokenizer/resources/SGJP/README b/tokenizer/resources/README index cbc51d3..cbc51d3 100644 --- a/tokenizer/resources/SGJP/README +++ b/tokenizer/resources/README diff --git a/tokenizer/resources/SGJP/mte_20151215.tab b/tokenizer/resources/mte_20151215.tab index 82f4b5a..82f4b5a 100644 --- a/tokenizer/resources/SGJP/mte_20151215.tab +++ b/tokenizer/resources/mte_20151215.tab