Commit 3cc102a98cc2053d8403b1fa2bc104134b70355c
1 parent: bd0de0b5
Adding semantic features to the grammar (dodanie cech semantycznych do gramatyki)
Showing 14 changed files with 1780 additions and 187 deletions
LCGlexicon/ENIAM_LCGlexicon.ml
... | ... | @@ -235,6 +235,8 @@ let make_node id orth lemma pos syntax weight cat_list is_raised = |
235 | 235 | | Pos2 -> attrs |
236 | 236 | | Cat -> ("CAT",SubstVar "cat") :: attrs |
237 | 237 | | Coerced -> ("COERCED",SubstVar "coerced") :: attrs |
238 | + | Role -> ("ROLE",SubstVar "role") :: attrs | |
239 | + | SNode -> ("NODE",SubstVar "node") :: attrs | |
238 | 240 | | Number -> ("NUM",SubstVar "number") :: attrs |
239 | 241 | | Case -> ("CASE",SubstVar "case") :: attrs |
240 | 242 | | Gender -> ("GEND",SubstVar "gender") :: attrs |
... | ... |
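The two new cases extend the attribute fold in make_node, so grammar nodes now carry the semantic role and node type next to CAT and COERCED. A minimal illustrative sketch of the resulting attribute entries (not part of the commit; it assumes SubstVar is the ENIAM_LCGtypes constructor already used unqualified in make_node above):

    (* Attributes contributed when Role and SNode occur in cat_list,
       mirroring the SubstVar variables used in make_node. *)
    let semantic_attrs = [
      "CAT",     ENIAM_LCGtypes.SubstVar "cat";
      "COERCED", ENIAM_LCGtypes.SubstVar "coerced";
      "ROLE",    ENIAM_LCGtypes.SubstVar "role";    (* added by this commit *)
      "NODE",    ENIAM_LCGtypes.SubstVar "node";    (* added by this commit *)
    ]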
LCGlexicon/ENIAM_LCGlexiconTypes.ml
... | ... | @@ -17,7 +17,7 @@ |
17 | 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 | *) |
19 | 19 | |
20 | -type categories = {lemma: string; pos: string; pos2: string; cat: string; coerced: string list; | |
20 | +type categories = {lemma: string; pos: string; pos2: string; cat: string; coerced: string list; roles: string list; snode: string list; | |
21 | 21 | numbers: string list; cases: string list; genders: string list; persons: string list; |
22 | 22 | grads: string list; praeps: string list; acms: string list; |
23 | 23 | aspects: string list; negations: string list; moods: string list; tenses: string list; |
... | ... | @@ -25,7 +25,8 @@ type categories = {lemma: string; pos: string; pos2: string; cat: string; coerce |
25 | 25 | } |
26 | 26 | |
27 | 27 | type selector = |
28 | - Lemma | (*NewLemma |*) Pos | Pos2 | Cat | Coerced | Number | Case | Gender | Person | Grad | Praep | | |
28 | + Lemma | (*NewLemma |*) Pos | Pos2 | Cat | Coerced | Role | SNode | | |
29 | + Number | Case | Gender | Person | Grad | Praep | | |
29 | 30 | Acm | Aspect | Negation | Mood | Tense | Nsyn | Nsem | Ctype | Mode | Psem | |
30 | 31 | Icat | Inumber | Igender | Iperson | Nperson | Ncat | Plemma | |
31 | 32 | Unumber | Ucase | Ugender | Uperson | Amode |
... | ... | @@ -73,7 +74,7 @@ type selector_relation = Eq | Neq (*| StrictEq*) |
73 | 74 | |
74 | 75 | *) |
75 | 76 | |
76 | -let empty_cats = {lemma=""; pos=""; pos2=""; cat="X"; coerced=[]; | |
77 | +let empty_cats = {lemma=""; pos=""; pos2=""; cat="X"; coerced=[]; roles=[]; snode=[]; | |
77 | 78 | numbers=[]; cases=[]; genders=[]; persons=[]; |
78 | 79 | grads=[]; praeps=[]; acms=[]; aspects=[]; negations=[]; moods=[]; tenses=[]; |
79 | 80 | nsyn=[]; nsem=[]; modes=[]; psem=[]; |
... | ... |
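The categories record gains the roles and snode fields, and the selector type gains the matching Role and SNode constructors. A hedged sketch of building a category description with the new fields (the lemma and the values "Animal"/"Agent" are illustrative, not taken from the commit; "concept" is one of the SNode values registered in ENIAMcategoriesPL below):

    open ENIAM_LCGlexiconTypes

    (* Sketch: extending empty_cats with the new semantic fields. *)
    let example_cats =
      { empty_cats with
          lemma = "kot"; pos = "subst"; pos2 = "noun";
          cat = "Animal";
          roles = ["Agent"];    (* new field, selected via Role *)
          snode = ["concept"];  (* new field, selected via SNode *)
          numbers = ["sg"]; cases = ["nom"]; genders = ["m2"]; persons = ["ter"] }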
LCGlexicon/ENIAM_LCGlexiconTypes_old.ml
0 → 100644
1 | +(* | |
2 | + * ENIAM_LCGlexicon is a library that provides an LCG lexicon for Polish | 
3 | + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | + * | |
6 | + * This library is free software: you can redistribute it and/or modify | |
7 | + * it under the terms of the GNU Lesser General Public License as published by | |
8 | + * the Free Software Foundation, either version 3 of the License, or | |
9 | + * (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + * GNU Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public License | |
17 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | + *) | |
19 | + | |
20 | +type categories = {lemma: string; pos: string; pos2: string; cat: string; coerced: string list; | |
21 | + numbers: string list; cases: string list; genders: string list; persons: string list; | |
22 | + grads: string list; praeps: string list; acms: string list; | |
23 | + aspects: string list; negations: string list; moods: string list; tenses: string list; | |
24 | + nsyn: string list; nsem: string list; modes: string list; psem: string list; | |
25 | + } | |
26 | + | |
27 | +type selector = | |
28 | + Lemma | (*NewLemma |*) Pos | Pos2 | Cat | Coerced | Number | Case | Gender | Person | Grad | Praep | | |
29 | + Acm | Aspect | Negation | Mood | Tense | Nsyn | Nsem | Ctype | Mode | Psem | | |
30 | + Icat | Inumber | Igender | Iperson | Nperson | Ncat | Plemma | | |
31 | + Unumber | Ucase | Ugender | Uperson | Amode | |
32 | + | |
33 | +module OrderedSelector = struct | |
34 | + type t = selector | |
35 | + let compare = compare | |
36 | +end | |
37 | + | |
38 | +module SelectorMap=Xmap.Make(OrderedSelector) | |
39 | +module SelectorSet=Xset.Make(OrderedSelector) | |
40 | + | |
41 | +type rule = | |
42 | + Bracket | |
43 | + | Quant of (selector * ENIAM_LCGtypes.internal_grammar_symbol) list | |
44 | + | Raised of selector list | |
45 | + | Syntax of ENIAM_LCGtypes.grammar_symbol | |
46 | + | Sem of string | |
47 | + | |
48 | +type rule_sem = | |
49 | + BasicSem of selector list | |
50 | + | RaisedSem of selector list * selector list | |
51 | + | TermSem of selector list * string | |
52 | + | QuotSem of selector list | |
53 | + | InclusionSem of selector list | |
54 | + | ConjSem of selector list | |
55 | + | |
56 | +type selector_relation = Eq | Neq (*| StrictEq*) | |
57 | + | |
58 | +(* x="s" means that for the rule to be applied the token must have "s" among the values of attribute x; the rule is then executed with all other values of x removed *) | 
59 | +(* x!="s" means that for the rule to be applied the token must have, among the values of attribute x, a symbol other than "s"; the rule is then executed with the value "s" removed from x *) | 
60 | +(* x=="s" means that for the rule to be applied the token must have "s" as the only value of attribute x *) | 
61 | + | 
62 | +(* mutual dependencies between categories (e.g. between case and person in subst) are resolved in ENIAMcategories *) | 
63 | + | 
64 | +(* Basic means that the quantification and the term are generated according to the standard rules: | 
65 | + - quantification ranges over all defined categories and over the values taken from cats | 
66 | + - the type is given directly | 
67 | + - the term creates a node in the dependency structure labelled with all defined categories | 
68 | + | 
69 | + Quant means that the type and the term are generated according to the standard rules: | 
70 | + - the quantification is given directly | 
71 | + - the type is given directly | 
72 | + - the term creates a node in the dependency structure labelled with all defined categories | 
73 | + | 
74 | +*) | 
75 | + | |
76 | +let empty_cats = {lemma=""; pos=""; pos2=""; cat="X"; coerced=[]; | |
77 | + numbers=[]; cases=[]; genders=[]; persons=[]; | |
78 | + grads=[]; praeps=[]; acms=[]; aspects=[]; negations=[]; moods=[]; tenses=[]; | |
79 | + nsyn=[]; nsem=[]; modes=[]; psem=[]; | |
80 | + } | |
81 | + | |
82 | +let default_category_flag = ref true | |
83 | + | |
84 | +let resource_path = | |
85 | + try Sys.getenv "ENIAM_RESOURCE_PATH" | |
86 | + with Not_found -> | |
87 | + if Sys.file_exists "/usr/share/eniam" then "/usr/share/eniam" else | |
88 | + if Sys.file_exists "/usr/local/share/eniam" then "/usr/local/share/eniam" else | |
89 | + if Sys.file_exists "resources" then "resources" else | |
90 | + failwith "resource directory does not exists" | |
91 | + | |
92 | +let data_path = | |
93 | + try Sys.getenv "ENIAM_USER_DATA_PATH" | |
94 | + with Not_found -> "data" | |
95 | + | |
96 | +let rules_filename = resource_path ^ "/LCGlexicon/lexicon-pl.dic" | |
97 | +let user_lexicon_filename = data_path ^ "/lexicon.dic" | |
98 | +let user_cats_filename = data_path ^ "/senses.tab" | |
99 | +let user_coerced_filename = data_path ^ "/coercions.tab" | |
100 | + | |
101 | +let subst_uncountable_lexemes_filename = resource_path ^ "/LCGlexicon/subst_uncountable.dat" | |
102 | +let subst_uncountable_lexemes_filename2 = resource_path ^ "/LCGlexicon/subst_uncountable_stare.dat" | |
103 | +let subst_container_lexemes_filename = resource_path ^ "/LCGlexicon/subst_container.dat" | |
104 | +let subst_numeral_lexemes_filename = resource_path ^ "/LCGlexicon/subst_numeral.dat" | |
105 | +let subst_time_lexemes_filename = resource_path ^ "/LCGlexicon/subst_time.dat" | |
106 | + | |
107 | +let adv_modes_filename = resource_path ^ "/Walenty/adv_modes.tab" | |
108 | +let num_nsems_filename = resource_path ^ "/LCGlexicon/num.tab" | |
... | ... |
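The comments above describe how lexicon entries are assembled from the rule constructors Bracket, Quant, Raised, Syntax and Sem. As a small illustration of those types, a bracketed entry with an explicit Number quantifier could look as follows; this is a hypothetical value, assuming the grammar-symbol constructors (Tensor, Atom, AVar, Top) come from ENIAM_LCGtypes, as in the code below:

    open ENIAM_LCGtypes
    open ENIAM_LCGlexiconTypes

    (* Hypothetical rule entry, not taken from the lexicon files:
       bracketed, quantified over Number (unrestricted, hence Top),
       with the syntax given directly as a tensor category. *)
    let example_rule : rule list =
      [ Bracket;
        Quant [Number, Top];
        Syntax (Tensor [Atom "np"; AVar "number"]) ]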
LCGlexicon/ENIAM_LCGlexicon_old.ml
0 → 100644
1 | +(* | |
2 | + * ENIAM_LCGlexicon is a library that provides an LCG lexicon for Polish | 
3 | + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | + * | |
6 | + * This library is free software: you can redistribute it and/or modify | |
7 | + * it under the terms of the GNU Lesser General Public License as published by | |
8 | + * the Free Software Foundation, either version 3 of the License, or | |
9 | + * (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + * GNU Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public License | |
17 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | + *) | |
19 | + | |
20 | +open Xstd | |
21 | +open ENIAM_LCGtypes | |
22 | +open ENIAM_LCGlexiconTypes | |
23 | +open ENIAMcategoriesPL | |
24 | + | |
25 | +let rec find_selector s = function | |
26 | + (t,Eq,x :: _) :: l -> if t = s then x else find_selector s l | |
27 | + | (t,_,_) :: l -> if t = s then failwith "find_selector 1" else find_selector s l | |
28 | + | [] -> failwith "find_selector 2" | |
29 | + | |
30 | +let rec get_syntax rev = function | |
31 | + Syntax syntax :: rule -> syntax, (List.rev rev) @ rule | |
32 | + | t :: rule -> get_syntax (t :: rev) rule | |
33 | + | [] -> failwith "get_syntax" | |
34 | + | |
35 | +let rec get_quant rev = function | |
36 | + Quant quant :: rule -> quant, (List.rev rev) @ rule | |
37 | + | t :: rule -> get_quant (t :: rev) rule | |
38 | + | [] -> [], List.rev rev | |
39 | + | |
40 | +let rec get_bracket rev = function | |
41 | + Bracket :: rule -> true, (List.rev rev) @ rule | |
42 | + | t :: rule -> get_bracket (t :: rev) rule | |
43 | + | [] -> false, List.rev rev | |
44 | + | |
45 | +let rec get_raised rev = function | |
46 | + Raised raised :: rule -> raised, (List.rev rev) @ rule | |
47 | + | t :: rule -> get_raised (t :: rev) rule | |
48 | + | [] -> raise Not_found | |
49 | + | |
50 | +let rec get_sem_term rev = function | |
51 | + Sem sem_term :: rule -> sem_term, (List.rev rev) @ rule | |
52 | + | t :: rule -> get_sem_term (t :: rev) rule | |
53 | + | [] -> raise Not_found | |
54 | + | |
55 | +let merge_quant pos_quants quants = | |
56 | + let map = Xlist.fold quants SelectorMap.empty (fun map (k,v) -> SelectorMap.add map k v) in | |
57 | + let l,map = Xlist.fold pos_quants ([],map) (fun (l,map) (cat,v) -> | |
58 | + if SelectorMap.mem map cat then (cat,SelectorMap.find map cat) :: l, SelectorMap.remove map cat | |
59 | + else (cat,v) :: l, map) in | |
60 | + List.rev (SelectorMap.fold map l (fun l cat v -> (cat,v) :: l)) | |
61 | + | |
62 | +let assign_quantifiers (selectors,rule,weight) = | |
63 | + let pos = find_selector Pos selectors in | |
64 | + let categories = | |
65 | + try StringMap.find pos_categories pos | |
66 | + with Not_found -> failwith ("assign_quantifiers: unknown part of speech " ^ pos) in | |
67 | + let categories = Xlist.map categories (fun s -> s,Top) in | |
68 | + let syntax,rule = get_syntax [] rule in | |
69 | + let quant,rule = get_quant [] rule in | |
70 | + let bracket,rule = get_bracket [] rule in | |
71 | + let quant = merge_quant categories quant in | |
72 | + selectors, (bracket,quant,syntax),(rule,weight) | |
73 | + | |
74 | +let rec check_quantifiers_int_rec (selectors,syntax) quants = function | |
75 | + Atom x -> () | |
76 | + | AVar "schema" -> () | |
77 | + | AVar x -> | |
78 | + if not (SelectorSet.mem quants (selector_of_string x)) | |
79 | + then failwith ("Variable '" ^ x ^ "' is not quantified in rule " ^ string_of_selectors selectors ^ ": " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax) | |
80 | + | With l -> Xlist.iter l (check_quantifiers_int_rec (selectors,syntax) quants) | |
81 | + | Zero -> () | |
82 | + | Top -> () | |
83 | + | |
84 | +let rec check_quantifiers_rec rule quants = function | |
85 | + Tensor l -> Xlist.iter l (check_quantifiers_int_rec rule quants) | |
86 | + | Plus l -> Xlist.iter l (check_quantifiers_rec rule quants) | |
87 | + | Imp(s,d,t) -> check_quantifiers_rec rule quants s; check_quantifiers_rec rule quants t | |
88 | + | One -> () | |
89 | + | ImpSet(s,l) -> check_quantifiers_rec rule quants s; Xlist.iter l (fun (_,t) -> check_quantifiers_rec rule quants t) | |
90 | + | Star s -> check_quantifiers_rec rule quants s | |
91 | + | Maybe s -> check_quantifiers_rec rule quants s | |
92 | + | _ -> failwith "check_quantifiers_rec" | |
93 | + | |
94 | +let check_quantifiers (selectors,(bracket,quant,syntax),_) = | |
95 | + let quants = Xlist.fold quant SelectorSet.empty (fun quants (q,_) -> SelectorSet.add quants q) in | |
96 | + check_quantifiers_rec (selectors,syntax) quants syntax | |
97 | + | |
98 | +let assign_semantics (selectors,(bracket,quant,syntax),(rule,weight)) = | |
99 | + let semantics = try | |
100 | + let raised,rule = get_raised [] rule in | |
101 | + if rule <> [] then failwith "assign_semantics 1" else | |
102 | + RaisedSem(Xlist.map quant fst, raised) | |
103 | + with Not_found -> (try | |
104 | + let term,rule = get_sem_term [] rule in | |
105 | + if rule <> [] then failwith "assign_semantics 2" else | |
106 | + TermSem(Xlist.map quant fst,term) | |
107 | + with Not_found -> BasicSem(Xlist.map quant fst)) in | |
108 | + selectors,(bracket,quant,syntax),(semantics,weight) | |
109 | + | |
110 | +let rec add_x_args_rec = function | |
111 | + Imp(s,d,t) -> Imp(add_x_args_rec s,d,t) | |
112 | + | ImpSet(s,l) -> ImpSet(add_x_args_rec s,l) | |
113 | + | Tensor[Atom "<conll_root>"] -> Tensor[Atom "<conll_root>"] | |
114 | + | Tensor l -> ImpSet(Tensor l,[Backward,Maybe(Tensor[Atom "X"]);Forward,Maybe(Tensor[Atom "X"])]) | |
115 | + | t -> failwith ("add_x_args_rec: " ^ ENIAM_LCGstringOf.grammar_symbol 0 t) | |
116 | + | |
117 | +let is_raised_semantics = function | |
118 | + RaisedSem _ -> true | |
119 | + | _ -> false | |
120 | + | |
121 | +let rec is_raised_arg = function | |
122 | + Imp _ -> true | |
123 | + | Tensor _ -> false | |
124 | + | Plus l -> Xlist.fold l false (fun b t -> is_raised_arg t || b) | |
125 | + | Maybe t -> is_raised_arg t | |
126 | + | One -> false | |
127 | + | t -> failwith ("is_raised_arg: " ^ ENIAM_LCGstringOf.grammar_symbol 0 t) | |
128 | + | |
129 | +let rec is_raised_syntax = function | |
130 | + Imp(s,d,t) -> is_raised_syntax s || is_raised_arg t | |
131 | + | ImpSet(s,l) -> is_raised_syntax s || Xlist.fold l false (fun b (_,t) -> is_raised_arg t || b) | |
132 | + | Tensor _ -> false | |
133 | + | t -> failwith ("is_raised_syntax: " ^ ENIAM_LCGstringOf.grammar_symbol 0 t) | |
134 | + | |
135 | + | |
136 | +let add_x_args (selectors,(bracket,quant,syntax),(semantics,weight)) = | |
137 | + if is_raised_syntax syntax then (selectors,(bracket,quant,syntax),(semantics,weight)) | |
138 | + else (selectors,(bracket,quant,add_x_args_rec syntax),(semantics,weight)) | |
139 | + | |
140 | +let rec extract_category pat rev = function | |
141 | + (cat,rel,v) :: l -> if cat = pat then rel,v,(List.rev rev @ l) else extract_category pat ((cat,rel,v) :: rev) l | |
142 | + | [] -> raise Not_found | |
143 | + | |
144 | +let dict_of_grammar grammar = | |
145 | + (* print_endline "dict_of_grammar"; *) | |
146 | + Xlist.fold grammar StringMap.empty (fun dict (selectors,(bracket,quant,syntax),semantics) -> | |
147 | + let pos_rel,poss,selectors = try extract_category Pos [] selectors with Not_found -> failwith "dict_of_grammar 1" in | |
148 | + let lemma_rel,lemmas,selectors = try extract_category Lemma [] selectors with Not_found -> Eq,[],selectors in | |
149 | + if pos_rel <> Eq || lemma_rel <> Eq then failwith "dict_of_grammar 2" else | |
150 | + let rule = selectors,(bracket,quant,syntax),semantics in | |
151 | + Xlist.fold poss dict (fun dict pos -> | |
152 | + let dict2,l = try StringMap.find dict pos with Not_found -> StringMap.empty,[] in | |
153 | + let dict2,l = | |
154 | + if lemmas = [] then dict2,rule :: l else | |
155 | + Xlist.fold lemmas dict2 (fun dict2 lemma -> | |
156 | + StringMap.add_inc dict2 lemma [rule] (fun l -> rule :: l)),l in | |
157 | + StringMap.add dict pos (dict2,l))) | |
158 | + | |
159 | +let make_rules x_flag filename = | |
160 | + let lexicon = ENIAM_LCGlexiconParser.load_lexicon filename in | |
161 | + let lexicon = List.rev (Xlist.rev_map lexicon assign_quantifiers) in | |
162 | + Xlist.iter lexicon check_quantifiers; | |
163 | + let lexicon = List.rev (Xlist.rev_map lexicon assign_semantics) in | |
164 | + let lexicon = if x_flag then List.rev (Xlist.rev_map lexicon add_x_args) else lexicon in | |
165 | + dict_of_grammar lexicon | |
166 | + | |
167 | +let find_rules rules cats = | |
168 | + let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith ("find_rules: unable to find rules for category '" ^ cats.pos ^ "' lemma='" ^ cats.lemma ^ "'") in | |
169 | + (* Printf.printf "find_rules: %s %s |rules|=%d\n" cats.lemma cats.pos (Xlist.size rules); *) | |
170 | + let rules = try StringMap.find lex_rules cats.lemma @ rules with Not_found -> rules in | |
171 | + Xlist.fold rules [] (fun rules (selectors,syntax,semantics) -> | |
172 | + try | |
173 | + let cats = apply_selectors cats selectors in | |
174 | + (cats,syntax,semantics) :: rules | |
175 | + with Not_found -> rules) | |
176 | + | |
177 | +let prepare_lex_entries rules lex_entries cats = | |
178 | + Xlist.fold lex_entries rules (fun rules (selectors,rule) -> | |
179 | + let selectors = (Pos,Eq,[cats.pos]) :: selectors in | |
180 | + let selectors,(bracket,quant,syntax),(rule,weight) = assign_quantifiers (selectors,[Syntax rule],0.) in | |
181 | + let selectors,(bracket,quant,syntax),(semantics,weight) = assign_semantics (selectors,(bracket,quant,syntax),(rule,weight)) in | |
182 | + try | |
183 | + let cats = apply_selectors cats selectors in | |
184 | + (cats,(bracket,quant,syntax),(semantics,weight)) :: rules | |
185 | + with Not_found -> rules) | |
186 | + | |
187 | +let assign_valence valence rules = | |
188 | + Xlist.fold rules [] (fun l (cats,(bracket,quant,syntax),semantics) -> | |
189 | + (* Printf.printf "%s %s |valence|=%d\n" cats.lemma cats.pos (Xlist.size valence); *) | |
190 | + if ENIAM_LCGrenderer.count_avar "schema" syntax > 0 then | |
191 | + Xlist.fold valence l (fun l (selectors,schema) -> | |
192 | + try | |
193 | + let cats = apply_selectors cats selectors in | |
194 | + (cats,(bracket,quant,ENIAM_LCGrenderer.substitute_schema "schema" schema syntax),semantics) :: l | |
195 | + with Not_found -> l) | |
196 | + else (cats,(bracket,quant,syntax),semantics) :: l) | |
197 | + | |
198 | +type labels = { | |
199 | + number: string; | |
200 | + case: string; | |
201 | + gender: string; | |
202 | + person: string; | |
203 | + aspect: string; | |
204 | +} | |
205 | + | |
206 | +let get_label e = function | |
207 | + Number -> e.number | |
208 | + | Case -> e.case | |
209 | + | Gender -> e.gender | |
210 | + | Person -> e.person | |
211 | + | Aspect -> e.aspect | |
212 | + | _ -> ENIAM_LCGreductions.get_variant_label () | |
213 | + | |
214 | +let get_labels () = { | |
215 | + number=ENIAM_LCGreductions.get_variant_label (); | |
216 | + case=ENIAM_LCGreductions.get_variant_label (); | |
217 | + gender=ENIAM_LCGreductions.get_variant_label (); | |
218 | + person=ENIAM_LCGreductions.get_variant_label (); | |
219 | + aspect=ENIAM_LCGreductions.get_variant_label (); | |
220 | +} | |
221 | + | |
222 | +let make_quantification e rules = | |
223 | + Xlist.map rules (fun (cats,(bracket,quant,syntax),semantics) -> | |
224 | + let syntax = Xlist.fold (List.rev quant) syntax (fun syntax (cat,t) -> | |
225 | + let t = if t = Top then ENIAM_LCGrenderer.make_quant_restriction (match_selector cats cat) else t in | |
226 | + let category = string_of_selector cat in | |
227 | + WithVar(category,t,get_label e cat,syntax)) in | |
228 | + let syntax = if bracket then ENIAM_LCGtypes.Bracket(true,true,syntax) else ENIAM_LCGtypes.Bracket(false,false,syntax) in | |
229 | + cats,syntax,semantics) | |
230 | + | |
231 | +let make_node id orth lemma pos syntax weight cat_list is_raised = | |
232 | + let attrs = Xlist.fold cat_list [] (fun attrs -> function | |
233 | + | Lemma -> attrs | |
234 | + | Pos -> attrs | |
235 | + | Pos2 -> attrs | |
236 | + | Cat -> ("CAT",SubstVar "cat") :: attrs | |
237 | + | Coerced -> ("COERCED",SubstVar "coerced") :: attrs | |
238 | + | Number -> ("NUM",SubstVar "number") :: attrs | |
239 | + | Case -> ("CASE",SubstVar "case") :: attrs | |
240 | + | Gender -> ("GEND",SubstVar "gender") :: attrs | |
241 | + | Person -> ("PERS",SubstVar "person") :: attrs | |
242 | + | Grad -> ("GRAD",SubstVar "grad") :: attrs | |
243 | + | Praep -> attrs | |
244 | + | Acm -> ("ACM",SubstVar "acm") :: attrs | |
245 | + | Aspect -> ("ASPECT", SubstVar "aspect") :: attrs | |
246 | + | Negation -> ("NEGATION",SubstVar "negation") :: attrs | |
247 | + | Mood -> ("MOOD", SubstVar "mood") :: attrs | |
248 | + | Tense -> ("TENSE", SubstVar "tense") :: attrs | |
249 | + | Nsyn -> ("NSYN", SubstVar "nsyn") :: attrs | |
250 | + | Nsem -> ("NSEM", SubstVar "nsem") :: attrs | |
251 | + | Ctype -> ("CTYPE", SubstVar "ctype") :: attrs | |
252 | + | Mode -> ("MODE", SubstVar "mode") :: attrs | |
253 | + | Psem -> ("PSEM", SubstVar "psem") :: attrs | |
254 | + | Icat -> attrs | |
255 | + | Inumber -> attrs | |
256 | + | Igender -> attrs | |
257 | + | Iperson -> attrs | |
258 | + | Nperson -> attrs | |
259 | + | Ncat -> attrs | |
260 | + | Plemma -> attrs | |
261 | + | Unumber -> attrs | |
262 | + | Ucase -> attrs | |
263 | + | Ugender -> attrs | |
264 | + | Uperson -> attrs | |
265 | + | Amode -> attrs) in | |
266 | + (* | s -> (string_of_selector s, Dot) :: attrs) in *) | |
267 | + (* | "lex" -> ("LEX",Val "+") :: attrs *) | |
268 | + (* | s -> failwith ("make_node: " ^ (string_of_selector s))) in *) | |
269 | + let symbol = if is_raised then | |
270 | + ENIAM_LCGrenderer.make_raised_symbol syntax | |
271 | + else ENIAM_LCGrenderer.make_symbol syntax in | |
272 | + {ENIAM_LCGrenderer.empty_node with | |
273 | + orth=orth; lemma=lemma; pos=pos; symbol=symbol; | |
274 | + weight=weight; id=id; attrs=List.rev attrs; args=Dot} | |
275 | + | |
276 | +let or_frame node = | |
277 | + (*Imp(Imp(Imp(Tensor[Atom "<root>"],Forward, | |
278 | + Tensor[Atom "</speaker>"]),Forward, | |
279 | + Imp(Tensor[Atom "ip"; Top; Top; Top],Forward,Tensor[Atom "or"])),Forward, | |
280 | + Tensor[Atom "or2"]),*) | |
281 | + (* Lambda("x",Lambda("y",Lambda("z",Node{node with gs=make_gs [] ["<root>"]; args=Tuple[ | |
282 | + Cut(SetAttr("AROLE",Val "Clause",SetAttr("GF",Gf CLAUSE,App(Var "y",Var "x"))))]}))) *) | |
283 | + VariantVar("lemma",Lambda("x",Lambda("y",Lambda("z",Node{node with args=Tuple[ | |
284 | + Cut(SetAttr("ARG_SYMBOL",Tuple[Val "TODO"],App(Var "y",Var "x")))]})))) | |
285 | + | |
286 | +let make_term id orth rules = | |
287 | + Xlist.map rules (fun (cats,syntax,(semantics,weight)) -> | |
288 | + ENIAM_LCGrenderer.reset_variable_names (); | |
289 | + ENIAM_LCGrenderer.add_variable_numbers (); | |
290 | + (* print_endline ("make_term 0: " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax); *) | |
291 | + match semantics with | |
292 | + BasicSem cat_list -> | |
293 | + let node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list false in | |
294 | + (* print_endline ("make_term 1: " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax); *) | |
295 | + let semantics = ENIAM_LCGrenderer.make_term node syntax in | |
296 | + ENIAM_LCGrenderer.simplify (syntax,semantics) | |
297 | + | RaisedSem(cat_list,outer_cat_list) -> | |
298 | + (* FIXME: what attributes should the outer node have (in particular, what weight?) *) | 
299 | + let node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list true in | |
300 | + let outer_node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) outer_cat_list false in | |
301 | + (* print_endline ("make_term 2: " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax); *) | |
302 | + let semantics = ENIAM_LCGrenderer.make_raised_term node outer_node syntax in | |
303 | + ENIAM_LCGrenderer.simplify (syntax,semantics) | |
304 | + | TermSem(cat_list,"λxλyλz.NODE(yx,z)") -> | |
305 | + let node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list false in | |
306 | + (* print_endline ("make_term 3: " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax); *) | |
307 | + let semantics = or_frame node in | |
308 | + ENIAM_LCGrenderer.simplify (syntax,semantics) | |
309 | + | _ -> failwith "make_term: ni") | |
310 | + | |
311 | +let create_entries rules id orth cats valence lex_entries = | |
312 | + Xlist.fold cats [] (fun l cats -> | |
313 | + (* Printf.printf "create_entries: orth=%s lemma=%s pos=%s\n" orth cats.lemma cats.pos; *) | |
314 | + (* variable_name_ref := []; *) | |
315 | + if cats.pos="interp" && cats.lemma="<clause>" then (BracketSet(Forward),Dot) :: l else | |
316 | + if cats.pos="interp" && cats.lemma="</clause>" then (BracketSet(Backward),Dot) :: l else | |
317 | + if (cats.pos2="noun" || cats.pos2="verb" || cats.pos2="adj" || cats.pos2="adv") && cats.cat="X" && not !default_category_flag && cats.pos <> "aglt" then l else | |
318 | + let e = get_labels () in | |
319 | + (* print_endline "create_entries 1"; *) | |
320 | + let rules = find_rules rules cats in | |
321 | + let rules = prepare_lex_entries rules lex_entries cats in | |
322 | + (* Printf.printf "create_entries 2: %s %s |rules|=%d\n" cats.lemma cats.pos (Xlist.size rules); *) | |
323 | + let rules = assign_valence valence rules in | |
324 | + (* print_endline "create_entries 3"; *) | |
325 | + let rules = make_quantification e rules in | |
326 | + (* print_endline "create_entries 4"; *) | |
327 | + let rules = make_term id orth rules in | |
328 | + (* print_endline "create_entries 5"; *) | |
329 | + rules @ l) | |
... | ... |
LCGlexicon/ENIAMcategoriesPL.ml
... | ... | @@ -41,6 +41,8 @@ let selector_values = Xlist.fold [ |
41 | 41 | Pos2, []; |
42 | 42 | Cat, []; |
43 | 43 | Coerced, []; |
44 | + Role, []; | |
45 | + SNode, ["concept";"context";"dot";"relations"]; | |
44 | 46 | Number, all_numbers; |
45 | 47 | Case, "postp" :: "pred" :: all_cases; |
46 | 48 | Gender, all_genders; |
... | ... | @@ -150,7 +152,7 @@ let num_nsem lemma = |
150 | 152 | |
151 | 153 | let part_set = StringSet.of_list ["się"; "nie"; "by"; "niech"; "niechaj"; "niechże"; "niechajże"; "czy"; "gdyby"] |
152 | 154 | |
153 | -let clarify_categories proper cat coerced = function | |
155 | +let clarify_categories proper cat coerced snode = function | |
154 | 156 | lemma,"subst",[numbers;cases;genders] -> |
155 | 157 | let numbers = expand_numbers numbers in |
156 | 158 | let cases = expand_cases cases in |
... | ... | @@ -158,9 +160,9 @@ let clarify_categories proper cat coerced = function |
158 | 160 | let cases,voc = split_voc cases in |
159 | 161 | let nsyn,nsem = noun_type proper lemma "subst" in |
160 | 162 | (if cases = [] then [] else |
161 | - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ | |
163 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ | |
162 | 164 | (if voc = [] then [] else |
163 | - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | |
165 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | |
164 | 166 | | lemma,"subst",[numbers;cases;genders;_] -> |
165 | 167 | let numbers = expand_numbers numbers in |
166 | 168 | let cases = expand_cases cases in |
... | ... | @@ -168,9 +170,9 @@ let clarify_categories proper cat coerced = function |
168 | 170 | let cases,voc = split_voc cases in |
169 | 171 | let nsyn,nsem = noun_type proper lemma "subst" in |
170 | 172 | (if cases = [] then [] else |
171 | - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ | |
173 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ | |
172 | 174 | (if voc = [] then [] else |
173 | - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | |
175 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | |
174 | 176 | | lemma,"depr",[numbers;cases;genders] -> |
175 | 177 | let numbers = expand_numbers numbers in |
176 | 178 | let cases = expand_cases cases in |
... | ... | @@ -178,9 +180,9 @@ let clarify_categories proper cat coerced = function |
178 | 180 | let cases,voc = split_voc cases in |
179 | 181 | let nsyn,nsem = noun_type proper lemma "depr" in |
180 | 182 | (if cases = [] then [] else |
181 | - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ | |
183 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ | |
182 | 184 | (if voc = [] then [] else |
183 | - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | |
185 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | |
184 | 186 | | lemma,"ppron12",[numbers;cases;genders;persons] -> |
185 | 187 | let numbers = expand_numbers numbers in |
186 | 188 | let cases = expand_cases cases in |
... | ... | @@ -326,82 +328,82 @@ let clarify_categories proper cat coerced = function |
326 | 328 | let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in |
327 | 329 | let genders = expand_genders genders in |
328 | 330 | let pos,pos2 = if StringSet.mem adj_pronoun_lexemes lemma then "apron","pron" else "adj","adj" in |
329 | - [{empty_cats with lemma=lemma; pos=pos; pos2=pos2; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; grads=grads}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *) | |
331 | + [{empty_cats with lemma=lemma; pos=pos; pos2=pos2; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; grads=grads}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *) | |
330 | 332 | | lemma,"adjc",[] -> |
331 | - [{empty_cats with lemma=lemma; pos="adjc"; pos2="adj"; cat=cat; coerced=coerced; numbers=["sg"]; cases=["pred"]; genders=["m1";"m2";"m3"]; grads=["pos"]}] | |
333 | + [{empty_cats with lemma=lemma; pos="adjc"; pos2="adj"; cat=cat; coerced=coerced; snode=snode; numbers=["sg"]; cases=["pred"]; genders=["m1";"m2";"m3"]; grads=["pos"]}] | |
332 | 334 | | lemma,"adjp",[] -> |
333 | - [{empty_cats with lemma=lemma; pos="adjp"; pos2="adj"; cat=cat; coerced=coerced; numbers=all_numbers; cases=["postp"]; genders=all_genders; grads=["pos"]}] | |
334 | - | lemma,"adja",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; pos="adja"; pos2="adja"}] | |
335 | - | lemma,"adv",[grads] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; pos="adv"; pos2="adv"; grads=grads; modes=adv_mode lemma}] | |
336 | - | lemma,"adv",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; pos="adv"; pos2="adv"; grads=["pos"]; modes=adv_mode lemma}] | |
335 | + [{empty_cats with lemma=lemma; pos="adjp"; pos2="adj"; cat=cat; coerced=coerced; snode=snode; numbers=all_numbers; cases=["postp"]; genders=all_genders; grads=["pos"]}] | |
336 | + | lemma,"adja",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; snode=snode; pos="adja"; pos2="adja"}] | |
337 | + | lemma,"adv",[grads] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; snode=snode; pos="adv"; pos2="adv"; grads=grads; modes=adv_mode lemma}] | |
338 | + | lemma,"adv",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; snode=snode; pos="adv"; pos2="adv"; grads=["pos"]; modes=adv_mode lemma}] | |
337 | 339 | | lemma,"ger",[numbers;cases;genders;aspects;negations] -> |
338 | 340 | let numbers = expand_numbers numbers in |
339 | 341 | let cases = expand_cases cases in |
340 | 342 | let genders = expand_genders genders in |
341 | - [{empty_cats with lemma=lemma; pos="ger"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; aspects=aspects; negations=negations}] (* FIXME: kwestia osoby przy voc *) | |
343 | + [{empty_cats with lemma=lemma; pos="ger"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; aspects=aspects; negations=negations}] (* FIXME: kwestia osoby przy voc *) | |
342 | 344 | | lemma,"pact",[numbers;cases;genders;aspects;negations] -> |
343 | 345 | let numbers = expand_numbers numbers in |
344 | 346 | let cases = expand_cases cases in |
345 | 347 | let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in |
346 | 348 | let genders = expand_genders genders in |
347 | - [{empty_cats with lemma=lemma; pos="pact"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}] | |
349 | + [{empty_cats with lemma=lemma; pos="pact"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}] | |
348 | 350 | | lemma,"ppas",[numbers;cases;genders;aspects;negations] -> |
349 | 351 | let numbers = expand_numbers numbers in |
350 | 352 | let cases = expand_cases cases in |
351 | 353 | let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in |
352 | 354 | let genders = expand_genders genders in |
353 | - [{empty_cats with lemma=lemma; pos="ppas"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}] | |
355 | + [{empty_cats with lemma=lemma; pos="ppas"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}] | |
354 | 356 | | lemma,"fin",[numbers;persons;aspects] -> (* FIXME: genders bez przymnogich *) |
355 | 357 | let numbers = expand_numbers numbers in |
356 | 358 | let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in |
357 | - let cats = {empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; negations=["aff"; "neg"]; moods=["indicative"]} in | |
359 | + let cats = {empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; negations=["aff"; "neg"]; moods=["indicative"]} in | |
358 | 360 | (Xlist.map aspects (function |
359 | 361 | "imperf" -> {cats with aspects=["imperf"]; tenses=["pres"]} |
360 | 362 | | "perf" -> {cats with aspects=["perf"]; tenses=["fut"]} |
361 | 363 | | _ -> failwith "clarify_categories")) @ |
362 | 364 | (if persons2 = [] then [] else |
363 | - [{empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]) | |
365 | + [{empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]) | |
364 | 366 | | lemma,"bedzie",[numbers;persons;aspects] -> |
365 | 367 | let numbers = expand_numbers numbers in |
366 | 368 | let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in |
367 | - [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] @ | |
369 | + [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] @ | |
368 | 370 | (if persons2 = [] then [] else |
369 | - [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]) | |
371 | + [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]) | |
370 | 372 | | lemma,"praet",[numbers;genders;aspects;nagl] -> |
371 | 373 | let numbers = expand_numbers numbers in |
372 | 374 | let genders = expand_genders genders in |
373 | - [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @ | |
375 | + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @ | |
374 | 376 | (if Xlist.mem aspects "imperf" then |
375 | - [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] | |
377 | + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] | |
376 | 378 | else []) |
377 | 379 | | lemma,"praet",[numbers;genders;aspects] -> |
378 | 380 | let numbers = expand_numbers numbers in |
379 | 381 | let genders = expand_genders genders in |
380 | - [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @ | |
382 | + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @ | |
381 | 383 | (if Xlist.mem aspects "imperf" then |
382 | - [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] | |
384 | + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] | |
383 | 385 | else []) |
384 | 386 | | lemma,"winien",[numbers;genders;aspects] -> |
385 | 387 | let numbers = expand_numbers numbers in |
386 | 388 | let genders = expand_genders genders in |
387 | - [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["pres"]}; | |
388 | - {empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] @ | |
389 | + [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["pres"]}; | |
390 | + {empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] @ | |
389 | 391 | (if Xlist.mem aspects "imperf" then |
390 | - [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] | |
392 | + [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] | |
391 | 393 | else []) |
392 | 394 | | lemma,"impt",[numbers;persons;aspects] -> |
393 | 395 | let numbers = expand_numbers numbers in |
394 | - [{empty_cats with lemma=lemma; pos="impt"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}] | |
396 | + [{empty_cats with lemma=lemma; pos="impt"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}] | |
395 | 397 | | lemma,"imps",[aspects] -> |
396 | - [{empty_cats with lemma=lemma; pos="imps"; pos2="verb"; cat=cat; coerced=coerced; numbers=all_numbers; genders=all_genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] | |
398 | + [{empty_cats with lemma=lemma; pos="imps"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=all_numbers; genders=all_genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] | |
397 | 399 | | lemma,"pred",[] -> (* FIXME: czy predykatyw zawsze jest niedokonany? *) |
398 | - [{empty_cats with lemma=lemma; pos="pred"; pos2="verb"; cat=cat; coerced=coerced; numbers=["sg"]; genders=[(*"n2"*)"n"]; persons=["ter"]; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["pres";"past";"fut"]}] | |
400 | + [{empty_cats with lemma=lemma; pos="pred"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=["sg"]; genders=[(*"n2"*)"n"]; persons=["ter"]; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["pres";"past";"fut"]}] | |
399 | 401 | | lemma,"aglt",[numbers;persons;aspects;wok] -> |
400 | 402 | let numbers = expand_numbers numbers in |
401 | 403 | [{empty_cats with lemma=lemma; pos="aglt"; pos2="verb"; numbers=numbers; persons=persons; aspects=aspects}] |
402 | - | lemma,"inf",[aspects] -> [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; cat=cat; coerced=coerced; aspects=aspects; negations=["aff"; "neg"]}] | |
403 | - | lemma,"pcon",[aspects] -> [{empty_cats with lemma=lemma; pos="pcon"; pos2="verb"; cat=cat; coerced=coerced; aspects=aspects; negations=["aff"; "neg"]}] | |
404 | - | lemma,"pant",[aspects] -> [{empty_cats with lemma=lemma; pos="pant"; pos2="verb"; cat=cat; coerced=coerced; aspects=aspects; negations=["aff"; "neg"]}] | |
404 | + | lemma,"inf",[aspects] -> [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] | |
405 | + | lemma,"pcon",[aspects] -> [{empty_cats with lemma=lemma; pos="pcon"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] | |
406 | + | lemma,"pant",[aspects] -> [{empty_cats with lemma=lemma; pos="pant"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] | |
405 | 407 | | lemma,"qub",[] -> |
406 | 408 | if StringSet.mem part_set lemma then [{empty_cats with lemma=lemma; pos="part"; pos2="qub"}] |
407 | 409 | else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"; cat=cat}] |
... | ... | @@ -429,7 +431,7 @@ let clarify_categories proper cat coerced = function |
429 | 431 | | _ -> [] *) |
430 | 432 | |
431 | 433 | let selector_names = StringSet.of_list [ |
432 | - "lemma";"pos";"pos2";"cat";"coerced";"number";"case";"gender";"person";"grad"; | |
434 | + "lemma";"pos";"pos2";"cat";"coerced";"role";"node";"number";"case";"gender";"person";"grad"; | |
433 | 435 | "praep";"acm";"aspect";"negation";"mood";"tense";"nsyn";"nsem";"ctype";"mode";"psem"; |
434 | 436 | "icat";"inumber";"igender";"iperson";"nperson";"ncat";"plemma"; |
435 | 437 | "unumber";"ucase";"ugender";"uperson";"amode"] |
... | ... | @@ -442,6 +444,8 @@ let string_of_selector = function |
442 | 444 | | Pos2 -> "pos2" |
443 | 445 | | Cat -> "cat" |
444 | 446 | | Coerced -> "coerced" |
447 | + | Role -> "role" | |
448 | + | SNode -> "node" | |
445 | 449 | | Number -> "number" |
446 | 450 | | Case -> "case" |
447 | 451 | | Gender -> "gender" |
... | ... | @@ -483,6 +487,8 @@ let selector_of_string = function |
483 | 487 | | "pos2" -> Pos2 |
484 | 488 | | "cat" -> Cat |
485 | 489 | | "coerced" -> Coerced |
490 | + | "role" -> Role | |
491 | + | "node" -> SNode | |
486 | 492 | | "number" -> Number |
487 | 493 | | "case" -> Case |
488 | 494 | | "gender" -> Gender |
... | ... | @@ -519,6 +525,8 @@ let match_selector cats = function |
519 | 525 | | Pos -> [cats.pos] |
520 | 526 | | Cat -> [cats.cat] |
521 | 527 | | Coerced -> cats.coerced |
528 | + | Role -> cats.roles | |
529 | + | SNode -> cats.snode | |
522 | 530 | | Number -> cats.numbers |
523 | 531 | | Case -> cats.cases |
524 | 532 | | Gender -> cats.genders |
... | ... | @@ -556,6 +564,8 @@ let set_selector cats vals = function |
556 | 564 | | Pos -> (match vals with [v] -> {cats with pos=v} | _ -> failwith "set_selector: Pos") |
557 | 565 | | Cat -> (match vals with [v] -> {cats with cat=v} | _ -> failwith "set_selector: Cat") |
558 | 566 | | Coerced -> {cats with coerced=vals} |
567 | + | Role -> {cats with roles=vals} | |
568 | + | SNode -> {cats with snode=vals} | |
559 | 569 | | c -> failwith ("set_selector: " ^ string_of_selector c) |
560 | 570 | |
561 | 571 | let rec apply_selectors cats = function |
... | ... | @@ -570,75 +580,75 @@ let rec apply_selectors cats = function |
570 | 580 | apply_selectors (set_selector cats (StringSet.to_list vals) sel) l |
571 | 581 | |
572 | 582 | let pos_categories = Xlist.fold [ |
573 | - "subst",[Lemma;Cat;Coerced;Number;Case;Gender;Person;Nsyn;Nsem;]; | |
574 | - "depr",[Lemma;Cat;Coerced;Number;Case;Gender;Person;Nsyn;Nsem;]; | |
575 | - "ppron12",[Lemma;Number;Case;Gender;Person;]; | |
576 | - "ppron3",[Lemma;Number;Case;Gender;Person;Praep;]; | |
577 | - "siebie",[Lemma;Number;Case;Gender;Person;]; | |
578 | - "prep",[Lemma;Cat;Coerced;Psem;Case;]; | |
579 | - "compar",[Lemma;Cat;Coerced;Case;]; | |
580 | - "num",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | |
581 | - "numcomp",[Lemma]; | |
582 | - "intnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | |
583 | - "realnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | |
584 | - "intnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | |
585 | - "realnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | |
586 | - "symbol",[Lemma;Number;Case;Gender;Person;]; | |
587 | - "ordnum",[Lemma;Number;Case;Gender;Grad;]; | |
588 | - "date",[Lemma;Nsyn;Nsem;]; | |
589 | - "date-interval",[Lemma;Nsyn;Nsem;]; | |
590 | - "hour-minute",[Lemma;Nsyn;Nsem;]; | |
591 | - "hour",[Lemma;Nsyn;Nsem;]; | |
592 | - "hour-minute-interval",[Lemma;Nsyn;Nsem;]; | |
593 | - "hour-interval",[Lemma;Nsyn;Nsem;]; | |
594 | - "year",[Lemma;Nsyn;Nsem;]; | |
595 | - "year-interval",[Lemma;Nsyn;Nsem;]; | |
596 | - "day",[Lemma;Nsyn;Nsem;]; | |
597 | - "day-interval",[Lemma;Nsyn;Nsem;]; | |
598 | - "day-month",[Lemma;Nsyn;Nsem;]; | |
599 | - "day-month-interval",[Lemma;Nsyn;Nsem;]; | |
600 | - "month-interval",[Lemma;Nsyn;Nsem;]; | |
601 | - "roman-ordnum",[Lemma;Number;Case;Gender;Grad;]; | |
602 | - "roman",[Lemma;Nsyn;Nsem;]; | |
603 | - "roman-interval",[Lemma;Nsyn;Nsem;]; | |
604 | - "match-result",[Lemma;Nsyn;Nsem;]; | |
605 | - "url",[Lemma;Nsyn;Nsem;]; | |
606 | - "email",[Lemma;Nsyn;Nsem;]; | |
607 | - "phone-number",[Lemma;Nsyn;Nsem;]; | |
608 | - "postal-code",[Lemma;Nsyn;Nsem;]; | |
609 | - "obj-id",[Lemma;Nsyn;Nsem;]; | |
610 | - "building-number",[Lemma;Nsyn;Nsem;]; | |
611 | - "fixed",[Lemma;]; | |
612 | - "adj",[Lemma;Cat;Coerced;Number;Case;Gender;Grad;]; | |
613 | - "adjc",[Lemma;Cat;Coerced;Number;Case;Gender;Grad;]; | |
614 | - "adjp",[Lemma;Cat;Coerced;Number;Case;Gender;Grad;]; | |
615 | - "apron",[Lemma;Number;Case;Gender;Grad;]; | |
616 | - "adja",[Lemma;Cat;Coerced;]; | |
617 | - "adv",[Lemma;Cat;Coerced;Grad;Mode];(* ctype *) | |
618 | - "ger",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Case;Gender;Person;Aspect;Negation;]; | |
619 | - "pact",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Case;Gender;Aspect;Negation;]; | |
620 | - "ppas",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Case;Gender;Aspect;Negation;]; | |
621 | - "fin",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
622 | - "bedzie",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
623 | - "praet",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
624 | - "winien",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
625 | - "impt",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
626 | - "imps",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
627 | - "pred",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
628 | - "aglt",[Lemma;Number;Person;Aspect;]; | |
629 | - "inf",[Lemma;(*NewLemma;*)Cat;Coerced;Aspect;Negation;]; | |
630 | - "pcon",[Lemma;(*NewLemma;*)Cat;Coerced;Aspect;Negation;]; | |
631 | - "pant",[Lemma;(*NewLemma;*)Cat;Coerced;Aspect;Negation;]; | |
632 | - "qub",[Lemma;Cat;]; | |
633 | - "part",[Lemma;]; | |
634 | - "comp",[Lemma;];(* ctype *) | |
635 | - "conj",[Lemma;];(* ctype *) | |
636 | - "interj",[Lemma;Cat;Coerced;]; | |
637 | - "sinterj",[Lemma;]; | |
638 | - "burk",[Lemma;]; | |
639 | - "interp",[Lemma;]; | |
640 | - "unk",[Lemma;Number;Case;Gender;Person;]; | |
641 | - "xxx",[Lemma;Number;Case;Gender;Person;]; | |
642 | - "html-tag",[Lemma;]; | |
643 | - "list-item",[Lemma;]; | |
583 | + "subst",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Person;Nsyn;Nsem;]; | |
584 | + "depr",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Person;Nsyn;Nsem;]; | |
585 | + "ppron12",[Lemma;SNode;Number;Case;Gender;Person;]; | |
586 | + "ppron3",[Lemma;SNode;Number;Case;Gender;Person;Praep;]; | |
587 | + "siebie",[Lemma;SNode;Number;Case;Gender;Person;]; | |
588 | + "prep",[Lemma;Cat;Coerced;Role;SNode;Psem;Case;]; | |
589 | + "compar",[Lemma;Cat;Coerced;Role;SNode;Case;]; | |
590 | + "num",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;]; | |
591 | + "numcomp",[Lemma;SNode]; | |
592 | + "intnum",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;]; | |
593 | + "realnum",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;]; | |
594 | + "intnum-interval",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;]; | |
595 | + "realnum-interval",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;]; | |
596 | + "symbol",[Lemma;SNode;Number;Case;Gender;Person;]; | |
597 | + "ordnum",[Lemma;SNode;Number;Case;Gender;Grad;]; | |
598 | + "date",[Lemma;SNode;Nsyn;Nsem;]; | |
599 | + "date-interval",[Lemma;SNode;Nsyn;Nsem;]; | |
600 | + "hour-minute",[Lemma;SNode;Nsyn;Nsem;]; | |
601 | + "hour",[Lemma;SNode;Nsyn;Nsem;]; | |
602 | + "hour-minute-interval",[Lemma;SNode;Nsyn;Nsem;]; | |
603 | + "hour-interval",[Lemma;SNode;Nsyn;Nsem;]; | |
604 | + "year",[Lemma;SNode;Nsyn;Nsem;]; | |
605 | + "year-interval",[Lemma;SNode;Nsyn;Nsem;]; | |
606 | + "day",[Lemma;SNode;Nsyn;Nsem;]; | |
607 | + "day-interval",[Lemma;SNode;Nsyn;Nsem;]; | |
608 | + "day-month",[Lemma;SNode;Nsyn;Nsem;]; | |
609 | + "day-month-interval",[Lemma;SNode;Nsyn;Nsem;]; | |
610 | + "month-interval",[Lemma;SNode;Nsyn;Nsem;]; | |
611 | + "roman-ordnum",[Lemma;SNode;Number;Case;Gender;Grad;]; | |
612 | + "roman",[Lemma;SNode;Nsyn;Nsem;]; | |
613 | + "roman-interval",[Lemma;SNode;Nsyn;Nsem;]; | |
614 | + "match-result",[Lemma;SNode;Nsyn;Nsem;]; | |
615 | + "url",[Lemma;SNode;Nsyn;Nsem;]; | |
616 | + "email",[Lemma;SNode;Nsyn;Nsem;]; | |
617 | + "phone-number",[Lemma;SNode;Nsyn;Nsem;]; | |
618 | + "postal-code",[Lemma;SNode;Nsyn;Nsem;]; | |
619 | + "obj-id",[Lemma;SNode;Nsyn;Nsem;]; | |
620 | + "building-number",[Lemma;SNode;Nsyn;Nsem;]; | |
621 | + "fixed",[Lemma;SNode;]; | |
622 | + "adj",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Grad;]; | |
623 | + "adjc",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Grad;]; | |
624 | + "adjp",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Grad;]; | |
625 | + "apron",[Lemma;SNode;Number;Case;Gender;Grad;]; | |
626 | + "adja",[Lemma;Cat;Coerced;Role;SNode;]; | |
627 | + "adv",[Lemma;Cat;Coerced;Role;SNode;Grad;Mode];(* ctype *) | |
628 | + "ger",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Case;Gender;Person;Aspect;Negation;]; | |
629 | + "pact",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Case;Gender;Aspect;Negation;]; | |
630 | + "ppas",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Case;Gender;Aspect;Negation;]; | |
631 | + "fin",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
632 | + "bedzie",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
633 | + "praet",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
634 | + "winien",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
635 | + "impt",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
636 | + "imps",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
637 | + "pred",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
638 | + "aglt",[Lemma;SNode;Number;Person;Aspect;]; | |
639 | + "inf",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; | |
640 | + "pcon",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; | |
641 | + "pant",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; | |
642 | + "qub",[Lemma;Cat;SNode;]; | |
643 | + "part",[Lemma;SNode]; | |
644 | + "comp",[Lemma;SNode;];(* ctype *) | |
645 | + "conj",[Lemma;SNode;];(* ctype *) | |
646 | + "interj",[Lemma;Cat;Coerced;Role;SNode;]; | |
647 | + "sinterj",[Lemma;SNode;]; | |
648 | + "burk",[Lemma;SNode;]; | |
649 | + "interp",[Lemma;SNode;]; | |
650 | + "unk",[Lemma;SNode;Number;Case;Gender;Person;]; | |
651 | + "xxx",[Lemma;SNode;Number;Case;Gender;Person;]; | |
652 | + "html-tag",[Lemma;SNode;]; | |
653 | + "list-item",[Lemma;SNode;]; | |
644 | 654 | ] StringMap.empty (fun map (k,l) -> StringMap.add map k l) |
... | ... |
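Every part-of-speech entry visible in the new pos_categories above now carries the SNode selector, while adj, adjc, adjp, adja, adv, ger, the participles, the finite verb forms and interj additionally carry Cat, Coerced and Role, so the semantic node type and role are exposed next to the morphosyntactic attributes. A minimal sketch of how a caller might inspect the map; it assumes pos_categories and the selector constructors are exported, and selectors_of_pos is a hypothetical helper, not part of this commit:

open ENIAM_LCGlexiconTypes
open Xstd

(* hypothetical helper: look up the selector list registered for a given POS *)
let selectors_of_pos pos =
  try StringMap.find ENIAMcategoriesPL.pos_categories pos
  with Not_found -> [Lemma; SNode]  (* assumed fallback, not in the source *)

let _ = List.mem Role (selectors_of_pos "adj")    (* true after this change *)
let _ = List.mem SNode (selectors_of_pos "conj")  (* SNode is now listed for every POS shown above *)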
LCGlexicon/ENIAMcategoriesPL_old.ml
0 → 100644
1 | +(* | |
2 | + * ENIAM_LCGlexicon is a library that provides LCG lexicon form Polish | |
3 | + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | + * | |
6 | + * This library is free software: you can redistribute it and/or modify | |
7 | + * it under the terms of the GNU Lesser General Public License as published by | |
8 | + * the Free Software Foundation, either version 3 of the License, or | |
9 | + * (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + * GNU Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public License | |
17 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | + *) | |
19 | + | |
20 | +open ENIAM_LCGlexiconTypes | |
21 | +open Xstd | |
22 | + | |
23 | +let all_numbers = ["sg";"pl"] | |
24 | +let all_cases = ["nom";"gen";"dat";"acc";"inst";"loc";"voc"] | |
25 | +(* let all_genders = ["m1";"m2";"m3";"f";"n1";"n2";"p1";"p2";"p3"] *) | |
26 | +let all_genders = ["m1";"m2";"m3";"f";"n"] | |
27 | +let all_persons = ["pri";"sec";"ter"] | |
28 | +(* FIXME: instead of listing all possible values, Zero could be used when there is no agreement *) | |
29 | + | |
30 | +let selector_values = Xlist.fold [ | |
31 | + Lemma, []; | |
32 | + Pos, ["subst";"depr";"ppron12";"ppron3";"siebie";"prep";"fixed";"num";"numcomp";"intnum"; | |
33 | + "realnum";"intnum-interval";"realnum-interval";"symbol";"ordnum"; | |
34 | + "date";"date-interval";"hour-minute";"hour";"hour-minute-interval"; | |
35 | + "hour-interval";"year";"year-interval";"day";"day-interval";"day-month"; | |
36 | + "day-month-interval";"month-interval";"roman";"roman-interval";"roman-ordnum"; | |
37 | + "match-result";"url";"email";"phone-number";"postal-code";"obj-id";"building-number";"list-item";"adj";"adjc";"adjp";"adja"; | |
38 | + "adv";"ger";"pact";"ppas";"fin";"bedzie";"praet";"winien";"impt"; | |
39 | + "imps";"pred";"aglt";"inf";"pcon";"pant";"qub";"part";"comp";"conj";"interj"; | |
40 | + "sinterj";"burk";"interp";"xxx";"unk";"html-tag";"apron";"compar"]; | |
41 | + Pos2, []; | |
42 | + Cat, []; | |
43 | + Coerced, []; | |
44 | + Number, all_numbers; | |
45 | + Case, "postp" :: "pred" :: all_cases; | |
46 | + Gender, all_genders; | |
47 | + Person, all_persons; | |
48 | + Grad, ["pos";"com";"sup"]; | |
49 | + Praep, ["praep";"npraep";"praep-npraep"]; | |
50 | + Acm, ["congr";"rec"]; | |
51 | + Ctype, ["int";"rel";"sub";"coord"]; | |
52 | + Mode, ["abl";"adl";"locat";"perl";"dur";"temp";"mod"]; | |
53 | + Aspect, ["perf";"imperf"]; | |
54 | + Negation, ["neg";"aff"]; | |
55 | + Mood, ["indicative";"imperative";"conditional"]; | |
56 | + Tense, ["past";"pres";"fut"]; | |
57 | + Nsyn, ["proper";"pronoun";"common"]; | |
58 | + Nsem, ["count";"time";"mass";"measure"]; | |
59 | + Psem, ["sem";"nosem"]; | |
60 | + Ucase, all_cases; | |
61 | +] SelectorMap.empty (fun map (selector,vals) -> SelectorMap.add map selector vals) | |
62 | + | |
63 | + | |
64 | +let expand_numbers numbers = | |
65 | + if Xlist.mem numbers "_" then all_numbers else numbers | |
66 | + | |
67 | +let expand_genders genders = | |
68 | + if Xlist.mem genders "_" then all_genders else genders | |
69 | + | |
70 | +let expand_cases cases = | |
71 | + if Xlist.mem cases "_" || Xlist.mem cases "$C" then all_cases else cases | |
72 | + | |
73 | +let expand_akcs akcs = | |
74 | + if Xlist.mem akcs "_" then ["akc";"nakc"] else akcs | |
75 | + | |
76 | +let split_voc cases = | |
77 | + Xlist.fold cases ([],[]) (fun (cases,voc) -> function | |
78 | + "voc" -> cases, "voc" :: voc | |
79 | + | s -> s :: cases, voc) | |
80 | + | |
81 | +let load_subst_data filename _ = | |
82 | + StringSet.of_list (File.load_lines filename) | |
83 | + | |
84 | +let subst_uncountable_lexemes = ref StringSet.empty | |
85 | +let subst_uncountable_lexemes2 = ref StringSet.empty | |
86 | +let subst_container_lexemes = ref StringSet.empty | |
87 | +let subst_numeral_lexemes = ref StringSet.empty | |
88 | +let subst_time_lexemes = ref StringSet.empty | |
89 | + | |
90 | +let subst_pronoun_lexemes = StringSet.of_list ["co"; "kto"; "cokolwiek"; "ktokolwiek"; "nic"; "nikt"; "coś"; "ktoś"; "to"] | |
91 | +let adj_pronoun_lexemes = StringSet.of_list ["czyj"; "jaki"; "który"; "jakiś"; "ten"; "taki"] | |
92 | +let compar_lexemes = StringSet.of_list ["jak"; "jako"; "niż"; "niczym"; "niby"; "co"; "zamiast"] | |
93 | + | |
94 | +(* let adj_quant_lexemes = StringSet.of_list ["każdy"; "wszelki"; "wszystek"; "żaden"; "jakiś"; "pewien"; "niektóry"; "jedyny"; "sam"] *) | |
95 | + | |
96 | +let load_adv_modes filename adv_modes = | |
97 | + File.fold_tab filename adv_modes (fun adv_modes -> function | |
98 | + [adv;mode] -> StringMap.add_inc adv_modes adv [mode] (fun l -> mode :: l) | |
99 | + | _ -> failwith "load_adv_modes") | |
100 | + | |
101 | +let load_num_nsems filename num_nsems = | |
102 | + File.fold_tab filename num_nsems (fun num_nsems -> function | |
103 | + lemma :: _ :: nsems :: _ -> | |
104 | + Xlist.fold (Xstring.split "," nsems) num_nsems (fun num_nsems nsem -> | |
105 | + StringMap.add_inc num_nsems lemma [nsem] (fun l -> nsem :: l)) | |
106 | + | _ -> failwith "load_num_nsems") | |
107 | + | |
108 | +let adv_modes = ref (StringMap.empty : string list StringMap.t) | |
109 | +let num_nsems = ref (StringMap.empty : string list StringMap.t) | |
110 | + | |
111 | +let initialize () = | |
112 | + subst_uncountable_lexemes := File.catch_no_file (load_subst_data subst_uncountable_lexemes_filename) StringSet.empty; | |
113 | + subst_uncountable_lexemes2 := File.catch_no_file (load_subst_data subst_uncountable_lexemes_filename2) StringSet.empty; | |
114 | + subst_container_lexemes := File.catch_no_file (load_subst_data subst_container_lexemes_filename) StringSet.empty; | |
115 | + subst_numeral_lexemes := File.catch_no_file (load_subst_data subst_numeral_lexemes_filename) StringSet.empty; | |
116 | + subst_time_lexemes := File.catch_no_file (load_subst_data subst_time_lexemes_filename) StringSet.empty; | |
117 | + adv_modes := File.catch_no_file (load_adv_modes adv_modes_filename) StringMap.empty; | |
118 | + num_nsems := File.catch_no_file (load_num_nsems num_nsems_filename) StringMap.empty; | |
119 | + () | |
120 | + | |
121 | +let noun_type proper lemma pos = | |
122 | + let nsyn = | |
123 | + if proper then "proper" else | |
124 | + if pos = "ppron12" || pos = "ppron3" || pos = "siebie" then "pronoun" else | |
125 | + if pos = "symbol" || pos = "date" || pos = "date-interval" || pos = "hour" || pos = "hour-minute" || pos = "hour-interval" || pos = "hour-minute-interval" || | |
126 | + pos = "year" || pos = "year-interval" || pos = "day" || pos = "day-interval" || pos = "day-month" || pos = "day-month-interval" || | |
127 | + pos = "match-result" || pos = "month-interval" || pos = "roman" || pos = "roman-interval" || pos = "url" || pos = "email" || pos = "phone-number" || pos = "postal-code" || pos = "obj-id" || pos = "building-number" || pos = "date" then "proper" else | |
128 | + if StringSet.mem subst_pronoun_lexemes lemma then "pronoun" else | |
129 | + "common" in | |
130 | + let nsem = | |
131 | + if pos = "ppron12" || pos = "ppron3" || pos = "siebie" then ["count"] else | |
132 | + if StringSet.mem !subst_time_lexemes lemma then ["time"] else | |
133 | + let l = ["count"] in | |
134 | + let l = if StringSet.mem !subst_uncountable_lexemes lemma || StringSet.mem !subst_uncountable_lexemes2 lemma then "mass" :: l else l in | |
135 | + if StringSet.mem !subst_container_lexemes lemma then "measure" :: l else l in | |
136 | + [nsyn],nsem | |
137 | + | |
138 | +let adv_mode lemma = | |
139 | + try | |
140 | + StringMap.find !adv_modes lemma | |
141 | + with Not_found -> ["mod"] | |
142 | + | |
143 | +let num_nsem lemma = | |
144 | + try | |
145 | + StringMap.find !num_nsems lemma | |
146 | + with Not_found -> (*try | |
147 | + StringMap.find !num_nsems (String.lowercase lemma) | |
148 | + with Not_found ->*) failwith ("num_nsem: " ^ lemma) | |
149 | + | |
150 | + | |
151 | +let part_set = StringSet.of_list ["się"; "nie"; "by"; "niech"; "niechaj"; "niechże"; "niechajże"; "czy"; "gdyby"] | |
152 | + | |
153 | +let clarify_categories proper cat coerced = function | |
154 | + lemma,"subst",[numbers;cases;genders] -> | |
155 | + let numbers = expand_numbers numbers in | |
156 | + let cases = expand_cases cases in | |
157 | + let genders = expand_genders genders in | |
158 | + let cases,voc = split_voc cases in | |
159 | + let nsyn,nsem = noun_type proper lemma "subst" in | |
160 | + (if cases = [] then [] else | |
161 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ | |
162 | + (if voc = [] then [] else | |
163 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | |
164 | + | lemma,"subst",[numbers;cases;genders;_] -> | |
165 | + let numbers = expand_numbers numbers in | |
166 | + let cases = expand_cases cases in | |
167 | + let genders = expand_genders genders in | |
168 | + let cases,voc = split_voc cases in | |
169 | + let nsyn,nsem = noun_type proper lemma "subst" in | |
170 | + (if cases = [] then [] else | |
171 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ | |
172 | + (if voc = [] then [] else | |
173 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | |
174 | + | lemma,"depr",[numbers;cases;genders] -> | |
175 | + let numbers = expand_numbers numbers in | |
176 | + let cases = expand_cases cases in | |
177 | + let genders = expand_genders genders in | |
178 | + let cases,voc = split_voc cases in | |
179 | + let nsyn,nsem = noun_type proper lemma "depr" in | |
180 | + (if cases = [] then [] else | |
181 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ | |
182 | + (if voc = [] then [] else | |
183 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | |
184 | + | lemma,"ppron12",[numbers;cases;genders;persons] -> | |
185 | + let numbers = expand_numbers numbers in | |
186 | + let cases = expand_cases cases in | |
187 | + let genders = expand_genders genders in | |
188 | + [{empty_cats with lemma=lemma; pos="ppron12"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons}] | |
189 | + | lemma,"ppron12",[numbers;cases;genders;persons;akcs] -> | |
190 | + let numbers = expand_numbers numbers in | |
191 | + let cases = expand_cases cases in | |
192 | + let genders = expand_genders genders in | |
193 | + [{empty_cats with lemma=lemma; pos="ppron12"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons}] | |
194 | + | lemma,"ppron3",[numbers;cases;genders;persons] -> | |
195 | + let numbers = expand_numbers numbers in | |
196 | + let cases = expand_cases cases in | |
197 | + let genders = expand_genders genders in | |
198 | + [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=["praep-npraep"]}] | |
199 | + | lemma,"ppron3",[numbers;cases;genders;persons;akcs] -> | |
200 | + let numbers = expand_numbers numbers in | |
201 | + let cases = expand_cases cases in | |
202 | + let genders = expand_genders genders in | |
203 | + [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=["praep-npraep"]}] | |
204 | + | lemma,"ppron3",[numbers;cases;genders;persons;akcs;praep] -> | |
205 | + let numbers = expand_numbers numbers in | |
206 | + let cases = expand_cases cases in | |
207 | + let genders = expand_genders genders in | |
208 | + let praep = match praep with | |
209 | + ["praep";"npraep"] -> ["praep-npraep"] | |
210 | + | ["npraep";"praep"] -> ["praep-npraep"] | |
211 | + | _ -> praep in | |
212 | + [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=praep}] | |
213 | + | lemma,"siebie",[cases] -> (* FIXME: czy tu określać numbers genders persons? *) | |
214 | + let cases = expand_cases cases in | |
215 | + [{empty_cats with lemma=lemma; pos="siebie"; pos2="pron"; numbers=all_numbers; cases=cases; genders=all_genders; persons=["ter"]}] | |
216 | + | lemma,"prep",[cases;woks] -> | |
217 | + if StringSet.mem compar_lexemes lemma then | |
218 | + [{empty_cats with lemma=lemma; pos="compar"; pos2="prep"}] else | |
219 | + let cases = expand_cases cases in | |
220 | + [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; cases=cases; psem=["sem";"nosem"]}] | |
221 | + | lemma,"prep",[cases] -> | |
222 | + if StringSet.mem compar_lexemes lemma then | |
223 | + [{empty_cats with lemma=lemma; pos="compar"; pos2="prep"}] else | |
224 | + let cases = expand_cases cases in | |
225 | + [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; cases=cases; psem=["sem";"nosem"]}] | |
226 | + | lemma,"num",[numbers;cases;genders;acms] -> | |
227 | + let numbers = expand_numbers numbers in | |
228 | + let cases = expand_cases cases in | |
229 | + let genders = expand_genders genders in | |
230 | + let nsem = num_nsem lemma in | |
231 | + [{empty_cats with lemma=lemma; pos="num"; pos2="num"; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms; nsem=nsem}] | |
232 | + | lemma,"num",[numbers;cases;genders;acms;_] -> | |
233 | + let numbers = expand_numbers numbers in | |
234 | + let cases = expand_cases cases in | |
235 | + let genders = expand_genders genders in | |
236 | + let nsem = num_nsem lemma in | |
237 | + [{empty_cats with lemma=lemma; pos="num"; pos2="num"; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms; nsem=nsem}] | |
238 | + | lemma,"numcomp",[] -> [{empty_cats with lemma=lemma; pos="numcomp"; pos2="numcomp"}] | |
239 | + | lemma,"intnum",[] -> | |
240 | + let numbers,acms = | |
241 | + if lemma = "1" || lemma = "-1" then ["sg"],["congr"] else | |
242 | + let s = String.get lemma (String.length lemma - 1) in | |
243 | + ["pl"],if s = '2' || s = '3' || s = '4' then ["rec";"congr"] else ["rec"] in | |
244 | + [{empty_cats with lemma=lemma; pos="intnum"; pos2="num"; numbers=numbers; cases=all_cases; genders=all_genders; persons=["ter"]; acms=acms; nsem=["count"]}] | |
245 | + | lemma,"realnum",[] -> | |
246 | + [{empty_cats with lemma=lemma; pos="realnum"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}] | |
247 | + | lemma,"intnum-interval",[] -> | |
248 | + [{empty_cats with lemma=lemma; pos="intnum-interval"; pos2="num"; numbers=["pl"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec";"congr"]; nsem=["count"]}] | |
249 | + | lemma,"realnum-interval",[] -> | |
250 | + [{empty_cats with lemma=lemma; pos="realnum-interval"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}] | |
251 | + | lemma,"symbol",[] -> | |
252 | + [{empty_cats with lemma=lemma; pos="symbol"; pos2="noun"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]}] | |
253 | + | lemma,"ordnum",[] -> | |
254 | + [{empty_cats with lemma=lemma; pos="ordnum"; pos2="adj"; numbers=all_numbers; cases=all_cases; genders=all_genders; grads=["pos"]}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *) | |
255 | + | lemma,"date",[] -> | |
256 | + let nsyn,nsem = noun_type proper lemma "date" in | |
257 | + [{empty_cats with lemma=lemma; pos="date"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
258 | + | lemma,"date-interval",[] -> | |
259 | + let nsyn,nsem = noun_type proper lemma "date-interval" in | |
260 | + [{empty_cats with lemma=lemma; pos="date-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
261 | + | lemma,"hour-minute",[] -> | |
262 | + let nsyn,nsem = noun_type proper lemma "hour-minute" in | |
263 | + [{empty_cats with lemma=lemma; pos="hour-minute"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
264 | + | lemma,"hour",[] -> | |
265 | + let nsyn,nsem = noun_type proper lemma "hour" in | |
266 | + [{empty_cats with lemma=lemma; pos="hour"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
267 | + | lemma,"hour-minute-interval",[] -> | |
268 | + let nsyn,nsem = noun_type proper lemma "hour-minute-interval" in | |
269 | + [{empty_cats with lemma=lemma; pos="hour-minute-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
270 | + | lemma,"hour-interval",[] -> | |
271 | + let nsyn,nsem = noun_type proper lemma "hour-interval" in | |
272 | + [{empty_cats with lemma=lemma; pos="hour-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
273 | + | lemma,"year",[] -> | |
274 | + let nsyn,nsem = noun_type proper lemma "year" in | |
275 | + [{empty_cats with lemma=lemma; pos="year"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
276 | + | lemma,"year-interval",[] -> | |
277 | + let nsyn,nsem = noun_type proper lemma "year-interval" in | |
278 | + [{empty_cats with lemma=lemma; pos="year-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
279 | + | lemma,"day",[] -> | |
280 | + let nsyn,nsem = noun_type proper lemma "day" in | |
281 | + [{empty_cats with lemma=lemma; pos="day"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
282 | + | lemma,"day-interval",[] -> | |
283 | + let nsyn,nsem = noun_type proper lemma "day-interval" in | |
284 | + [{empty_cats with lemma=lemma; pos="day-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
285 | + | lemma,"day-month",[] -> | |
286 | + let nsyn,nsem = noun_type proper lemma "day-month" in | |
287 | + [{empty_cats with lemma=lemma; pos="day-month"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
288 | + | lemma,"day-month-interval",[] -> | |
289 | + let nsyn,nsem = noun_type proper lemma "day-month-interval" in | |
290 | + [{empty_cats with lemma=lemma; pos="day-month-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
291 | + | lemma,"month-interval",[] -> | |
292 | + let nsyn,nsem = noun_type proper lemma "month-interval" in | |
293 | + [{empty_cats with lemma=lemma; pos="month-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
294 | + | lemma,"roman",[] -> | |
295 | + let nsyn,nsem = noun_type proper lemma "roman" in | |
296 | + [{empty_cats with lemma=lemma; pos="roman-ordnum"; pos2="adj"; numbers=all_numbers; cases=all_cases; genders=all_genders; grads=["pos"]}; | |
297 | + {empty_cats with lemma=lemma; pos="roman"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
298 | + | lemma,"roman-interval",[] -> | |
299 | + let nsyn,nsem = noun_type proper lemma "roman-interval" in | |
300 | + [{empty_cats with lemma=lemma; pos="roman-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
301 | + | lemma,"match-result",[] -> | |
302 | + let nsyn,nsem = noun_type proper lemma "match-result" in | |
303 | + [{empty_cats with lemma=lemma; pos="match-result"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
304 | + | lemma,"url",[] -> | |
305 | + let nsyn,nsem = noun_type proper lemma "url" in | |
306 | + [{empty_cats with lemma=lemma; pos="url"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
307 | + | lemma,"email",[] -> | |
308 | + let nsyn,nsem = noun_type proper lemma "email" in | |
309 | + [{empty_cats with lemma=lemma; pos="email"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
310 | + | lemma,"phone-number",[] -> | |
311 | + let nsyn,nsem = noun_type proper lemma "phone-number" in | |
312 | + [{empty_cats with lemma=lemma; pos="phone-number"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
313 | + | lemma,"postal-code",[] -> | |
314 | + let nsyn,nsem = noun_type proper lemma "postal-code" in | |
315 | + [{empty_cats with lemma=lemma; pos="postal-code"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
316 | + | lemma,"obj-id",[] -> | |
317 | + let nsyn,nsem = noun_type proper lemma "obj-id" in | |
318 | + [{empty_cats with lemma=lemma; pos="obj-id"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
319 | + | lemma,"building-number",[] -> | |
320 | + let nsyn,nsem = noun_type proper lemma "building-number" in | |
321 | + [{empty_cats with lemma=lemma; pos="building-number"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
322 | + | lemma,"fixed",[] -> [{empty_cats with lemma=lemma; pos="fixed"; pos2="fixed"}] | |
323 | + | lemma,"adj",[numbers;cases;genders;grads] -> (* FIXME: adjsyn *) | |
324 | + let numbers = expand_numbers numbers in | |
325 | + let cases = expand_cases cases in | |
326 | + let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in | |
327 | + let genders = expand_genders genders in | |
328 | + let pos,pos2 = if StringSet.mem adj_pronoun_lexemes lemma then "apron","pron" else "adj","adj" in | |
329 | + [{empty_cats with lemma=lemma; pos=pos; pos2=pos2; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; grads=grads}] (* FIXME: should more than one degree be allowed? *) | |
330 | + | lemma,"adjc",[] -> | |
331 | + [{empty_cats with lemma=lemma; pos="adjc"; pos2="adj"; cat=cat; coerced=coerced; numbers=["sg"]; cases=["pred"]; genders=["m1";"m2";"m3"]; grads=["pos"]}] | |
332 | + | lemma,"adjp",[] -> | |
333 | + [{empty_cats with lemma=lemma; pos="adjp"; pos2="adj"; cat=cat; coerced=coerced; numbers=all_numbers; cases=["postp"]; genders=all_genders; grads=["pos"]}] | |
334 | + | lemma,"adja",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; pos="adja"; pos2="adja"}] | |
335 | + | lemma,"adv",[grads] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; pos="adv"; pos2="adv"; grads=grads; modes=adv_mode lemma}] | |
336 | + | lemma,"adv",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; pos="adv"; pos2="adv"; grads=["pos"]; modes=adv_mode lemma}] | |
337 | + | lemma,"ger",[numbers;cases;genders;aspects;negations] -> | |
338 | + let numbers = expand_numbers numbers in | |
339 | + let cases = expand_cases cases in | |
340 | + let genders = expand_genders genders in | |
341 | + [{empty_cats with lemma=lemma; pos="ger"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; aspects=aspects; negations=negations}] (* FIXME: kwestia osoby przy voc *) | |
342 | + | lemma,"pact",[numbers;cases;genders;aspects;negations] -> | |
343 | + let numbers = expand_numbers numbers in | |
344 | + let cases = expand_cases cases in | |
345 | + let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in | |
346 | + let genders = expand_genders genders in | |
347 | + [{empty_cats with lemma=lemma; pos="pact"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}] | |
348 | + | lemma,"ppas",[numbers;cases;genders;aspects;negations] -> | |
349 | + let numbers = expand_numbers numbers in | |
350 | + let cases = expand_cases cases in | |
351 | + let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in | |
352 | + let genders = expand_genders genders in | |
353 | + [{empty_cats with lemma=lemma; pos="ppas"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}] | |
354 | + | lemma,"fin",[numbers;persons;aspects] -> (* FIXME: genders bez przymnogich *) | |
355 | + let numbers = expand_numbers numbers in | |
356 | + let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in | |
357 | + let cats = {empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; negations=["aff"; "neg"]; moods=["indicative"]} in | |
358 | + (Xlist.map aspects (function | |
359 | + "imperf" -> {cats with aspects=["imperf"]; tenses=["pres"]} | |
360 | + | "perf" -> {cats with aspects=["perf"]; tenses=["fut"]} | |
361 | + | _ -> failwith "clarify_categories")) @ | |
362 | + (if persons2 = [] then [] else | |
363 | + [{empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]) | |
364 | + | lemma,"bedzie",[numbers;persons;aspects] -> | |
365 | + let numbers = expand_numbers numbers in | |
366 | + let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in | |
367 | + [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] @ | |
368 | + (if persons2 = [] then [] else | |
369 | + [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]) | |
370 | + | lemma,"praet",[numbers;genders;aspects;nagl] -> | |
371 | + let numbers = expand_numbers numbers in | |
372 | + let genders = expand_genders genders in | |
373 | + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @ | |
374 | + (if Xlist.mem aspects "imperf" then | |
375 | + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] | |
376 | + else []) | |
377 | + | lemma,"praet",[numbers;genders;aspects] -> | |
378 | + let numbers = expand_numbers numbers in | |
379 | + let genders = expand_genders genders in | |
380 | + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @ | |
381 | + (if Xlist.mem aspects "imperf" then | |
382 | + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] | |
383 | + else []) | |
384 | + | lemma,"winien",[numbers;genders;aspects] -> | |
385 | + let numbers = expand_numbers numbers in | |
386 | + let genders = expand_genders genders in | |
387 | + [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["pres"]}; | |
388 | + {empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] @ | |
389 | + (if Xlist.mem aspects "imperf" then | |
390 | + [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] | |
391 | + else []) | |
392 | + | lemma,"impt",[numbers;persons;aspects] -> | |
393 | + let numbers = expand_numbers numbers in | |
394 | + [{empty_cats with lemma=lemma; pos="impt"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}] | |
395 | + | lemma,"imps",[aspects] -> | |
396 | + [{empty_cats with lemma=lemma; pos="imps"; pos2="verb"; cat=cat; coerced=coerced; numbers=all_numbers; genders=all_genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] | |
397 | + | lemma,"pred",[] -> (* FIXME: czy predykatyw zawsze jest niedokonany? *) | |
398 | + [{empty_cats with lemma=lemma; pos="pred"; pos2="verb"; cat=cat; coerced=coerced; numbers=["sg"]; genders=[(*"n2"*)"n"]; persons=["ter"]; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["pres";"past";"fut"]}] | |
399 | + | lemma,"aglt",[numbers;persons;aspects;wok] -> | |
400 | + let numbers = expand_numbers numbers in | |
401 | + [{empty_cats with lemma=lemma; pos="aglt"; pos2="verb"; numbers=numbers; persons=persons; aspects=aspects}] | |
402 | + | lemma,"inf",[aspects] -> [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; cat=cat; coerced=coerced; aspects=aspects; negations=["aff"; "neg"]}] | |
403 | + | lemma,"pcon",[aspects] -> [{empty_cats with lemma=lemma; pos="pcon"; pos2="verb"; cat=cat; coerced=coerced; aspects=aspects; negations=["aff"; "neg"]}] | |
404 | + | lemma,"pant",[aspects] -> [{empty_cats with lemma=lemma; pos="pant"; pos2="verb"; cat=cat; coerced=coerced; aspects=aspects; negations=["aff"; "neg"]}] | |
405 | + | lemma,"qub",[] -> | |
406 | + if StringSet.mem part_set lemma then [{empty_cats with lemma=lemma; pos="part"; pos2="qub"}] | |
407 | + else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"; cat=cat}] | |
408 | + | lemma,"comp",[] -> [{empty_cats with lemma=lemma; pos="comp"; pos2="comp"}] | |
409 | + | lemma,"conj",[] -> [{empty_cats with lemma=lemma; pos="conj"; pos2="conj"}] | |
410 | + | lemma,"interj",[] -> [{empty_cats with lemma=lemma; pos="interj"; pos2="interj"; cat=cat; coerced=coerced}] | |
411 | + | lemma,"sinterj",[] -> [{empty_cats with lemma=lemma; pos="sinterj"; pos2="sinterj"; (*cat=cat; coerced=coerced*)}] | |
412 | + | lemma,"burk",[] -> [{empty_cats with lemma=lemma; pos="burk"; pos2="burk"}] | |
413 | + | ",","interp",[] -> [{empty_cats with lemma=","; pos="conj"; pos2="conj"}] | |
414 | + | lemma,"interp",[] -> [{empty_cats with lemma=lemma; pos="interp"; pos2="interp"}] | |
415 | + | lemma,"unk",[] -> | |
416 | + [{empty_cats with lemma=lemma; pos="unk"; pos2="noun"; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}] | |
417 | + | lemma,"xxx",[] -> | |
418 | + [{empty_cats with lemma=lemma; pos="xxx"; pos2="noun"; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}] | |
419 | + | lemma,"html-tag",[] -> [{empty_cats with lemma=lemma; pos="html-tag"; pos2="html-tag"}] | |
420 | + | lemma,"list-item",[] -> [{empty_cats with lemma=lemma; pos="list-item"; pos2="list-item"}] | |
421 | + | lemma,c,l -> failwith ("clarify_categories: " ^ lemma ^ ":" ^ c ^ ":" ^ (String.concat ":" (Xlist.map l (String.concat ".")))) | |
422 | + | |
423 | +(* FIXME: move this somewhere else *) | |
424 | +(* let assign token = | |
425 | + match token.ENIAMtokenizerTypes.token with | |
426 | + ENIAMtokenizerTypes.Lemma(lemma,pos,interp) -> List.flatten (Xlist.map interp (fun interp -> clarify_categories false (lemma,pos,interp))) | |
427 | + | ENIAMtokenizerTypes.Proper(lemma,pos,interp,_) -> List.flatten (Xlist.map interp (fun interp -> clarify_categories true (lemma,pos,interp))) | |
428 | + | ENIAMtokenizerTypes.Interp lemma -> clarify_categories false (lemma,"interp",[]) | |
429 | + | _ -> [] *) | |
430 | + | |
431 | +let selector_names = StringSet.of_list [ | |
432 | + "lemma";"pos";"pos2";"cat";"coerced";"number";"case";"gender";"person";"grad"; | |
433 | + "praep";"acm";"aspect";"negation";"mood";"tense";"nsyn";"nsem";"ctype";"mode";"psem"; | |
434 | + "icat";"inumber";"igender";"iperson";"nperson";"ncat";"plemma"; | |
435 | + "unumber";"ucase";"ugender";"uperson";"amode"] | |
436 | + | |
437 | + | |
438 | +let string_of_selector = function | |
439 | + Lemma -> "lemma" | |
440 | + (* | NewLemma -> "newlemma" *) | |
441 | + | Pos -> "pos" | |
442 | + | Pos2 -> "pos2" | |
443 | + | Cat -> "cat" | |
444 | + | Coerced -> "coerced" | |
445 | + | Number -> "number" | |
446 | + | Case -> "case" | |
447 | + | Gender -> "gender" | |
448 | + | Person -> "person" | |
449 | + | Grad -> "grad" | |
450 | + | Praep -> "praep" | |
451 | + | Acm -> "acm" | |
452 | + | Aspect -> "aspect" | |
453 | + | Negation -> "negation" | |
454 | + | Mood -> "mood" | |
455 | + | Tense -> "tense" | |
456 | + | Nsyn -> "nsyn" | |
457 | + | Nsem -> "nsem" | |
458 | + | Ctype -> "ctype" | |
459 | + | Mode -> "mode" | |
460 | + | Psem -> "psem" | |
461 | + | Icat -> "icat" | |
462 | + | Inumber -> "inumber" | |
463 | + | Igender -> "igender" | |
464 | + | Iperson -> "iperson" | |
465 | + | Nperson -> "nperson" | |
466 | + | Ncat -> "ncat" | |
467 | + | Plemma -> "plemma" | |
468 | + | Unumber -> "unumber" | |
469 | + | Ucase -> "ucase" | |
470 | + | Ugender -> "ugender" | |
471 | + | Uperson -> "uperson" | |
472 | + | Amode -> "amode" | |
473 | + | |
474 | +let string_of_selectors selectors = | |
475 | + String.concat ", " (Xlist.map selectors (fun (cat,rel,l) -> | |
476 | + let rel = if rel = Eq then "=" else "!=" in | |
477 | + string_of_selector cat ^ rel ^ (String.concat "|" l))) | |
478 | + | |
479 | +let selector_of_string = function | |
480 | + "lemma" -> Lemma | |
481 | + (* | NewLemma -> "newlemma" *) | |
482 | + | "pos" -> Pos | |
483 | + | "pos2" -> Pos2 | |
484 | + | "cat" -> Cat | |
485 | + | "coerced" -> Coerced | |
486 | + | "number" -> Number | |
487 | + | "case" -> Case | |
488 | + | "gender" -> Gender | |
489 | + | "person" -> Person | |
490 | + | "grad" -> Grad | |
491 | + | "praep" -> Praep | |
492 | + | "acm" -> Acm | |
493 | + | "aspect" -> Aspect | |
494 | + | "negation" -> Negation | |
495 | + | "mood" -> Mood | |
496 | + | "tense" -> Tense | |
497 | + | "nsyn" -> Nsyn | |
498 | + | "nsem" -> Nsem | |
499 | + | "ctype" -> Ctype | |
500 | + | "mode" -> Mode | |
501 | + | "psem" -> Psem | |
502 | + | "icat" -> Icat | |
503 | + | "inumber" -> Inumber | |
504 | + | "igender" -> Igender | |
505 | + | "iperson" -> Iperson | |
506 | + | "nperson" -> Nperson | |
507 | + | "ncat" -> Ncat | |
508 | + | "plemma" -> Plemma | |
509 | + | "unumber" -> Unumber | |
510 | + | "ucase" -> Ucase | |
511 | + | "ugender" -> Ugender | |
512 | + | "uperson" -> Uperson | |
513 | + | "amode" -> Amode | |
514 | + | s -> failwith ("selector_of_string: " ^ s) | |
515 | + | |
516 | +let match_selector cats = function | |
517 | + Lemma -> [cats.lemma] | |
518 | +(* | NewLemma -> [] *) | |
519 | + | Pos -> [cats.pos] | |
520 | + | Cat -> [cats.cat] | |
521 | + | Coerced -> cats.coerced | |
522 | + | Number -> cats.numbers | |
523 | + | Case -> cats.cases | |
524 | + | Gender -> cats.genders | |
525 | + | Person -> cats.persons | |
526 | + | Grad -> cats.grads | |
527 | + | Praep -> cats.praeps | |
528 | + | Acm -> cats.acms | |
529 | + | Aspect -> cats.aspects | |
530 | + | Negation -> cats.negations | |
531 | + | Mood -> cats.moods | |
532 | + | Tense -> cats.tenses | |
533 | + | Nsyn -> cats.nsyn | |
534 | + | Nsem -> cats.nsem | |
535 | + | Mode -> cats.modes | |
536 | + | Psem -> cats.psem | |
537 | + | c -> failwith ("match_selector: " ^ string_of_selector c) | |
538 | + | |
539 | +let set_selector cats vals = function | |
540 | + Number -> {cats with numbers=vals} | |
541 | + | Case -> {cats with cases=vals} | |
542 | + | Gender -> {cats with genders=vals} | |
543 | + | Person -> {cats with persons=vals} | |
544 | + | Grad -> {cats with grads=vals} | |
545 | + | Praep -> {cats with praeps=vals} | |
546 | + | Acm -> {cats with acms=vals} | |
547 | + | Aspect -> {cats with aspects=vals} | |
548 | + | Negation -> {cats with negations=vals} | |
549 | + | Mood -> {cats with moods=vals} | |
550 | + | Tense -> {cats with tenses=vals} | |
551 | + | Nsyn -> {cats with nsyn=vals} | |
552 | + | Nsem -> {cats with nsem=vals} | |
553 | + | Mode -> {cats with modes=vals} | |
554 | + | Psem -> {cats with psem=vals} | |
555 | + | Lemma -> (match vals with [v] -> {cats with lemma=v} | _ -> failwith "set_selector: Lemma") | |
556 | + | Pos -> (match vals with [v] -> {cats with pos=v} | _ -> failwith "set_selector: Pos") | |
557 | + | Cat -> (match vals with [v] -> {cats with cat=v} | _ -> failwith "set_selector: Cat") | |
558 | + | Coerced -> {cats with coerced=vals} | |
559 | + | c -> failwith ("set_selector: " ^ string_of_selector c) | |
560 | + | |
561 | +let rec apply_selectors cats = function | |
562 | + [] -> cats | |
563 | + | (sel,Eq,vals) :: l -> | |
564 | + let vals = StringSet.intersection (StringSet.of_list (match_selector cats sel)) (StringSet.of_list vals) in | |
565 | + if StringSet.is_empty vals then raise Not_found else | |
566 | + apply_selectors (set_selector cats (StringSet.to_list vals) sel) l | |
567 | + | (sel,Neq,vals) :: l -> | |
568 | + let vals = StringSet.difference (StringSet.of_list (match_selector cats sel)) (StringSet.of_list vals) in | |
569 | + if StringSet.is_empty vals then raise Not_found else | |
570 | + apply_selectors (set_selector cats (StringSet.to_list vals) sel) l | |
571 | + | |
572 | +let pos_categories = Xlist.fold [ | |
573 | + "subst",[Lemma;Cat;Coerced;Number;Case;Gender;Person;Nsyn;Nsem;]; | |
574 | + "depr",[Lemma;Cat;Coerced;Number;Case;Gender;Person;Nsyn;Nsem;]; | |
575 | + "ppron12",[Lemma;Number;Case;Gender;Person;]; | |
576 | + "ppron3",[Lemma;Number;Case;Gender;Person;Praep;]; | |
577 | + "siebie",[Lemma;Number;Case;Gender;Person;]; | |
578 | + "prep",[Lemma;Cat;Coerced;Psem;Case;]; | |
579 | + "compar",[Lemma;Cat;Coerced;Case;]; | |
580 | + "num",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | |
581 | + "numcomp",[Lemma]; | |
582 | + "intnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | |
583 | + "realnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | |
584 | + "intnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | |
585 | + "realnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | |
586 | + "symbol",[Lemma;Number;Case;Gender;Person;]; | |
587 | + "ordnum",[Lemma;Number;Case;Gender;Grad;]; | |
588 | + "date",[Lemma;Nsyn;Nsem;]; | |
589 | + "date-interval",[Lemma;Nsyn;Nsem;]; | |
590 | + "hour-minute",[Lemma;Nsyn;Nsem;]; | |
591 | + "hour",[Lemma;Nsyn;Nsem;]; | |
592 | + "hour-minute-interval",[Lemma;Nsyn;Nsem;]; | |
593 | + "hour-interval",[Lemma;Nsyn;Nsem;]; | |
594 | + "year",[Lemma;Nsyn;Nsem;]; | |
595 | + "year-interval",[Lemma;Nsyn;Nsem;]; | |
596 | + "day",[Lemma;Nsyn;Nsem;]; | |
597 | + "day-interval",[Lemma;Nsyn;Nsem;]; | |
598 | + "day-month",[Lemma;Nsyn;Nsem;]; | |
599 | + "day-month-interval",[Lemma;Nsyn;Nsem;]; | |
600 | + "month-interval",[Lemma;Nsyn;Nsem;]; | |
601 | + "roman-ordnum",[Lemma;Number;Case;Gender;Grad;]; | |
602 | + "roman",[Lemma;Nsyn;Nsem;]; | |
603 | + "roman-interval",[Lemma;Nsyn;Nsem;]; | |
604 | + "match-result",[Lemma;Nsyn;Nsem;]; | |
605 | + "url",[Lemma;Nsyn;Nsem;]; | |
606 | + "email",[Lemma;Nsyn;Nsem;]; | |
607 | + "phone-number",[Lemma;Nsyn;Nsem;]; | |
608 | + "postal-code",[Lemma;Nsyn;Nsem;]; | |
609 | + "obj-id",[Lemma;Nsyn;Nsem;]; | |
610 | + "building-number",[Lemma;Nsyn;Nsem;]; | |
611 | + "fixed",[Lemma;]; | |
612 | + "adj",[Lemma;Cat;Coerced;Number;Case;Gender;Grad;]; | |
613 | + "adjc",[Lemma;Cat;Coerced;Number;Case;Gender;Grad;]; | |
614 | + "adjp",[Lemma;Cat;Coerced;Number;Case;Gender;Grad;]; | |
615 | + "apron",[Lemma;Number;Case;Gender;Grad;]; | |
616 | + "adja",[Lemma;Cat;Coerced;]; | |
617 | + "adv",[Lemma;Cat;Coerced;Grad;Mode];(* ctype *) | |
618 | + "ger",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Case;Gender;Person;Aspect;Negation;]; | |
619 | + "pact",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Case;Gender;Aspect;Negation;]; | |
620 | + "ppas",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Case;Gender;Aspect;Negation;]; | |
621 | + "fin",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
622 | + "bedzie",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
623 | + "praet",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
624 | + "winien",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
625 | + "impt",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
626 | + "imps",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
627 | + "pred",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
628 | + "aglt",[Lemma;Number;Person;Aspect;]; | |
629 | + "inf",[Lemma;(*NewLemma;*)Cat;Coerced;Aspect;Negation;]; | |
630 | + "pcon",[Lemma;(*NewLemma;*)Cat;Coerced;Aspect;Negation;]; | |
631 | + "pant",[Lemma;(*NewLemma;*)Cat;Coerced;Aspect;Negation;]; | |
632 | + "qub",[Lemma;Cat;]; | |
633 | + "part",[Lemma;]; | |
634 | + "comp",[Lemma;];(* ctype *) | |
635 | + "conj",[Lemma;];(* ctype *) | |
636 | + "interj",[Lemma;Cat;Coerced;]; | |
637 | + "sinterj",[Lemma;]; | |
638 | + "burk",[Lemma;]; | |
639 | + "interp",[Lemma;]; | |
640 | + "unk",[Lemma;Number;Case;Gender;Person;]; | |
641 | + "xxx",[Lemma;Number;Case;Gender;Person;]; | |
642 | + "html-tag",[Lemma;]; | |
643 | + "list-item",[Lemma;]; | |
644 | + ] StringMap.empty (fun map (k,l) -> StringMap.add map k l) | |
... | ... |
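The new ENIAMcategoriesPL_old.ml above appears to be a snapshot of the previous category-assignment module, kept for reference: its pos_categories lists no Role or SNode selectors, and clarify_categories takes only the proper flag, cat and coerced before the (lemma, pos, interp) triple. The updated ENIAMcategoriesPL (not shown in this hunk) takes an additional snode argument, as the call sites in exec/ENIAMexec.ml below show. A hedged comparison of the two call shapes, with purely illustrative arguments ("kot" as an example masculine-animal noun):

(* old module, as archived above: proper, cat, coerced, (lemma, pos, interp) *)
let _ = ENIAMcategoriesPL_old.clarify_categories false "X" ["X"]
    ("kot", "subst", [["sg"]; ["nom"]; ["m2"]])

(* updated module, as called below in ENIAMexec: an snode list such as
   ["concept"] or ["context"] is inserted before the (lemma, pos, interp) triple *)
let _ = ENIAMcategoriesPL.clarify_categories false "X" ["X"] ["concept"]
    ("kot", "subst", [["sg"]; ["nom"]; ["m2"]])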
exec/ENIAMexec.ml
... | ... | @@ -63,16 +63,19 @@ let rec translate_text = function |
63 | 63 | | ENIAMsubsyntaxTypes.AltText l -> AltText(Xlist.map l (fun (mode,text) -> |
64 | 64 | translate_mode mode, translate_text text)) |
65 | 65 | |
66 | -let clarify_categories cats token = | |
66 | +let clarify_categories cats has_context token = | |
67 | + let snode = match has_context with | |
68 | + false -> ["concept"] | |
69 | + | true -> ["context"] in | |
67 | 70 | match token.ENIAMtokenizerTypes.token with |
68 | 71 | ENIAMtokenizerTypes.Lemma(lemma,pos,interp) -> |
69 | 72 | List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,coerced) -> |
70 | 73 | (* Printf.printf "lemma=%s pos=%s cat=%s coerced=%s\n%!" lemma pos cat (String.concat "," coerced); *) |
71 | - ENIAMcategoriesPL.clarify_categories false cat coerced (lemma,pos,interp))))) | |
74 | + ENIAMcategoriesPL.clarify_categories false cat coerced snode (lemma,pos,interp))))) | |
72 | 75 | | ENIAMtokenizerTypes.Proper(lemma,pos,interp,senses2) -> |
73 | - List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,coerced) -> ENIAMcategoriesPL.clarify_categories true cat coerced (lemma,pos,interp))))) | |
76 | + List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,coerced) -> ENIAMcategoriesPL.clarify_categories true cat coerced snode (lemma,pos,interp))))) | |
74 | 77 | | ENIAMtokenizerTypes.Interp lemma -> |
75 | - List.flatten (Xlist.map cats (fun (cat,coerced) -> ENIAMcategoriesPL.clarify_categories false cat coerced (lemma,"interp",[]))) | |
78 | + List.flatten (Xlist.map cats (fun (cat,coerced) -> ENIAMcategoriesPL.clarify_categories false cat coerced snode (lemma,"interp",[]))) | |
76 | 79 | | _ -> [] |
77 | 80 | |
78 | 81 | let create_chart rules tokens lex_sems paths last = |
... | ... | @@ -84,8 +87,8 @@ let create_chart rules tokens lex_sems paths last = |
84 | 87 | ENIAM_LCGrenderer.reset_variable_names (); |
85 | 88 | ENIAM_LCGrenderer.add_variable_numbers (); |
86 | 89 | (* if s.ENIAMlexSemanticsTypes.schemata = [] then failwith ("create_chart: no schema for token=" ^ t.ENIAMtokenizerTypes.orth ^ " lemma=" ^ ENIAMtokens.get_lemma t.ENIAMtokenizerTypes.token) else *) |
87 | - Xlist.fold s.ENIAMlexSemanticsTypes.schemata chart (fun chart (selectors,cats,schema) -> | |
88 | - let cats = clarify_categories cats t in | |
90 | + Xlist.fold s.ENIAMlexSemanticsTypes.schemata chart (fun chart (selectors,cats,has_context,schema) -> | |
91 | + let cats = clarify_categories cats has_context t in | |
89 | 92 | (* let chart = ENIAM_LCGchart.add_inc_list chart lnode rnode s.ENIAMlexSemanticsTypes.lex_entries 0 in *) |
90 | 93 | let l = ENIAM_LCGlexicon.create_entries rules id t.ENIAMtokenizerTypes.orth cats [selectors,schema] s.ENIAMlexSemanticsTypes.lex_entries in |
91 | 94 | ENIAM_LCGchart.add_inc_list chart lnode rnode l 0)) in |
... | ... | @@ -115,7 +118,7 @@ let create_dep_chart dep_rules tokens lex_sems paths = |
115 | 118 | ENIAM_LCGrenderer.reset_variable_names (); |
116 | 119 | ENIAM_LCGrenderer.add_variable_numbers (); |
117 | 120 | let cats = clarify_categories ["X",["X"]] t in |
118 | - let schemata = Xlist.map s.ENIAMlexSemanticsTypes.schemata (fun (selectors,_,schema) -> selectors,schema) in | |
121 | + let schemata = Xlist.map s.ENIAMlexSemanticsTypes.schemata (fun (selectors,_,_,schema) -> selectors,schema) in | |
119 | 122 | let l = ENIAM_LCGlexicon.create_entries dep_rules id t.ENIAMtokenizerTypes.orth cats schemata s.ENIAMlexSemanticsTypes.lex_entries in |
120 | 123 | IntMap.add nodes i l) in |
121 | 124 | (* print_endline "create_dep_chart 3"; *) |
... | ... |
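In the chart construction above, each schema now carries a has_context flag, and clarify_categories turns it into the snode value passed down to ENIAMcategoriesPL: false yields ["concept"], true yields ["context"]. The same mapping written as a standalone helper, purely for illustration (the commit inlines the match):

(* illustrative helper mirroring the match on has_context above *)
let snode_of_has_context has_context =
  if has_context then ["context"] else ["concept"]

Note that create_dep_chart still calls clarify_categories ["X",["X"]] t in an unchanged context line of this hunk, i.e. without a has_context argument, so the dependency path does not yet choose between "concept" and "context".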
lexSemantics/ENIAMadjuncts.ml
... | ... | @@ -253,19 +253,19 @@ let simplify_schemata lexemes pos pos2 lemma schemata = |
253 | 253 | "{" ^ String.concat ";" (PhraseSet.fold morfs [] (fun l m -> ENIAMwalStringOf.phrase m :: l)) ^ "}")))); *) |
254 | 254 | schemata |
255 | 255 | |
256 | -let add_adjuncts preps compreps compars pos2 (selectors,cat,schema) = | |
256 | +let add_adjuncts preps compreps compars pos2 (selectors,cat,has_context,schema) = | |
257 | 257 | let compreps = Xlist.rev_map compreps ENIAMwalRenderer.render_comprep in |
258 | 258 | let prepnps = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_prepnp prep cases) in |
259 | 259 | let prepadjps = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_prepadjp prep cases) in |
260 | 260 | let compars = Xlist.rev_map compars ENIAMwalRenderer.render_compar in |
261 | 261 | match pos2 with |
262 | - "verb" -> [selectors,cat,schema @ ENIAMwalRenderer.verb_adjuncts_simp @ prepnps @ prepadjps @ compreps @ compars] | |
262 | + "verb" -> [selectors,cat,has_context,schema @ ENIAMwalRenderer.verb_adjuncts_simp @ prepnps @ prepadjps @ compreps @ compars] | |
263 | 263 | | "noun" -> [ |
264 | - [Nsyn,Eq,["proper"]] @ selectors,cat,ENIAMwalRenderer.proper_noun_adjuncts_simp @ prepnps @ compreps @ compars; | |
265 | - [Nsyn,Eq,["common"];Nsem,Eq,["measure"]] @ selectors,cat,ENIAMwalRenderer.measure_noun_adjuncts_simp @ prepnps @ compreps @ compars; | |
266 | - [Nsyn,Eq,["common"];Nsem,Neq,["measure"]] @ selectors,cat,ENIAMwalRenderer.common_noun_adjuncts_simp @ prepnps @ compreps @ compars] | |
267 | - | "adj" -> [selectors,cat,schema @ ENIAMwalRenderer.adj_adjuncts_simp @ compars] | |
268 | - | "adv" -> [selectors,cat,schema @ ENIAMwalRenderer.adv_adjuncts_simp @ compars] | |
264 | + [Nsyn,Eq,["proper"]] @ selectors,cat,has_context,ENIAMwalRenderer.proper_noun_adjuncts_simp @ prepnps @ compreps @ compars; | |
265 | + [Nsyn,Eq,["common"];Nsem,Eq,["measure"]] @ selectors,cat,has_context,ENIAMwalRenderer.measure_noun_adjuncts_simp @ prepnps @ compreps @ compars; | |
266 | + [Nsyn,Eq,["common"];Nsem,Neq,["measure"]] @ selectors,cat,has_context,ENIAMwalRenderer.common_noun_adjuncts_simp @ prepnps @ compreps @ compars] | |
267 | + | "adj" -> [selectors,cat,has_context,schema @ ENIAMwalRenderer.adj_adjuncts_simp @ compars] | |
268 | + | "adv" -> [selectors,cat,has_context,schema @ ENIAMwalRenderer.adv_adjuncts_simp @ compars] | |
269 | 269 | | _ -> [] |
270 | 270 | |
271 | 271 | open ENIAMlexSemanticsTypes |
... | ... |
lexSemantics/ENIAMlexSemantics.ml
... | ... | @@ -303,9 +303,9 @@ let assign_valence tokens lex_sems group = |
303 | 303 | let schemata = ENIAMadjuncts.simplify_schemata lexemes pos pos2 lemma schemata1 in |
304 | 304 | (* Printf.printf "C %s |schemata|=%d\n" lemma (Xlist.size schemata); *) |
305 | 305 | let schemata = Xlist.rev_map schemata (fun (selectors,schema) -> |
306 | - selectors,["X",["X"]],ENIAMwalRenderer.render_simple_schema schema) in | |
306 | + selectors,["X",["X"]],false,ENIAMwalRenderer.render_simple_schema schema) in | |
307 | 307 | let schemata = List.flatten (Xlist.rev_map schemata (ENIAMadjuncts.add_adjuncts preps compreps compars pos2)) in |
308 | - let schemata = if schemata = [] then [[],["X",["X"]],[]] else schemata in | |
308 | + let schemata = if schemata = [] then [[],["X",["X"]],false,[]] else schemata in | |
309 | 309 | (* Printf.printf "D %s |schemata|=%d\n" lemma (Xlist.size schemata); *) |
310 | 310 | let entries = List.flatten (Xlist.rev_map entries (ENIAMvalence.transform_lex_entry pos lemma)) in |
311 | 311 | let entries = Xlist.map entries (fun (selectors,entry) -> |
... | ... |
lexSemantics/ENIAMlexSemanticsHTMLof.ml
... | ... | @@ -60,8 +60,9 @@ let html_of_lex_sems tokens lex_sems = |
60 | 60 | let core = Printf.sprintf "%3d %s %s" id orth lemma in |
61 | 61 | let lex_entries = Xlist.map t.lex_entries (fun (selectors,s) -> |
62 | 62 | "  [" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] " ^ ENIAM_LCGstringOf.grammar_symbol 0 s) in |
63 | - let schemata = Xlist.map t.schemata (fun (selectors,cat,l) -> | |
63 | + let schemata = Xlist.map t.schemata (fun (selectors,cat,has_context,l) -> | |
64 | 64 | "  [" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "]" ^ |
65 | + (if has_context then "T" else "F") ^ | |
65 | 66 | String.concat "," (Xlist.map cat (fun (m,l) -> m ^ "[" ^ String.concat "," l ^ "]")) ^ |
66 | 67 | " {" ^ String.concat ", " (Xlist.map l (fun (d,s) -> |
67 | 68 | ENIAM_LCGstringOf.direction d ^ ENIAM_LCGstringOf.grammar_symbol 0 s)) ^ "}") in |
... | ... |
lexSemantics/ENIAMlexSemanticsStringOf.ml
... | ... | @@ -40,8 +40,9 @@ let string_of_lex_sems tokens lex_sems = |
40 | 40 | let core = Printf.sprintf "%3d %s %s" id orth lemma in |
41 | 41 | let lex_entries = Xlist.map t.lex_entries (fun (selectors,s) -> |
42 | 42 | "&[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] " ^ ENIAM_LCGstringOf.grammar_symbol 0 s) in |
43 | - let schemata = Xlist.map t.schemata (fun (selectors,cat,l) -> | |
43 | + let schemata = Xlist.map t.schemata (fun (selectors,cat,has_context,l) -> | |
44 | 44 | "[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "]" ^ |
45 | + (if has_context then "T" else "F") ^ | |
45 | 46 | String.concat "," (Xlist.map cat (fun (m,l) -> m ^ "[" ^ String.concat "," l ^ "]")) ^ |
46 | 47 | " {" ^ String.concat "," (Xlist.map l (fun (d,s) -> |
47 | 48 | ENIAM_LCGstringOf.direction d ^ ENIAM_LCGstringOf.grammar_symbol 0 s)) ^ "}") in |
... | ... |
lexSemantics/ENIAMlexSemanticsTypes.ml
... | ... | @@ -42,6 +42,7 @@ let empty_frame = {selectors=[]; senses=[]; cats=["X",["X"]]; positions=[]; arol |
42 | 42 | type lex_sem = { |
43 | 43 | schemata: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * |
44 | 44 | (string * string list) list * (* sensy *) |
45 | + bool * (* has_context *) | |
45 | 46 | (ENIAM_LCGtypes.direction * ENIAM_LCGtypes.grammar_symbol) list) list; |
46 | 47 | lex_entries: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * |
47 | 48 | ENIAM_LCGtypes.grammar_symbol) list; |
... | ... |
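With this change the schemata field of lex_sem holds 4-tuples: the selectors, the sense list, the new has_context flag, and the argument list; the printers above render the flag as "T" or "F". A minimal consumer sketch (has_context_flags is a hypothetical helper, not part of the commit):

open Xstd
open ENIAMlexSemanticsTypes

(* hypothetical helper: collect the has_context flags of a token's schemata *)
let has_context_flags lex_sem =
  Xlist.map lex_sem.schemata (fun (_selectors, _senses, has_context, _args) -> has_context)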
lexSemantics/ENIAMwalRenderer.ml
... | ... | @@ -177,60 +177,60 @@ let render_phrase = function |
177 | 177 | | E (PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top] |
178 | 178 | | phrase -> failwith ("render_phrase: " ^ ENIAMwalStringOf.phrase phrase) |
179 | 179 | |
180 | -let render_phrase_cat cat = function | |
181 | - NP(Case case) -> Tensor[Atom "np"; Atom cat; Top; Atom case; Top; Top] | |
182 | - | NP NomAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] | |
183 | - | NP VocAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"] | |
184 | -(* | NP GenAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"] | |
185 | - | NP AllAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]*) | |
186 | - | NP CaseAgr -> Tensor[Atom "np"; Atom cat; Top; AVar "case"; Top; Top] | |
187 | - | NP CaseUndef -> Tensor[Atom "np"; Atom cat; Top; Top; Top; Top] | |
188 | - | PrepNP(Psem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top] | |
189 | - | PrepNP(Psem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top] | |
190 | - | PrepNP(Psem,"_",Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Atom case] | |
191 | - | PrepNP(Psem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Top] | |
192 | - | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case] | |
193 | - | PrepNP(Pnosem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top] | |
194 | - | PrepNP(Pnosem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top] | |
195 | - | PrepNP(Pnosem,"_",Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Atom case] | |
196 | - | PrepNP(Pnosem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Top] | |
197 | - | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case] | |
198 | - | AdjP(Case case) -> Tensor[Atom "adjp"; Atom cat; Top; Atom case; Top] | |
199 | - | AdjP NomAgr -> Tensor[Atom "adjp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"] | |
200 | - | AdjP AllAgr -> Tensor[Atom "adjp"; Atom cat; AVar "number"; AVar "case"; AVar "gender"] | |
201 | -(* | AdjP CaseAgr -> Tensor[Atom "adjp"; Top; AVar "case"; Top] | |
202 | - | PrepAdjP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]*) | |
203 | - | PrepAdjP(prep,Case case) -> Tensor[Atom "prepadjp"; Atom cat; Atom prep; Atom case] | |
180 | +let render_phrase_cat cat role = function | |
181 | + NP(Case case) -> Tensor[Atom "np"; Top; Atom case; Top; Top; Atom cat; Atom role; Top] | |
182 | + | NP NomAgr -> Tensor[Atom "np"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Atom cat; Atom role; Top] | |
183 | + | NP VocAgr -> Tensor[Atom "np"; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; Atom cat; Atom role; Top] | |
184 | +(* | NP GenAgr -> Tensor[Atom "np"; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"; Atom cat; Atom role; Top] | |
185 | + | NP AllAgr -> Tensor[Atom "np"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"; Atom cat; Atom role; Top]*) | |
186 | + | NP CaseAgr -> Tensor[Atom "np"; Top; AVar "case"; Top; Top; Atom cat; Atom role; Top] | |
187 | + | NP CaseUndef -> Tensor[Atom "np"; Top; Top; Top; Top; Atom cat; Atom role; Top] | |
188 | + | PrepNP(Psem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom "sem"; Top; Top; Atom cat; Atom role; Top] | |
189 | + | PrepNP(Psem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom "sem"; Top; Top; Atom cat; Atom role; Top] | |
190 | + | PrepNP(Psem,"_",Case case) -> Tensor[Atom "prepnp"; Atom "sem"; Top; Atom case; Atom cat; Atom role; Top] | |
191 | + | PrepNP(Psem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Top; Atom cat; Atom role; Top] | |
192 | + | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Atom case; Atom cat; Atom role; Top] | |
193 | + | PrepNP(Pnosem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom "nosem"; Top; Top; Atom cat; Atom role; Top] | |
194 | + | PrepNP(Pnosem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom "nosem"; Top; Top; Atom cat; Atom role; Top] | |
195 | + | PrepNP(Pnosem,"_",Case case) -> Tensor[Atom "prepnp"; Atom "nosem"; Top; Atom case; Atom cat; Atom role; Top] | |
196 | + | PrepNP(Pnosem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Top; Atom cat; Atom role; Top] | |
197 | + | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case; Atom cat; Atom role; Top] | |
198 | + | AdjP(Case case) -> Tensor[Atom "adjp"; Top; Atom case; Top; Top; Atom cat; Atom role; Top] | |
199 | + | AdjP NomAgr -> Tensor[Atom "adjp"; AVar "number"; Atom "nom"; AVar "gender"; Top; Atom cat; Atom role; Top] | |
200 | + | AdjP AllAgr -> Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"; Top; Atom cat; Atom role; Top] | |
201 | +(* | AdjP CaseAgr -> Tensor[Atom "adjp"; Top; AVar "case"; Top; Top; Atom cat; Atom role; Top] | |
202 | + | PrepAdjP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top; Atom cat; Atom role; Top]*) | |
203 | + | PrepAdjP(prep,Case case) -> Tensor[Atom "prepadjp"; Atom prep; Atom case; Atom cat; Atom role; Top] | |
204 | 204 | (* | NumP(Case case) -> Tensor[Atom "nump"; Top; Atom case; Top; Top] |
205 | 205 | | NumP NomAgr -> Tensor[Atom "nump"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] |
206 | 206 | | NumP CaseAgr -> Tensor[Atom "nump"; Top; AVar "case"; Top; Top] |
207 | 207 | | NumP CaseUndef -> Tensor[Atom "nump"; Top; Top; Top; Top] |
208 | 208 | | PrepNumP(_,"",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top] |
209 | 209 | | PrepNumP(_,prep,Case case) -> Tensor[Atom "prepnump"; Atom prep; Atom case] *) |
210 | -(* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top]*) | |
211 | - | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom cat; Atom prep] | |
212 | - | ComparP(prep,Case case) -> Tensor[Atom "compar"; Atom cat; Atom prep; Atom case] | |
213 | - (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep] *) | |
214 | - (* | IP -> Tensor[Atom "ip";Top;Top;Top] *) | |
215 | - | CP (ctype,Comp comp) -> Tensor[Atom "cp"; Atom cat; arg_of_ctype ctype; Atom comp] | |
216 | - (* | CP (ctype,CompUndef) -> Tensor[Atom "cp"; arg_of_ctype ctype; Top]*) | |
217 | - | NCP(Case case,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; arg_of_ctype ctype; Atom comp] | |
218 | - | NCP(Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; Top; Top] | |
219 | - | NCP(NomAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp] | |
220 | - | NCP(NomAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top] | |
221 | - | NCP(VocAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp] | |
222 | - | NCP(VocAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; Top; Top] | |
223 | - | PrepNCP(Psem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom cat; Atom "sem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp] | |
224 | - | PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom cat; Atom "sem"; Atom prep; Atom case; Top; Top] | |
225 | - | PrepNCP(Pnosem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom cat; Atom "nosem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp] | |
226 | - | PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom cat; Atom "nosem"; Atom prep; Atom case; Top; Top] | |
227 | - | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom cat; Atom aspect] | |
228 | - | InfP AspectUndef -> Tensor[Atom "infp"; Atom cat; Top] | |
229 | - (* | PadvP -> Tensor[Atom "padvp"] *) | |
230 | - | AdvP "misc" -> Tensor[Atom "advp"; Atom cat; Top] (* FIXME: a może Atom "mod" zamiast Top *) | |
231 | - | AdvP "" -> Tensor[Atom "advp"; Atom cat; Top] (* FIXME: a może Atom "mod" zamiast Top *) | |
232 | - | AdvP mode -> Tensor[Atom "advp"; Atom cat; Atom mode] | |
233 | - | ColonP -> Tensor[Atom "colonp"; Atom cat] | |
210 | +(* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top; Atom cat; Atom role; Top]*) | |
211 | + | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom prep; Atom cat; Atom role; Top] | |
212 | + | ComparP(prep,Case case) -> Tensor[Atom "compar"; Atom prep; Atom case; Atom cat; Atom role; Top] | |
213 | + (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep; Atom cat; Atom role; Top] *) | |
214 | + (* | IP -> Tensor[Atom "ip";Top;Top;Top; Atom cat; Atom role; Top] *) | |
215 | + | CP (ctype,Comp comp) -> Tensor[Atom "cp"; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Top] | |
216 | + (* | CP (ctype,CompUndef) -> Tensor[Atom "cp"; arg_of_ctype ctype; Top; Atom cat; Atom role; Top]*) | |
217 | + | NCP(Case case,ctype,Comp comp) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Top] | |
218 | + | NCP(Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top; Atom cat; Atom role; Top] | |
219 | + | NCP(NomAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Top] | |
220 | + | NCP(NomAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top; Atom cat; Atom role; Top] | |
221 | + | NCP(VocAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Top] | |
222 | + | NCP(VocAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; Top; Top; Atom cat; Atom role; Top] | |
223 | + | PrepNCP(Psem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Top] | |
224 | + | PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top; Atom cat; Atom role; Top] | |
225 | + | PrepNCP(Pnosem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Top] | |
226 | + | PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top; Atom cat; Atom role; Top] | |
227 | + | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom aspect; Atom cat; Atom role; Top] | |
228 | + | InfP AspectUndef -> Tensor[Atom "infp"; Top; Atom cat; Atom role; Top] | |
229 | + (* | PadvP -> Tensor[Atom "padvp"; Atom cat; Atom role; Top] *) | |
230 | + | AdvP "misc" -> Tensor[Atom "advp"; (*Top;*) Atom cat; Atom role; Top] (* FIXME: a może Atom "mod" zamiast Top *) | |
231 | + | AdvP "" -> Tensor[Atom "advp"; (*Top;*) Atom cat; Atom role; Top] (* FIXME: a może Atom "mod" zamiast Top *) | |
232 | + | AdvP mode -> Tensor[Atom "advp"; (*Atom mode;*) Atom cat; Atom role; Top] | |
233 | + | ColonP -> Tensor[Atom "colonp"; Atom cat; Atom role; Top] | 
234 | 234 | (* | PrepP -> Tensor[Atom "prepp";Top] |
235 | 235 | | Prep("",CaseAgr) -> Tensor[Atom "prep"; Top; AVar "case"] |
236 | 236 | | Prep("",CaseUAgr) -> Tensor[Atom "prep"; Top; AVar "ucase"] |
... | ... | @@ -245,15 +245,15 @@ let render_phrase_cat cat = function |
245 | 245 | | AuxImp -> Tensor[Atom "aux-imp"] |
246 | 246 | | Pro -> One |
247 | 247 | | ProNG -> One *) |
248 | - | E (CP(CompTypeUndef,CompUndef)) -> Tensor[Atom "cp"; Atom cat; Top; Top] | |
249 | - | E (NCP(NomAgr,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top] | |
250 | - | E (NP(NomAgr)) -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] | |
251 | - | E (PrepNP(Psem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case] | |
252 | - | E (PrepNP(Pnosem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case] | |
253 | - | E (NP(Case case)) -> Tensor[Atom "np"; Atom cat; Top; Atom case; Top; Top] | |
254 | - | E (NCP(Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; Top; Top] | |
255 | - | E (PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom cat; Atom "sem"; Atom prep; Atom case; Top; Top] | |
256 | - | E (PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom cat; Atom "nosem"; Atom prep; Atom case; Top; Top] | |
248 | + | E (CP(CompTypeUndef,CompUndef)) -> Tensor[Atom "cp"; Top; Top; Atom cat; Atom role; Top] | |
249 | + | E (NCP(NomAgr,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top; Atom cat; Atom role; Top] | |
250 | + | E (NP(NomAgr)) -> Tensor[Atom "np"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Atom cat; Atom role; Top] | |
251 | + | E (PrepNP(Psem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Atom case; Atom cat; Atom role; Top] | |
252 | + | E (PrepNP(Pnosem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case; Atom cat; Atom role; Top] | |
253 | + | E (NP(Case case)) -> Tensor[Atom "np"; Top; Atom case; Top; Top; Atom cat; Atom role; Top] | |
254 | + | E (NCP(Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top; Atom cat; Atom role; Top] | |
255 | + | E (PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top; Atom cat; Atom role; Top] | |
256 | + | E (PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top; Atom cat; Atom role; Top] | |
257 | 257 | | phrase -> failwith ("render_phrase_cat: " ^ ENIAMwalStringOf.phrase phrase) |
258 | 258 | |
259 | 259 | let render_morf = function |
... | ... | @@ -264,7 +264,7 @@ let render_morf = function |
264 | 264 | | SimpleLexArg(lex,pos) -> Tensor([Atom "lex";Atom lex] @ render_pos pos) |
265 | 265 | | phrase -> render_phrase phrase |
266 | 266 | |
267 | -let render_morf_cat cats = function | |
267 | +let render_morf_cat cats role = function | |
268 | 268 | | Null -> [One] |
269 | 269 | | Pro -> [One] |
270 | 270 | | ProNG -> [One] |
... | ... | @@ -275,7 +275,7 @@ let render_morf_cat cats = function |
275 | 275 | | Lex lex -> Tensor[Atom lex] *) |
276 | 276 | | LexArg(id,lex,pos) -> [Tensor([Atom "lex";Atom (string_of_int id);Atom lex] @ render_pos pos)] |
277 | 277 | | SimpleLexArg(lex,pos) -> [Tensor([Atom "lex";Atom lex] @ render_pos pos)] |
278 | - | phrase -> Xlist.map cats (fun cat -> render_phrase_cat cat phrase) | |
278 | + | phrase -> Xlist.map cats (fun cat -> render_phrase_cat cat role phrase) | |
279 | 279 | |
280 | 280 | (* let extract_sel_prefs sel_prefs = |
281 | 281 | Xlist.map sel_prefs (function |
... | ... | @@ -296,7 +296,7 @@ let translate_dir = function |
296 | 296 | |
297 | 297 | let render_schema_cat schema = |
298 | 298 | Xlist.map schema (fun p -> |
299 | - match List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs)) with | |
299 | + match List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs p.role)) with | |
300 | 300 | [] -> failwith "render_schema" |
301 | 301 | | [s] -> translate_dir p.dir,s |
302 | 302 | | l -> translate_dir p.dir,Plus l) |
... | ... | @@ -312,7 +312,7 @@ let render_connected_schema schema = |
312 | 312 | let render_connected_schema_cat schema = |
313 | 313 | Xlist.map schema (fun p -> |
314 | 314 | {p with |
315 | - morfs=Xlist.map (List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs))) (fun morf -> LCG morf)}) | |
315 | + morfs=Xlist.map (List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs p.role))) (fun morf -> LCG morf)}) | |
316 | 316 | |
317 | 317 | (* FIXME: here one would need to add the usual rules for verbs concerning their negation, the agglutinant, etc. *)
318 | 318 | let render_lex_entry = function |
... | ... |
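
The hunks above change render_phrase_cat so that each argument tensor carries the semantic category, the role and a final node slot at its tail, instead of the category in second position. Below is a minimal, self-contained sketch of that layout only: the Tensor/Atom/AVar/Top constructors and a couple of phrase cases are re-declared in stripped-down form, and the "Location"/"Locat" values in the usage line are purely illustrative, not values taken from the grammar.

(* Stripped-down stand-ins for the types used in the diff above. *)
type grammar_symbol =
    Tensor of grammar_symbol list
  | Atom of string
  | AVar of string
  | Top

type case = Case of string | NomAgr
type phrase = NP of case | AdvP of string

(* Mirrors the new layout: grammatical features first, then the semantic
   category, the role and a final node slot (filled with Top here). *)
let render_phrase_cat cat role = function
    NP (Case case) -> Tensor [Atom "np"; Top; Atom case; Top; Top; Atom cat; Atom role; Top]
  | NP NomAgr -> Tensor [Atom "np"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Atom cat; Atom role; Top]
  | AdvP _ -> Tensor [Atom "advp"; Atom cat; Atom role; Top]

let rec string_of_symbol = function
    Tensor l -> "Tensor[" ^ String.concat "; " (List.map string_of_symbol l) ^ "]"
  | Atom s -> "Atom \"" ^ s ^ "\""
  | AVar s -> "AVar \"" ^ s ^ "\""
  | Top -> "Top"

let () =
  print_endline (string_of_symbol (render_phrase_cat "Location" "Locat" (NP (Case "gen"))))
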
lexSemantics/ENIAMwalRenderer_old.ml
0 → 100644
1 | +(* | |
2 | + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. | |
3 | + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | + * | |
6 | + * This library is free software: you can redistribute it and/or modify | |
7 | + * it under the terms of the GNU Lesser General Public License as published by | |
8 | + * the Free Software Foundation, either version 3 of the License, or | |
9 | + * (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + * GNU Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public License | |
17 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | + *) | |
19 | + | |
20 | +open ENIAM_LCGtypes | |
21 | +open ENIAMwalTypes | |
22 | + | |
23 | +let arg_of_ctype = function | |
24 | + Int -> Atom "int" | |
25 | + | Rel -> Atom "rel" | |
26 | + (* | Sub -> LCGtypes.Atom "sub" | |
27 | + | Coord -> LCGtypes.Atom "coord" *) | |
28 | + | CompTypeUndef -> Top | |
29 | + (* | CompTypeAgr -> LCGtypes.AVar "ctype" *) | |
30 | + | |
31 | +let render_number = function | |
32 | + Number n -> Atom n | |
33 | + | NumberUndef -> Top | |
34 | + | NumberAgr -> Top | |
35 | + | |
36 | +let render_negation = function | |
37 | + Negation -> Atom "neg" | |
38 | + | Aff -> Atom "aff" | |
39 | + | NegationUndef -> Top | |
40 | + | |
41 | +let render_pos_entry = function | |
42 | + "subst" -> [Atom "subst"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"] | |
43 | + | "ppron12" -> [Atom "ppron12"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"] | |
44 | + | "ppron3" -> [Atom "ppron3"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"] | |
45 | + | "siebie" -> [Atom "siebie"; AVar "case"] | |
46 | + | "num" -> [Atom "num"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"] | |
47 | + | "intnum" -> [Atom "num"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"] | |
48 | + | "prep" -> [Atom "prep"; AVar "case"] | |
49 | + | "adj" -> [Atom "adj"; AVar "number"; AVar "case"; AVar "gender"; AVar "grad"] | |
50 | + | "adv" -> [Atom "adv"; AVar "grad"] | |
51 | + | "ger" -> [Atom "ger"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"; AVar "negation"] | |
52 | + | "pact" -> [Atom "pact"; AVar "number"; AVar "case"; AVar "gender"; AVar "negation"] | |
53 | + | "ppas" -> [Atom "ppas"; AVar "number"; AVar "case"; AVar "gender"; AVar "negation"] | |
54 | + | "inf" -> [Atom "inf"; AVar "aspect"; AVar "negation"] | |
55 | + | "qub" -> [Atom "qub"] | |
56 | + | "compar" -> [Atom "compar"; AVar "case"] | |
57 | + | "comp" -> [Atom "comp"; AVar "ctype"] | |
58 | + | "fin" -> [Atom "pers"; AVar "negation"] | |
59 | + | "praet" -> [Atom "pers"; AVar "negation"] | |
60 | + | "pred" -> [Atom "pers"; AVar "negation"] | |
61 | + | "winien" -> [Atom "pers"; AVar "negation"] | |
62 | + | "bedzie" -> [Atom "pers"; AVar "negation"] | |
63 | + | s -> failwith ("render_pos_entry: " ^ s) | |
64 | + | |
65 | +let render_pos = function (* I introduce agreements rather than feature values, because feature values are supplied by the lexeme, while agreements bind them to the values at the governor *) | 
66 | + | SUBST(number,Case case) -> [Atom "subst"; render_number number; Atom case; Top; Top] | |
67 | + | SUBST(_,NomAgr) -> [Atom "subst"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] | |
68 | + | SUBST(_,GenAgr) -> [Atom "subst"; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"] | |
69 | + | SUBST(_,AllAgr) -> [Atom "subst"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"] | |
70 | + | SUBST(number,CaseAgr) -> [Atom "subst"; render_number number; AVar "case"; Top; Top] | |
71 | + | SUBST(_,CaseUndef) -> [Atom "subst"; Top; Top; Top; Top] | |
72 | + | PPRON12(number,Case case) -> [Atom "ppron12"; render_number number; Atom case; Top; Top] | |
73 | + | PPRON3(number,Case case) -> [Atom "ppron3"; render_number number; Atom case; Top; Top] | |
74 | + | SIEBIE(Case case) -> [Atom "siebie"; Atom case] | |
75 | + | NUM(Case case,_) -> [Atom "num"; Top; Atom case; Top; Top] | |
76 | + | NUM(NomAgr,_) -> [Atom "num"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] | |
77 | +(* | NUM(CaseAgr,_) -> [Atom "num"; Top; AVar "case"; Top; Top] | |
78 | + | NUM(CaseUndef,_) -> [Atom "num"; Top; Top; Top; Top]*) | |
79 | + | PREP(Case case) -> [Atom "prep"; Atom case] | |
80 | + | ADJ(_,Case case,_,Grad grad) -> [Atom "adj"; Top; Atom case; Top; Atom grad] | |
81 | +(* | ADJ(_,NomAgr,_,_) -> [Atom "adj"; AVar "number"; Atom "nom"; AVar "gender"] | |
82 | + | ADJ(_,CaseAgr,_,_) -> [Atom "adj"; Top; AVar "case"; Top]*) | |
83 | + | ADJ(_,CaseUndef,_,Grad grad) -> [Atom "adj"; Top; Top; Top; Atom grad] | |
84 | + | ADJ(_,AllAgr,_,Grad grad) -> [Atom "adj"; AVar "number"; AVar "case"; AVar "gender"; Atom grad] | |
85 | + | ADJ(_,AllAgr,_,GradUndef) -> [Atom "adj"; AVar "number"; AVar "case"; AVar "gender"; Top] | |
86 | + | ADV (Grad grad) -> [Atom "adv"; Atom grad] | |
87 | + | ADV GradUndef -> [Atom "adv"; Top] | |
88 | + | GER(_,Case case,_,_,neg) -> [Atom "ger"; Top; Atom case; Top; Top; render_negation neg] | |
89 | +(* | GER(_,NomAgr,_,_,_) -> [Atom "ger"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] | |
90 | + | GER(_,CaseAgr,_,_,_) -> [Atom "ger"; Top; AVar "case"; Top; Top] | |
91 | + | GER(_,CaseUndef,_,_,_) -> [Atom "ger"; Top; Top; Top; Top] | |
92 | + | PACT(_,Case case,_,_,_) -> [Atom "pact"; Top; Atom case; Top] | |
93 | + | PACT(_,NomAgr,_,_,_) -> [Atom "pact"; AVar "number"; Atom "nom"; AVar "gender"]*) | |
94 | + | PACT(_,AllAgr,_,_,neg) -> [Atom "pact"; AVar "number"; AVar "case"; AVar "gender"; render_negation neg] | |
95 | +(* | PACT(_,CaseAgr,_,_,_) -> [Atom "pact"; Top; AVar "case"; Top]*) | |
96 | + | PPAS(_,Case case,_,_,neg) -> [Atom "ppas"; Top; Atom case; Top; render_negation neg] | |
97 | + | PPAS(_,CaseUndef,_,_,neg) -> [Atom "ppas"; Top; Top; Top; render_negation neg] | |
98 | + (* | PPAS(_,NomAgr,_,_,_) -> [Atom "ppas"; AVar "number"; Atom "nom"; AVar "gender"]*) | |
99 | + | PPAS(_,AllAgr,_,_,neg) -> [Atom "ppas"; AVar "number"; AVar "case"; AVar "gender"; render_negation neg] | |
100 | +(* | PPAS(_,CaseAgr,_,_,_) -> [Atom "ppas"; Top; AVar "case"; Top]*) | |
101 | + | INF(Aspect aspect,neg) -> [Atom "inf"; Atom aspect; render_negation neg] | |
102 | + | INF(AspectUndef,neg) -> [Atom "inf"; Top; render_negation neg] | |
103 | + | QUB -> [Atom "qub"] | |
104 | + | COMPAR (Case case) -> [Atom "compar"; Atom case] | |
105 | + | COMP ctype -> [Atom "comp"; arg_of_ctype ctype] | |
106 | + | PERS neg -> [Atom "pers"; render_negation neg] | |
107 | + | pos -> failwith ("render_pos: " ^ ENIAMwalStringOf.pos pos) | |
108 | + | |
109 | +let render_phrase = function | |
110 | + NP(Case case) -> Tensor[Atom "np"; Top; Atom case; Top; Top] | |
111 | + | NP NomAgr -> Tensor[Atom "np"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] | |
112 | +(* | NP GenAgr -> Tensor[Atom "np"; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"] | |
113 | + | NP AllAgr -> Tensor[Atom "np"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]*) | |
114 | + | NP CaseAgr -> Tensor[Atom "np"; Top; AVar "case"; Top; Top] | |
115 | +(* | NP CaseUndef -> Tensor[Atom "np"; Top; Top; Top; Top] | |
116 | + | PrepNP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]*) | |
117 | + | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Atom case] | |
118 | + | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case] | |
119 | + | AdjP(Case case) -> Tensor[Atom "adjp"; Top; Atom case; Top] | |
120 | + | AdjP NomAgr -> Tensor[Atom "adjp"; AVar "number"; Atom "nom"; AVar "gender"] | |
121 | + | AdjP AllAgr -> Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"] | |
122 | +(* | AdjP CaseAgr -> Tensor[Atom "adjp"; Top; AVar "case"; Top] | |
123 | + | PrepAdjP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]*) | |
124 | + | PrepAdjP(prep,Case case) -> Tensor[Atom "prepadjp"; Atom prep; Atom case] | |
125 | + (* | NumP(Case case) -> Tensor[Atom "nump"; Top; Atom case; Top; Top] | |
126 | + | NumP NomAgr -> Tensor[Atom "nump"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] | |
127 | + | NumP CaseAgr -> Tensor[Atom "nump"; Top; AVar "case"; Top; Top] | |
128 | + | NumP CaseUndef -> Tensor[Atom "nump"; Top; Top; Top; Top] | |
129 | + | PrepNumP(_,"",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top] | |
130 | + | PrepNumP(_,prep,Case case) -> Tensor[Atom "prepnump"; Atom prep; Atom case] *) | |
131 | +(* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top]*) | |
132 | + | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom prep] | |
133 | + | ComparP(prep,Case case) -> Tensor[Atom "compar"; Atom prep; Atom case] | |
134 | + (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep] *) | |
135 | + (* | IP -> Tensor[Atom "ip";Top;Top;Top] *) | |
136 | + | CP (ctype,Comp comp) -> Tensor[Atom "cp"; arg_of_ctype ctype; Atom comp] | |
137 | + (* | CP (ctype,CompUndef) -> Tensor[Atom "cp"; arg_of_ctype ctype; Top]*) | |
138 | + | NCP(Case case,ctype,Comp comp) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; arg_of_ctype ctype; Atom comp] | |
139 | + | NCP(Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top] | |
140 | + | NCP(NomAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp] | |
141 | + | NCP(NomAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top] | |
142 | + | PrepNCP(Psem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp] | |
143 | + | PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top] | |
144 | + | PrepNCP(Pnosem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp] | |
145 | + | PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top] | |
146 | + | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom aspect] | |
147 | + | InfP AspectUndef -> Tensor[Atom "infp"; Top] | |
148 | + (* | PadvP -> Tensor[Atom "padvp"] *) | |
149 | + | AdvP "misc" -> Tensor[Atom "advp"; Top] (* FIXME: a może Atom "mod" zamiast Top *) | |
150 | + | AdvP mode -> Tensor[Atom "advp"; Atom mode] | |
151 | + | ColonP -> Tensor[Atom "colonp"] | |
152 | + | FixedP lex -> Tensor[Atom "fixed"; Atom lex] | |
153 | + (* | PrepP -> Tensor[Atom "prepp";Top] | |
154 | + | Prep("",CaseAgr) -> Tensor[Atom "prep"; Top; AVar "case"] | |
155 | + | Prep("",CaseUAgr) -> Tensor[Atom "prep"; Top; AVar "ucase"] | |
156 | + | Num(AllAgr,Acm acm) -> Tensor[Atom "num"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"; Atom acm] | |
157 | + | Measure(AllUAgr) -> Tensor[Atom "measure"; AVar "unumber"; AVar "ucase"; AVar "ugender"; AVar "uperson"] *) | |
158 | + | Or -> Tensor[Atom "or"] | |
159 | + (* | Qub -> Tensor[Atom "qub"]*) | |
160 | + (* | Inclusion -> Tensor[Atom "inclusion"] | |
161 | + | Adja -> Tensor[Atom "adja"] | |
162 | + | Aglt -> Tensor[Atom "aglt"; AVar "number"; AVar "person"] | |
163 | + | AuxPast -> Tensor[Atom "aux-past"; AVar "number"; AVar "gender"; AVar "person"] | |
164 | + | AuxFut -> Tensor[Atom "aux-fut"; AVar "number"; AVar "gender"; AVar "person"] | |
165 | + | AuxImp -> Tensor[Atom "aux-imp"] | |
166 | + | Pro -> One | |
167 | + | ProNG -> One *) | |
168 | + | E Or -> Tensor[Atom "or"] | |
169 | + | E (CP(CompTypeUndef,CompUndef)) -> Tensor[Atom "cp"; Top; Top] | |
170 | + | E (NCP(NomAgr,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top] | |
171 | + | E (NP(NomAgr)) -> Tensor[Atom "np"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] | |
172 | + | E (PrepNP(Psem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Atom case] | |
173 | + | E (PrepNP(Pnosem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case] | |
174 | + | E (NP(Case case)) -> Tensor[Atom "np"; Top; Atom case; Top; Top] | |
175 | + | E (NCP(Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top] | |
176 | + | E (PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top] | |
177 | + | E (PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top] | |
178 | + | phrase -> failwith ("render_phrase: " ^ ENIAMwalStringOf.phrase phrase) | |
179 | + | |
180 | +let render_phrase_cat cat = function | |
181 | + NP(Case case) -> Tensor[Atom "np"; Atom cat; Top; Atom case; Top; Top] | |
182 | + | NP NomAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] | |
183 | + | NP VocAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"] | |
184 | +(* | NP GenAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"] | |
185 | + | NP AllAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]*) | |
186 | + | NP CaseAgr -> Tensor[Atom "np"; Atom cat; Top; AVar "case"; Top; Top] | |
187 | + | NP CaseUndef -> Tensor[Atom "np"; Atom cat; Top; Top; Top; Top] | |
188 | + | PrepNP(Psem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top] | |
189 | + | PrepNP(Psem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top] | |
190 | + | PrepNP(Psem,"_",Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Atom case] | |
191 | + | PrepNP(Psem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Top] | |
192 | + | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case] | |
193 | + | PrepNP(Pnosem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top] | |
194 | + | PrepNP(Pnosem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top] | |
195 | + | PrepNP(Pnosem,"_",Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Atom case] | |
196 | + | PrepNP(Pnosem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Top] | |
197 | + | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case] | |
198 | + | AdjP(Case case) -> Tensor[Atom "adjp"; Atom cat; Top; Atom case; Top] | |
199 | + | AdjP NomAgr -> Tensor[Atom "adjp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"] | |
200 | + | AdjP AllAgr -> Tensor[Atom "adjp"; Atom cat; AVar "number"; AVar "case"; AVar "gender"] | |
201 | +(* | AdjP CaseAgr -> Tensor[Atom "adjp"; Top; AVar "case"; Top] | |
202 | + | PrepAdjP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]*) | |
203 | + | PrepAdjP(prep,Case case) -> Tensor[Atom "prepadjp"; Atom cat; Atom prep; Atom case] | |
204 | + (* | NumP(Case case) -> Tensor[Atom "nump"; Top; Atom case; Top; Top] | |
205 | + | NumP NomAgr -> Tensor[Atom "nump"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] | |
206 | + | NumP CaseAgr -> Tensor[Atom "nump"; Top; AVar "case"; Top; Top] | |
207 | + | NumP CaseUndef -> Tensor[Atom "nump"; Top; Top; Top; Top] | |
208 | + | PrepNumP(_,"",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top] | |
209 | + | PrepNumP(_,prep,Case case) -> Tensor[Atom "prepnump"; Atom prep; Atom case] *) | |
210 | +(* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top]*) | |
211 | + | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom cat; Atom prep] | |
212 | + | ComparP(prep,Case case) -> Tensor[Atom "compar"; Atom cat; Atom prep; Atom case] | |
213 | + (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep] *) | |
214 | + (* | IP -> Tensor[Atom "ip";Top;Top;Top] *) | |
215 | + | CP (ctype,Comp comp) -> Tensor[Atom "cp"; Atom cat; arg_of_ctype ctype; Atom comp] | |
216 | + (* | CP (ctype,CompUndef) -> Tensor[Atom "cp"; arg_of_ctype ctype; Top]*) | |
217 | + | NCP(Case case,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; arg_of_ctype ctype; Atom comp] | |
218 | + | NCP(Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; Top; Top] | |
219 | + | NCP(NomAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp] | |
220 | + | NCP(NomAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top] | |
221 | + | NCP(VocAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp] | |
222 | + | NCP(VocAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; Top; Top] | |
223 | + | PrepNCP(Psem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom cat; Atom "sem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp] | |
224 | + | PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom cat; Atom "sem"; Atom prep; Atom case; Top; Top] | |
225 | + | PrepNCP(Pnosem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom cat; Atom "nosem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp] | |
226 | + | PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom cat; Atom "nosem"; Atom prep; Atom case; Top; Top] | |
227 | + | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom cat; Atom aspect] | |
228 | + | InfP AspectUndef -> Tensor[Atom "infp"; Atom cat; Top] | |
229 | + (* | PadvP -> Tensor[Atom "padvp"] *) | |
230 | + | AdvP "misc" -> Tensor[Atom "advp"; Atom cat; Top] (* FIXME: a może Atom "mod" zamiast Top *) | |
231 | + | AdvP "" -> Tensor[Atom "advp"; Atom cat; Top] (* FIXME: a może Atom "mod" zamiast Top *) | |
232 | + | AdvP mode -> Tensor[Atom "advp"; Atom cat; Atom mode] | |
233 | + | ColonP -> Tensor[Atom "colonp"; Atom cat] | |
234 | + (* | PrepP -> Tensor[Atom "prepp";Top] | |
235 | + | Prep("",CaseAgr) -> Tensor[Atom "prep"; Top; AVar "case"] | |
236 | + | Prep("",CaseUAgr) -> Tensor[Atom "prep"; Top; AVar "ucase"] | |
237 | + | Num(AllAgr,Acm acm) -> Tensor[Atom "num"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"; Atom acm] | |
238 | + | Measure(AllUAgr) -> Tensor[Atom "measure"; AVar "unumber"; AVar "ucase"; AVar "ugender"; AVar "uperson"] *) | |
239 | + (* | Qub -> Tensor[Atom "qub"]*) | |
240 | + (* | Inclusion -> Tensor[Atom "inclusion"] | |
241 | + | Adja -> Tensor[Atom "adja"] | |
242 | + | Aglt -> Tensor[Atom "aglt"; AVar "number"; AVar "person"] | |
243 | + | AuxPast -> Tensor[Atom "aux-past"; AVar "number"; AVar "gender"; AVar "person"] | |
244 | + | AuxFut -> Tensor[Atom "aux-fut"; AVar "number"; AVar "gender"; AVar "person"] | |
245 | + | AuxImp -> Tensor[Atom "aux-imp"] | |
246 | + | Pro -> One | |
247 | + | ProNG -> One *) | |
248 | + | E (CP(CompTypeUndef,CompUndef)) -> Tensor[Atom "cp"; Atom cat; Top; Top] | |
249 | + | E (NCP(NomAgr,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top] | |
250 | + | E (NP(NomAgr)) -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] | |
251 | + | E (PrepNP(Psem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case] | |
252 | + | E (PrepNP(Pnosem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case] | |
253 | + | E (NP(Case case)) -> Tensor[Atom "np"; Atom cat; Top; Atom case; Top; Top] | |
254 | + | E (NCP(Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; Top; Top] | |
255 | + | E (PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom cat; Atom "sem"; Atom prep; Atom case; Top; Top] | |
256 | + | E (PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom cat; Atom "nosem"; Atom prep; Atom case; Top; Top] | |
257 | + | phrase -> failwith ("render_phrase_cat: " ^ ENIAMwalStringOf.phrase phrase) | |
258 | + | |
259 | +let render_morf = function | |
260 | + | Null -> One | |
261 | + (* | X -> Tensor[Atom "X"] | |
262 | + | Lex lex -> Tensor[Atom lex] *) | |
263 | + | LexArg(id,lex,pos) -> Tensor([Atom "lex";Atom (string_of_int id);Atom lex] @ render_pos pos) | |
264 | + | SimpleLexArg(lex,pos) -> Tensor([Atom "lex";Atom lex] @ render_pos pos) | |
265 | + | phrase -> render_phrase phrase | |
266 | + | |
267 | +let render_morf_cat cats = function | |
268 | + | Null -> [One] | |
269 | + | Pro -> [One] | |
270 | + | ProNG -> [One] | |
271 | + | FixedP lex -> [Tensor[Atom "fixed"; Atom lex]] | |
272 | + | Or -> [Tensor[Atom "or"]] | |
273 | + | E Or -> [Tensor[Atom "or"]] | |
274 | + (* | X -> Tensor[Atom "X"] | |
275 | + | Lex lex -> Tensor[Atom lex] *) | |
276 | + | LexArg(id,lex,pos) -> [Tensor([Atom "lex";Atom (string_of_int id);Atom lex] @ render_pos pos)] | |
277 | + | SimpleLexArg(lex,pos) -> [Tensor([Atom "lex";Atom lex] @ render_pos pos)] | |
278 | + | phrase -> Xlist.map cats (fun cat -> render_phrase_cat cat phrase) | |
279 | + | |
280 | +(* let extract_sel_prefs sel_prefs = | |
281 | + Xlist.map sel_prefs (function | |
282 | + SynsetName s -> s | |
283 | + | _ -> failwith "extract_sel_prefs") *) | |
284 | + | |
285 | +let render_schema schema = | |
286 | + Xlist.map schema (fun p -> | |
287 | + match Xlist.map p.morfs render_morf with | |
288 | + [] -> failwith "render_schema" | |
289 | + | [s] -> Both,s | |
290 | + | l -> Both,Plus l) | |
291 | + | |
292 | +let translate_dir = function | |
293 | + Both_ -> Both | |
294 | + | Forward_ -> Forward | |
295 | + | Backward_ -> Backward | |
296 | + | |
297 | +let render_schema_cat schema = | |
298 | + Xlist.map schema (fun p -> | |
299 | + match List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs)) with | |
300 | + [] -> failwith "render_schema" | |
301 | + | [s] -> translate_dir p.dir,s | |
302 | + | l -> translate_dir p.dir,Plus l) | |
303 | + | |
304 | +let render_simple_schema schema = | |
305 | + Xlist.map schema (fun morfs -> | |
306 | + Both,Plus(One :: Xlist.map morfs render_morf)) | |
307 | + | |
308 | +let render_connected_schema schema = | |
309 | + Xlist.map schema (fun p -> | |
310 | + {p with morfs=Xlist.map p.morfs (fun morf -> LCG (render_morf morf))}) | |
311 | + | |
312 | +let render_connected_schema_cat schema = | |
313 | + Xlist.map schema (fun p -> | |
314 | + {p with | |
315 | + morfs=Xlist.map (List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs))) (fun morf -> LCG morf)}) | |
316 | + | |
317 | +(* FIXME: here one would need to add the usual rules for verbs concerning their negation, the agglutinant, etc. *) | 
318 | +let render_lex_entry = function | |
319 | + SimpleLexEntry(lemma,pos) -> Tensor([Atom "lex";Atom lemma] @ render_pos_entry pos) | |
320 | + | LexEntry(id,lemma,pos,NoRestr,schema) -> | |
321 | + ImpSet(Tensor([Atom "lex";Atom (string_of_int id);Atom lemma] @ render_pos_entry pos),render_schema schema) | |
322 | + (*Xlist.map (transform_entry pos lemma NegationUndef PredFalse AspectUndef schema) (fun (sel,schema) -> | |
323 | + sel,LexEntry(id,lemma,pos,NoRestr,schema))*) | |
324 | + | ComprepNPEntry(prep,NoRestr,schema) -> ImpSet(Tensor[Atom "comprepnp"; Atom prep],render_schema schema) | |
325 | + (*Xlist.map (transform_entry "comprep" s NegationUndef PredFalse AspectUndef schema) (fun (sel,schema) -> | |
326 | + sel,ComprepNPEntry(s,NoRestr,schema))*) | |
327 | + | LexEntry(id,lemma,pos,_,[]) (*as entry*) -> | |
328 | + ImpSet(Tensor([Atom "lex";Atom (string_of_int id);Atom lemma] @ render_pos_entry pos),[Both,Tensor[AVar "schema"]]) | |
329 | + | entry -> failwith ("render_entry:" ^ ENIAMwalStringOf.lex_entry entry) | |
330 | + | |
331 | +(* let schemata,entries = ENIAMvalence.prepare_all_valence ENIAMwalParser.phrases ENIAMwalParser.schemata ENIAMwalParser.entries *) | |
332 | + | |
333 | +(* let _ = | |
334 | + (* Entries.map schemata (fun pos lemma (selectors,schema) -> | |
335 | + (* Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema); *) | |
336 | + render_schema schema) *) | |
337 | + Entries.map entries (fun pos lemma (selectors,entry) -> | |
338 | + (* Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema); *) | |
339 | + selectors,render_lex_entry entry) *) | |
340 | + | |
341 | +let adjunct morfs = {empty_position with gf=ADJUNCT; is_necessary=Opt; morfs=Xlist.map morfs (fun morf -> LCG morf)} | |
342 | +let adjunct_multi dir morfs = {empty_position with gf=ADJUNCT; is_necessary=Multi; dir=dir; morfs=Xlist.map morfs (fun morf -> LCG morf)} | |
343 | +let adjunct_dir dir morfs = {empty_position with gf=ADJUNCT; is_necessary=Opt; dir=dir; morfs=Xlist.map morfs (fun morf -> LCG morf)} | |
344 | +let adjunct_ce ce morfs = {empty_position with gf=ADJUNCT; ce=[ce]; is_necessary=Opt; morfs=Xlist.map morfs (fun morf -> LCG morf)} | |
345 | + | |
346 | +let render_comprep prep = Both,Plus[One;Tensor[Atom "comprepnp"; Atom prep]] | |
347 | + | |
348 | +let render_connected_comprep prep = adjunct [Tensor[Atom "comprepnp"; Atom prep]] | |
349 | + | |
350 | +let render_prepnp prep cases = | |
351 | + Both,Plus(One :: List.flatten (Xlist.map cases (fun case -> | |
352 | + [Tensor[Atom "prepnp"; Atom prep; Atom case]; | |
353 | + Tensor[Atom "prepncp"; Atom prep; Atom case; Top; Top]]))) | |
354 | + | |
355 | +let render_connected_prepnp prep cases = | |
356 | + adjunct (List.flatten (Xlist.map cases (fun case -> | |
357 | + [Tensor[Atom "prepnp"; Atom prep; Atom case]; | |
358 | + Tensor[Atom "prepncp"; Atom prep; Atom case; Top; Top]]))) | |
359 | + | |
360 | +let render_prepadjp prep cases = | |
361 | + let postp = if prep = "z" || prep = "po" || prep = "na" then [Tensor[Atom "prepadjp"; Atom prep; Atom "postp"]] else [] in | |
362 | + Both,Plus(One :: postp @ (Xlist.map cases (fun case -> | |
363 | + Tensor[Atom "prepadjp"; Atom prep; Atom case]))) | |
364 | + | |
365 | +let render_connected_prepadjp prep cases = | |
366 | + let postp = if prep = "z" || prep = "po" || prep = "na" then [Tensor[Atom "prepadjp"; Atom prep; Atom "postp"]] else [] in | |
367 | + adjunct (postp @ (Xlist.map cases (fun case -> | |
368 | + Tensor[Atom "prepadjp"; Atom prep; Atom case]))) | |
369 | + | |
370 | +let render_compar prep = Both,Plus[One;Tensor[Atom "compar"; Atom prep; Top]] | |
371 | + | |
372 | +let render_connected_compar prep = adjunct [Tensor[Atom "compar"; Atom prep; Top]] | |
373 | + | |
374 | +let verb_adjuncts_simp = [ | |
375 | + Both, Plus[One;Tensor[Atom "advp"; Atom "pron"]]; | |
376 | + Both, Plus[One;Tensor[Atom "advp"; Atom "locat"]]; | |
377 | + Both, Plus[One;Tensor[Atom "advp"; Atom "abl"]]; | |
378 | + Both, Plus[One;Tensor[Atom "advp"; Atom "adl"]]; | |
379 | + Both, Plus[One;Tensor[Atom "advp"; Atom "perl"]]; | |
380 | + Both, Plus[One;Tensor[Atom "advp"; Atom "temp"]]; | |
381 | + Both, Plus[One;Tensor[Atom "advp"; Atom "dur"]]; | |
382 | + Both, Plus[One;Tensor[Atom "advp"; Atom "mod"]]; | |
383 | + Both, Plus[One;Tensor[Atom "np";Top;Atom "dat"; Top; Top];Tensor[Atom "ncp"; Top; Atom "dat"; Top; Top; Top; Top]]; | |
384 | + Both, Plus[One;Tensor[Atom "np";Top;Atom "inst"; Top; Top];Tensor[Atom "ncp"; Top; Atom "inst"; Top; Top; Top; Top]]; | |
385 | + Both, Plus[One;Tensor[Atom "date"];Tensor[Atom "day-lex"];Tensor[Atom "day-month"];Tensor[Atom "day"]]; | |
386 | + Forward, Plus[One;Tensor[Atom "cp";Top; Top]]; (* FIXME: to powinno być jako ostatnia lista argumentów *) | |
387 | + Both, Plus[One;Tensor[Atom "or"]]; | |
388 | + Both, Plus[One;Tensor[Atom "lex";Atom "się";Atom "qub"]]; | |
389 | + Both, Plus[One;Tensor[Atom "padvp"]]; | |
390 | +] | |
391 | + | |
392 | +let verb_connected_adjuncts_simp = [ | |
393 | + adjunct [Tensor[Atom "advp"; Atom "pron"]]; | |
394 | + adjunct [Tensor[Atom "advp"; Atom "locat"]]; | |
395 | + adjunct [Tensor[Atom "advp"; Atom "abl"]]; | |
396 | + adjunct [Tensor[Atom "advp"; Atom "adl"]]; | |
397 | + adjunct [Tensor[Atom "advp"; Atom "perl"]]; | |
398 | + adjunct [Tensor[Atom "advp"; Atom "temp"]]; | |
399 | + adjunct [Tensor[Atom "advp"; Atom "dur"]]; | |
400 | + adjunct [Tensor[Atom "advp"; Atom "mod"]]; | |
401 | + adjunct [Tensor[Atom "np";Top;Atom "dat"; Top; Top];Tensor[Atom "ncp"; Top; Atom "dat"; Top; Top; Top; Top]]; | |
402 | + adjunct [Tensor[Atom "np";Top;Atom "inst"; Top; Top];Tensor[Atom "ncp"; Top; Atom "inst"; Top; Top; Top; Top]]; | |
403 | + adjunct [Tensor[Atom "date"];Tensor[Atom "day-lex"];Tensor[Atom "day-month"];Tensor[Atom "day"]]; | |
404 | + adjunct_dir Forward_ [Tensor[Atom "cp";Top; Top]]; | |
405 | + adjunct [Tensor[Atom "or"]]; | |
406 | + adjunct [Tensor[Atom "lex";Atom "się";Atom "qub"]]; | |
407 | + adjunct_ce "3" [Tensor[Atom "padvp"]]; | |
408 | +] | |
409 | + | |
410 | +let proper_noun_adjuncts_simp = [ | |
411 | + Both, Plus[One;Tensor[Atom "np";Top;Atom "gen"; Top; Top];Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top]]; | |
412 | + Forward, Plus[One;Tensor[Atom "np";Top;Atom "nom"; Top; Top];Tensor[Atom "np";Top;AVar "case"; Top; Top]]; | |
413 | + Backward, Maybe(Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]); | |
414 | + Forward, Plus[One;Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]]; | |
415 | +] | |
416 | + | |
417 | +let proper_noun_connected_adjuncts_simp = [ | |
418 | + adjunct [Tensor[Atom "np";Top;Atom "gen"; Top; Top];Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top]]; | |
419 | + adjunct_dir Forward_ [Tensor[Atom "np";Top;Atom "nom"; Top; Top];Tensor[Atom "np";Top;AVar "case"; Top; Top]]; | |
420 | + adjunct_multi Backward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]]; | |
421 | + adjunct_dir Forward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]]; | |
422 | +] | |
423 | + | |
424 | +let common_noun_adjuncts_simp = [ | |
425 | + Both, Plus[One;Tensor[Atom "np";Top;Atom "gen"; Top; Top];Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top]]; | |
426 | + Forward, Plus[One;Tensor[Atom "np";Top;Atom "nom"; Top; Top];Tensor[Atom "np";Top;AVar "case"; Top; Top]]; | |
427 | + Backward, Maybe(Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]); | |
428 | + Forward, Plus[One;Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]]; | |
429 | +] | |
430 | + | |
431 | +let common_noun_connected_adjuncts_simp = [ | |
432 | + adjunct [Tensor[Atom "np";Top;Atom "gen"; Top; Top];Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top]]; | |
433 | + adjunct_dir Forward_ [Tensor[Atom "np";Top;Atom "nom"; Top; Top];Tensor[Atom "np";Top;AVar "case"; Top; Top]]; | |
434 | + adjunct_multi Backward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]]; | |
435 | + adjunct_dir Forward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]]; | |
436 | +] | |
437 | + | |
438 | +let measure_noun_adjuncts_simp = [ | |
439 | + Backward, Maybe(Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]); | |
440 | + Forward, Plus[One;Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]]; | |
441 | +] | |
442 | + | |
443 | +let measure_noun_connected_adjuncts_simp = [ | |
444 | + adjunct_multi Backward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]]; | |
445 | + adjunct_dir Forward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]]; | |
446 | +] | |
447 | + | |
448 | +let adj_adjuncts_simp = [ | |
449 | + Both, Plus[One;Tensor[Atom "advp"; Top]]; | |
450 | +] | |
451 | + | |
452 | +let adj_connected_adjuncts_simp = [ | |
453 | + adjunct [Tensor[Atom "advp"; Top]]; | |
454 | +] | |
455 | + | |
456 | +let adv_adjuncts_simp = [ | |
457 | + Both, Plus[One;Tensor[Atom "advp"; Top]]; | |
458 | + ] | |
459 | + | |
460 | +let adv_connected_adjuncts_simp = [ | |
461 | + adjunct [Tensor[Atom "advp"; Top]]; | |
462 | + ] | |
463 | + | |
464 | +let assing_prep_morfs = function | |
465 | + "po","postp" -> [ | |
466 | + LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "dat"; Atom "m1"]); | |
467 | + LCG(Tensor[Atom "adjp"; Top; Atom "postp"; Top])] | |
468 | + | "z","postp" -> [LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "nom"; Atom "f"])] | |
469 | + | "na","postp" -> [LCG(Tensor[Atom "advp"; Top])] | |
470 | + | _,case -> [ | |
471 | + LCG(Tensor[Atom "np"; Top; Atom case; Top; Top]); | |
472 | + LCG(Tensor[Atom "adjp"; Top; Atom case; Top])] | |
473 | + | |
474 | +let prep_morfs = [ | |
475 | + LCG(Tensor[Atom "np"; Top; Atom "case"; Top; Top]); | |
476 | + LCG(Tensor[Atom "adjp"; Top; Atom "case"; Top]); | |
477 | + LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "dat"; Atom "m1"]); | |
478 | + LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "nom"; Atom "f"]); | |
479 | + LCG(Tensor[Atom "advp"; Top]); | |
480 | + LCG(Tensor[Atom "year"]); | |
481 | + LCG(Tensor[Atom "hour-minute"]); | |
482 | + LCG(Tensor[Atom "day-month"]); | |
483 | + LCG(Tensor[Atom "hour"]); | |
484 | + LCG(Tensor[Atom "day"]); | |
485 | + LCG(Tensor[Atom "date"]); | |
486 | + ] | |
487 | + | |
488 | +let compar_morfs = [ | |
489 | + LCG(Tensor[Atom "np"; Top; Atom "case"; Top; Top]); | |
490 | + LCG(Tensor[Atom "adjp"; Top; Atom "case"; Top]); | |
491 | + LCG(Tensor[Atom "prepnp"; Top; Top]); | |
492 | + LCG(Tensor[Atom "prepadjp"; Top; Top]); | |
493 | + ] | |
... | ... |
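
For contrast with the _old variant kept above, the other visible change is that schema rendering now threads the position's role into the morf renderer (render_morf_cat p.cat_prefs p.role instead of render_morf_cat p.cat_prefs). The sketch below is a self-contained, stripped-down stand-in: the position record keeps only the role, cat_prefs and morfs fields, the string output is purely illustrative, and the _old suffix is used here only to mark the pre-commit shape.

type morf = NPnom | AdvPmisc

type position = { role : string; cat_prefs : string list; morfs : morf list }

(* Before this commit, only the category preferences reached the morf renderer: *)
let render_morf_cat_old cats _morf =
  List.map (fun cat -> Printf.sprintf "arg(cat=%s)" cat) cats

(* After this commit, the position's semantic role is passed along as well,
   so every rendered argument can carry a role atom: *)
let render_morf_cat cats role _morf =
  List.map (fun cat -> Printf.sprintf "arg(cat=%s,role=%s)" cat role) cats

let render_schema_cat schema =
  List.map (fun p ->
    List.flatten (List.map (render_morf_cat p.cat_prefs p.role) p.morfs)) schema

let () =
  let schema = [ { role = "Theme"; cat_prefs = ["X"]; morfs = [NPnom; AdvPmisc] } ] in
  List.iter (fun args -> print_endline (String.concat "; " args)) (render_schema_cat schema)
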