Commit 233ef97530daf01385c95d087d36604b453d2063
merge
Showing
79 changed files
with
3569 additions
and
707 deletions
Too many changes to show.
To preserve performance only 54 of 79 files are displayed.
LCGlexicon/ENIAM_LCGlexicon.ml
... | ... | @@ -165,7 +165,7 @@ let make_rules x_flag filename = |
165 | 165 | dict_of_grammar lexicon |
166 | 166 | |
167 | 167 | let find_rules rules cats = |
168 | - let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith ("find_rules: unable to find rules for category " ^ cats.pos) in | |
168 | + let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith ("find_rules: unable to find rules for category '" ^ cats.pos ^ "' lemma='" ^ cats.lemma ^ "'") in | |
169 | 169 | (* Printf.printf "find_rules: %s %s |rules|=%d\n" cats.lemma cats.pos (Xlist.size rules); *) |
170 | 170 | let rules = try StringMap.find lex_rules cats.lemma @ rules with Not_found -> rules in |
171 | 171 | Xlist.fold rules [] (fun rules (selectors,syntax,semantics) -> |
... | ... | @@ -190,9 +190,12 @@ let assign_valence valence rules = |
190 | 190 | if ENIAM_LCGrenderer.count_avar "schema" syntax > 0 then |
191 | 191 | Xlist.fold valence l (fun l (selectors,schema) -> |
192 | 192 | try |
193 | + (* Printf.printf "selectors: %s\n" (string_of_selectors selectors); *) | |
194 | + (* Printf.printf "cats: %s\n%!" (string_of_cats cats); *) | |
193 | 195 | let cats = apply_selectors cats selectors in |
196 | + (* print_endline "passed"; *) | |
194 | 197 | (cats,(bracket,quant,ENIAM_LCGrenderer.substitute_schema "schema" schema syntax),semantics) :: l |
195 | - with Not_found -> l) | |
198 | + with Not_found -> ((*print_endline "rejected";*) l)) | |
196 | 199 | else (cats,(bracket,quant,syntax),semantics) :: l) |
197 | 200 | |
198 | 201 | type labels = { |
... | ... | @@ -231,10 +234,13 @@ let make_quantification e rules = |
231 | 234 | let make_node id orth lemma pos syntax weight cat_list is_raised = |
232 | 235 | let attrs = Xlist.fold cat_list [] (fun attrs -> function |
233 | 236 | | Lemma -> attrs |
237 | + | IncludeLemmata -> attrs | |
234 | 238 | | Pos -> attrs |
235 | 239 | | Pos2 -> attrs |
236 | 240 | | Cat -> ("CAT",SubstVar "cat") :: attrs |
237 | - | Proj -> ("PROJ",SubstVar "proj") :: attrs | |
241 | + | Coerced -> ("COERCED",SubstVar "coerced") :: attrs | |
242 | + | Role -> ("ROLE",SubstVar "role") :: attrs | |
243 | + | SNode -> ("NODE",SubstVar "node") :: attrs | |
238 | 244 | | Number -> ("NUM",SubstVar "number") :: attrs |
239 | 245 | | Case -> ("CASE",SubstVar "case") :: attrs |
240 | 246 | | Gender -> ("GEND",SubstVar "gender") :: attrs |
... | ... | @@ -251,16 +257,24 @@ let make_node id orth lemma pos syntax weight cat_list is_raised = |
251 | 257 | | Ctype -> ("CTYPE", SubstVar "ctype") :: attrs |
252 | 258 | | Mode -> ("MODE", SubstVar "mode") :: attrs |
253 | 259 | | Psem -> ("PSEM", SubstVar "psem") :: attrs |
260 | + | Icat -> attrs | |
254 | 261 | | Inumber -> attrs |
255 | 262 | | Igender -> attrs |
256 | 263 | | Iperson -> attrs |
257 | 264 | | Nperson -> attrs |
265 | + | Ncat -> attrs | |
258 | 266 | | Plemma -> attrs |
259 | 267 | | Unumber -> attrs |
260 | 268 | | Ucase -> attrs |
261 | 269 | | Ugender -> attrs |
262 | 270 | | Uperson -> attrs |
263 | - | Amode -> attrs) in | |
271 | + | Amode -> attrs | |
272 | + | Irole -> attrs | |
273 | + | Prole -> attrs | |
274 | + | Nrole -> attrs | |
275 | + | Inode -> attrs | |
276 | + | Pnode -> attrs | |
277 | + | Nnode -> attrs) in | |
264 | 278 | (* | s -> (string_of_selector s, Dot) :: attrs) in *) |
265 | 279 | (* | "lex" -> ("LEX",Val "+") :: attrs *) |
266 | 280 | (* | s -> failwith ("make_node: " ^ (string_of_selector s))) in *) |
... | ... | @@ -312,6 +326,7 @@ let create_entries rules id orth cats valence lex_entries = |
312 | 326 | (* variable_name_ref := []; *) |
313 | 327 | if cats.pos="interp" && cats.lemma="<clause>" then (BracketSet(Forward),Dot) :: l else |
314 | 328 | if cats.pos="interp" && cats.lemma="</clause>" then (BracketSet(Backward),Dot) :: l else |
329 | + if (cats.pos2="noun" || cats.pos2="verb" || cats.pos2="adj" || cats.pos2="adv") && cats.cat="X" && not !default_category_flag && cats.pos <> "aglt" then l else | |
315 | 330 | let e = get_labels () in |
316 | 331 | (* print_endline "create_entries 1"; *) |
317 | 332 | let rules = find_rules rules cats in |
... | ... |
LCGlexicon/ENIAM_LCGlexiconParser.ml
... | ... | @@ -95,6 +95,7 @@ let match_relation = function |
95 | 95 | |
96 | 96 | let rec split_mid i0 rev = function |
97 | 97 | [i,s] -> List.rev ((i,s) :: rev) |
98 | + | (i1,s) :: (i2,"|") :: (i3,"|") :: l -> raise (ParseError("split_mid", "duplicated delimeter found", i2)) | |
98 | 99 | | (i1,s) :: (i2,"|") :: l -> split_mid i2 ((i1,s) :: rev) l |
99 | 100 | | [] -> raise (ParseError("split_mid", "empty", i0)) |
100 | 101 | | (i,s) :: l -> raise (ParseError("split_mid", "delimiter not found: " ^ String.concat " " (s :: Xlist.map l snd), i)) |
... | ... |
LCGlexicon/ENIAM_LCGlexiconTypes.ml
... | ... | @@ -17,7 +17,7 @@ |
17 | 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 | *) |
19 | 19 | |
20 | -type categories = {lemma: string; pos: string; pos2: string; cat: string; proj: string list; | |
20 | +type categories = {lemma: string; pos: string; pos2: string; cat: string; coerced: string list; roles: string list; snode: string list; | |
21 | 21 | numbers: string list; cases: string list; genders: string list; persons: string list; |
22 | 22 | grads: string list; praeps: string list; acms: string list; |
23 | 23 | aspects: string list; negations: string list; moods: string list; tenses: string list; |
... | ... | @@ -25,10 +25,12 @@ type categories = {lemma: string; pos: string; pos2: string; cat: string; proj: |
25 | 25 | } |
26 | 26 | |
27 | 27 | type selector = |
28 | - Lemma | (*NewLemma |*) Pos | Pos2 | Cat | Proj | Number | Case | Gender | Person | Grad | Praep | | |
28 | + Lemma | IncludeLemmata | (*NewLemma |*) Pos | Pos2 | Cat | Coerced | Role | SNode | | |
29 | + Number | Case | Gender | Person | Grad | Praep | | |
29 | 30 | Acm | Aspect | Negation | Mood | Tense | Nsyn | Nsem | Ctype | Mode | Psem | |
30 | - Inumber | Igender | Iperson | Nperson | Plemma | | |
31 | - Unumber | Ucase | Ugender | Uperson | Amode | |
31 | + Icat | Inumber | Igender | Iperson | Nperson | Ncat | Plemma | | |
32 | + Unumber | Ucase | Ugender | Uperson | Amode | | |
33 | + Irole | Prole | Nrole | Inode | Pnode | Nnode | |
32 | 34 | |
33 | 35 | module OrderedSelector = struct |
34 | 36 | type t = selector |
... | ... | @@ -73,12 +75,14 @@ type selector_relation = Eq | Neq (*| StrictEq*) |
73 | 75 | |
74 | 76 | *) |
75 | 77 | |
76 | -let empty_cats = {lemma=""; pos=""; pos2=""; cat="X"; proj=[]; | |
78 | +let empty_cats = {lemma=""; pos=""; pos2=""; cat="X"; coerced=[]; roles=[]; snode=[]; | |
77 | 79 | numbers=[]; cases=[]; genders=[]; persons=[]; |
78 | 80 | grads=[]; praeps=[]; acms=[]; aspects=[]; negations=[]; moods=[]; tenses=[]; |
79 | 81 | nsyn=[]; nsem=[]; modes=[]; psem=[]; |
80 | 82 | } |
81 | 83 | |
84 | +let default_category_flag = ref true | |
85 | + | |
82 | 86 | let resource_path = |
83 | 87 | try Sys.getenv "ENIAM_RESOURCE_PATH" |
84 | 88 | with Not_found -> |
... | ... | @@ -94,7 +98,7 @@ let data_path = |
94 | 98 | let rules_filename = resource_path ^ "/LCGlexicon/lexicon-pl.dic" |
95 | 99 | let user_lexicon_filename = data_path ^ "/lexicon.dic" |
96 | 100 | let user_cats_filename = data_path ^ "/senses.tab" |
97 | -let user_proj_filename = data_path ^ "/projections.tab" | |
101 | +let user_coerced_filename = data_path ^ "/coercions.tab" | |
98 | 102 | |
99 | 103 | let subst_uncountable_lexemes_filename = resource_path ^ "/LCGlexicon/subst_uncountable.dat" |
100 | 104 | let subst_uncountable_lexemes_filename2 = resource_path ^ "/LCGlexicon/subst_uncountable_stare.dat" |
... | ... |
LCGlexicon/ENIAM_LCGlexiconTypes_old.ml
0 → 100644
1 | +(* | |
2 | + * ENIAM_LCGlexicon is a library that provides LCG lexicon form Polish | |
3 | + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | + * | |
6 | + * This library is free software: you can redistribute it and/or modify | |
7 | + * it under the terms of the GNU Lesser General Public License as published by | |
8 | + * the Free Software Foundation, either version 3 of the License, or | |
9 | + * (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + * GNU Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public License | |
17 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | + *) | |
19 | + | |
20 | +type categories = {lemma: string; pos: string; pos2: string; cat: string; coerced: string list; | |
21 | + numbers: string list; cases: string list; genders: string list; persons: string list; | |
22 | + grads: string list; praeps: string list; acms: string list; | |
23 | + aspects: string list; negations: string list; moods: string list; tenses: string list; | |
24 | + nsyn: string list; nsem: string list; modes: string list; psem: string list; | |
25 | + } | |
26 | + | |
27 | +type selector = | |
28 | + Lemma | (*NewLemma |*) Pos | Pos2 | Cat | Coerced | Number | Case | Gender | Person | Grad | Praep | | |
29 | + Acm | Aspect | Negation | Mood | Tense | Nsyn | Nsem | Ctype | Mode | Psem | | |
30 | + Icat | Inumber | Igender | Iperson | Nperson | Ncat | Plemma | | |
31 | + Unumber | Ucase | Ugender | Uperson | Amode | |
32 | + | |
33 | +module OrderedSelector = struct | |
34 | + type t = selector | |
35 | + let compare = compare | |
36 | +end | |
37 | + | |
38 | +module SelectorMap=Xmap.Make(OrderedSelector) | |
39 | +module SelectorSet=Xset.Make(OrderedSelector) | |
40 | + | |
41 | +type rule = | |
42 | + Bracket | |
43 | + | Quant of (selector * ENIAM_LCGtypes.internal_grammar_symbol) list | |
44 | + | Raised of selector list | |
45 | + | Syntax of ENIAM_LCGtypes.grammar_symbol | |
46 | + | Sem of string | |
47 | + | |
48 | +type rule_sem = | |
49 | + BasicSem of selector list | |
50 | + | RaisedSem of selector list * selector list | |
51 | + | TermSem of selector list * string | |
52 | + | QuotSem of selector list | |
53 | + | InclusionSem of selector list | |
54 | + | ConjSem of selector list | |
55 | + | |
56 | +type selector_relation = Eq | Neq (*| StrictEq*) | |
57 | + | |
58 | +(* x="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jedną z wartości atrybutu x, reguła zostanie wykonana dla x z usuniętymi pozostałymi wartościami *) | |
59 | +(* x!="s" oznacza, że żeby reguła została użyta token musi mieć jako jedną z wartości atrybutu x symbol inny od "s", reguła zostanie wykonana dla x z usuniętą wartością "s" *) | |
60 | +(* x=="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jednyną z wartość atrybutu x *) | |
61 | + | |
62 | +(* wzajemne zależności między kategoriami (np między case i person w subst) są rozstrzygane w ENIAMcategories *) | |
63 | + | |
64 | +(* Basic oznacza że kwantyfikacja i term są generowane zgodnie ze standardowymi regułami: | |
65 | + - kwantyfikacja przebiega po wszystkich zdefiniowanych kategoriariach i wartościach wziętych z cats | |
66 | + - typ jest zadany bezpośrednio | |
67 | + - term tworzy wierzchołek w strukturze zależnościowej etykietowany wszystkimi zdefiniowanymi kategoriami | |
68 | + | |
69 | + Quant oznacza że typ i term są generowane zgodnie ze standardowymi regułami: | |
70 | + - kwantyfikacja jest zadana bezpośrednio | |
71 | + - typ jest zadany bezpośrednio | |
72 | + - term tworzy wierzchołek w strukturze zależnościowej etykietowany wszystkimi zdefiniowanymi kategoriami | |
73 | + | |
74 | +*) | |
75 | + | |
76 | +let empty_cats = {lemma=""; pos=""; pos2=""; cat="X"; coerced=[]; | |
77 | + numbers=[]; cases=[]; genders=[]; persons=[]; | |
78 | + grads=[]; praeps=[]; acms=[]; aspects=[]; negations=[]; moods=[]; tenses=[]; | |
79 | + nsyn=[]; nsem=[]; modes=[]; psem=[]; | |
80 | + } | |
81 | + | |
82 | +let default_category_flag = ref true | |
83 | + | |
84 | +let resource_path = | |
85 | + try Sys.getenv "ENIAM_RESOURCE_PATH" | |
86 | + with Not_found -> | |
87 | + if Sys.file_exists "/usr/share/eniam" then "/usr/share/eniam" else | |
88 | + if Sys.file_exists "/usr/local/share/eniam" then "/usr/local/share/eniam" else | |
89 | + if Sys.file_exists "resources" then "resources" else | |
90 | + failwith "resource directory does not exists" | |
91 | + | |
92 | +let data_path = | |
93 | + try Sys.getenv "ENIAM_USER_DATA_PATH" | |
94 | + with Not_found -> "data" | |
95 | + | |
96 | +let rules_filename = resource_path ^ "/LCGlexicon/lexicon-pl.dic" | |
97 | +let user_lexicon_filename = data_path ^ "/lexicon.dic" | |
98 | +let user_cats_filename = data_path ^ "/senses.tab" | |
99 | +let user_coerced_filename = data_path ^ "/coercions.tab" | |
100 | + | |
101 | +let subst_uncountable_lexemes_filename = resource_path ^ "/LCGlexicon/subst_uncountable.dat" | |
102 | +let subst_uncountable_lexemes_filename2 = resource_path ^ "/LCGlexicon/subst_uncountable_stare.dat" | |
103 | +let subst_container_lexemes_filename = resource_path ^ "/LCGlexicon/subst_container.dat" | |
104 | +let subst_numeral_lexemes_filename = resource_path ^ "/LCGlexicon/subst_numeral.dat" | |
105 | +let subst_time_lexemes_filename = resource_path ^ "/LCGlexicon/subst_time.dat" | |
106 | + | |
107 | +let adv_modes_filename = resource_path ^ "/Walenty/adv_modes.tab" | |
108 | +let num_nsems_filename = resource_path ^ "/LCGlexicon/num.tab" | |
... | ... |
LCGlexicon/ENIAM_LCGlexicon_old.ml
0 → 100644
1 | +(* | |
2 | + * ENIAM_LCGlexicon is a library that provides LCG lexicon form Polish | |
3 | + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | + * | |
6 | + * This library is free software: you can redistribute it and/or modify | |
7 | + * it under the terms of the GNU Lesser General Public License as published by | |
8 | + * the Free Software Foundation, either version 3 of the License, or | |
9 | + * (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + * GNU Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public License | |
17 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | + *) | |
19 | + | |
20 | +open Xstd | |
21 | +open ENIAM_LCGtypes | |
22 | +open ENIAM_LCGlexiconTypes | |
23 | +open ENIAMcategoriesPL | |
24 | + | |
25 | +let rec find_selector s = function | |
26 | + (t,Eq,x :: _) :: l -> if t = s then x else find_selector s l | |
27 | + | (t,_,_) :: l -> if t = s then failwith "find_selector 1" else find_selector s l | |
28 | + | [] -> failwith "find_selector 2" | |
29 | + | |
30 | +let rec get_syntax rev = function | |
31 | + Syntax syntax :: rule -> syntax, (List.rev rev) @ rule | |
32 | + | t :: rule -> get_syntax (t :: rev) rule | |
33 | + | [] -> failwith "get_syntax" | |
34 | + | |
35 | +let rec get_quant rev = function | |
36 | + Quant quant :: rule -> quant, (List.rev rev) @ rule | |
37 | + | t :: rule -> get_quant (t :: rev) rule | |
38 | + | [] -> [], List.rev rev | |
39 | + | |
40 | +let rec get_bracket rev = function | |
41 | + Bracket :: rule -> true, (List.rev rev) @ rule | |
42 | + | t :: rule -> get_bracket (t :: rev) rule | |
43 | + | [] -> false, List.rev rev | |
44 | + | |
45 | +let rec get_raised rev = function | |
46 | + Raised raised :: rule -> raised, (List.rev rev) @ rule | |
47 | + | t :: rule -> get_raised (t :: rev) rule | |
48 | + | [] -> raise Not_found | |
49 | + | |
50 | +let rec get_sem_term rev = function | |
51 | + Sem sem_term :: rule -> sem_term, (List.rev rev) @ rule | |
52 | + | t :: rule -> get_sem_term (t :: rev) rule | |
53 | + | [] -> raise Not_found | |
54 | + | |
55 | +let merge_quant pos_quants quants = | |
56 | + let map = Xlist.fold quants SelectorMap.empty (fun map (k,v) -> SelectorMap.add map k v) in | |
57 | + let l,map = Xlist.fold pos_quants ([],map) (fun (l,map) (cat,v) -> | |
58 | + if SelectorMap.mem map cat then (cat,SelectorMap.find map cat) :: l, SelectorMap.remove map cat | |
59 | + else (cat,v) :: l, map) in | |
60 | + List.rev (SelectorMap.fold map l (fun l cat v -> (cat,v) :: l)) | |
61 | + | |
62 | +let assign_quantifiers (selectors,rule,weight) = | |
63 | + let pos = find_selector Pos selectors in | |
64 | + let categories = | |
65 | + try StringMap.find pos_categories pos | |
66 | + with Not_found -> failwith ("assign_quantifiers: unknown part of speech " ^ pos) in | |
67 | + let categories = Xlist.map categories (fun s -> s,Top) in | |
68 | + let syntax,rule = get_syntax [] rule in | |
69 | + let quant,rule = get_quant [] rule in | |
70 | + let bracket,rule = get_bracket [] rule in | |
71 | + let quant = merge_quant categories quant in | |
72 | + selectors, (bracket,quant,syntax),(rule,weight) | |
73 | + | |
74 | +let rec check_quantifiers_int_rec (selectors,syntax) quants = function | |
75 | + Atom x -> () | |
76 | + | AVar "schema" -> () | |
77 | + | AVar x -> | |
78 | + if not (SelectorSet.mem quants (selector_of_string x)) | |
79 | + then failwith ("Variable '" ^ x ^ "' is not quantified in rule " ^ string_of_selectors selectors ^ ": " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax) | |
80 | + | With l -> Xlist.iter l (check_quantifiers_int_rec (selectors,syntax) quants) | |
81 | + | Zero -> () | |
82 | + | Top -> () | |
83 | + | |
84 | +let rec check_quantifiers_rec rule quants = function | |
85 | + Tensor l -> Xlist.iter l (check_quantifiers_int_rec rule quants) | |
86 | + | Plus l -> Xlist.iter l (check_quantifiers_rec rule quants) | |
87 | + | Imp(s,d,t) -> check_quantifiers_rec rule quants s; check_quantifiers_rec rule quants t | |
88 | + | One -> () | |
89 | + | ImpSet(s,l) -> check_quantifiers_rec rule quants s; Xlist.iter l (fun (_,t) -> check_quantifiers_rec rule quants t) | |
90 | + | Star s -> check_quantifiers_rec rule quants s | |
91 | + | Maybe s -> check_quantifiers_rec rule quants s | |
92 | + | _ -> failwith "check_quantifiers_rec" | |
93 | + | |
94 | +let check_quantifiers (selectors,(bracket,quant,syntax),_) = | |
95 | + let quants = Xlist.fold quant SelectorSet.empty (fun quants (q,_) -> SelectorSet.add quants q) in | |
96 | + check_quantifiers_rec (selectors,syntax) quants syntax | |
97 | + | |
98 | +let assign_semantics (selectors,(bracket,quant,syntax),(rule,weight)) = | |
99 | + let semantics = try | |
100 | + let raised,rule = get_raised [] rule in | |
101 | + if rule <> [] then failwith "assign_semantics 1" else | |
102 | + RaisedSem(Xlist.map quant fst, raised) | |
103 | + with Not_found -> (try | |
104 | + let term,rule = get_sem_term [] rule in | |
105 | + if rule <> [] then failwith "assign_semantics 2" else | |
106 | + TermSem(Xlist.map quant fst,term) | |
107 | + with Not_found -> BasicSem(Xlist.map quant fst)) in | |
108 | + selectors,(bracket,quant,syntax),(semantics,weight) | |
109 | + | |
110 | +let rec add_x_args_rec = function | |
111 | + Imp(s,d,t) -> Imp(add_x_args_rec s,d,t) | |
112 | + | ImpSet(s,l) -> ImpSet(add_x_args_rec s,l) | |
113 | + | Tensor[Atom "<conll_root>"] -> Tensor[Atom "<conll_root>"] | |
114 | + | Tensor l -> ImpSet(Tensor l,[Backward,Maybe(Tensor[Atom "X"]);Forward,Maybe(Tensor[Atom "X"])]) | |
115 | + | t -> failwith ("add_x_args_rec: " ^ ENIAM_LCGstringOf.grammar_symbol 0 t) | |
116 | + | |
117 | +let is_raised_semantics = function | |
118 | + RaisedSem _ -> true | |
119 | + | _ -> false | |
120 | + | |
121 | +let rec is_raised_arg = function | |
122 | + Imp _ -> true | |
123 | + | Tensor _ -> false | |
124 | + | Plus l -> Xlist.fold l false (fun b t -> is_raised_arg t || b) | |
125 | + | Maybe t -> is_raised_arg t | |
126 | + | One -> false | |
127 | + | t -> failwith ("is_raised_arg: " ^ ENIAM_LCGstringOf.grammar_symbol 0 t) | |
128 | + | |
129 | +let rec is_raised_syntax = function | |
130 | + Imp(s,d,t) -> is_raised_syntax s || is_raised_arg t | |
131 | + | ImpSet(s,l) -> is_raised_syntax s || Xlist.fold l false (fun b (_,t) -> is_raised_arg t || b) | |
132 | + | Tensor _ -> false | |
133 | + | t -> failwith ("is_raised_syntax: " ^ ENIAM_LCGstringOf.grammar_symbol 0 t) | |
134 | + | |
135 | + | |
136 | +let add_x_args (selectors,(bracket,quant,syntax),(semantics,weight)) = | |
137 | + if is_raised_syntax syntax then (selectors,(bracket,quant,syntax),(semantics,weight)) | |
138 | + else (selectors,(bracket,quant,add_x_args_rec syntax),(semantics,weight)) | |
139 | + | |
140 | +let rec extract_category pat rev = function | |
141 | + (cat,rel,v) :: l -> if cat = pat then rel,v,(List.rev rev @ l) else extract_category pat ((cat,rel,v) :: rev) l | |
142 | + | [] -> raise Not_found | |
143 | + | |
144 | +let dict_of_grammar grammar = | |
145 | + (* print_endline "dict_of_grammar"; *) | |
146 | + Xlist.fold grammar StringMap.empty (fun dict (selectors,(bracket,quant,syntax),semantics) -> | |
147 | + let pos_rel,poss,selectors = try extract_category Pos [] selectors with Not_found -> failwith "dict_of_grammar 1" in | |
148 | + let lemma_rel,lemmas,selectors = try extract_category Lemma [] selectors with Not_found -> Eq,[],selectors in | |
149 | + if pos_rel <> Eq || lemma_rel <> Eq then failwith "dict_of_grammar 2" else | |
150 | + let rule = selectors,(bracket,quant,syntax),semantics in | |
151 | + Xlist.fold poss dict (fun dict pos -> | |
152 | + let dict2,l = try StringMap.find dict pos with Not_found -> StringMap.empty,[] in | |
153 | + let dict2,l = | |
154 | + if lemmas = [] then dict2,rule :: l else | |
155 | + Xlist.fold lemmas dict2 (fun dict2 lemma -> | |
156 | + StringMap.add_inc dict2 lemma [rule] (fun l -> rule :: l)),l in | |
157 | + StringMap.add dict pos (dict2,l))) | |
158 | + | |
159 | +let make_rules x_flag filename = | |
160 | + let lexicon = ENIAM_LCGlexiconParser.load_lexicon filename in | |
161 | + let lexicon = List.rev (Xlist.rev_map lexicon assign_quantifiers) in | |
162 | + Xlist.iter lexicon check_quantifiers; | |
163 | + let lexicon = List.rev (Xlist.rev_map lexicon assign_semantics) in | |
164 | + let lexicon = if x_flag then List.rev (Xlist.rev_map lexicon add_x_args) else lexicon in | |
165 | + dict_of_grammar lexicon | |
166 | + | |
167 | +let find_rules rules cats = | |
168 | + let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith ("find_rules: unable to find rules for category '" ^ cats.pos ^ "' lemma='" ^ cats.lemma ^ "'") in | |
169 | + (* Printf.printf "find_rules: %s %s |rules|=%d\n" cats.lemma cats.pos (Xlist.size rules); *) | |
170 | + let rules = try StringMap.find lex_rules cats.lemma @ rules with Not_found -> rules in | |
171 | + Xlist.fold rules [] (fun rules (selectors,syntax,semantics) -> | |
172 | + try | |
173 | + let cats = apply_selectors cats selectors in | |
174 | + (cats,syntax,semantics) :: rules | |
175 | + with Not_found -> rules) | |
176 | + | |
177 | +let prepare_lex_entries rules lex_entries cats = | |
178 | + Xlist.fold lex_entries rules (fun rules (selectors,rule) -> | |
179 | + let selectors = (Pos,Eq,[cats.pos]) :: selectors in | |
180 | + let selectors,(bracket,quant,syntax),(rule,weight) = assign_quantifiers (selectors,[Syntax rule],0.) in | |
181 | + let selectors,(bracket,quant,syntax),(semantics,weight) = assign_semantics (selectors,(bracket,quant,syntax),(rule,weight)) in | |
182 | + try | |
183 | + let cats = apply_selectors cats selectors in | |
184 | + (cats,(bracket,quant,syntax),(semantics,weight)) :: rules | |
185 | + with Not_found -> rules) | |
186 | + | |
187 | +let assign_valence valence rules = | |
188 | + Xlist.fold rules [] (fun l (cats,(bracket,quant,syntax),semantics) -> | |
189 | + (* Printf.printf "%s %s |valence|=%d\n" cats.lemma cats.pos (Xlist.size valence); *) | |
190 | + if ENIAM_LCGrenderer.count_avar "schema" syntax > 0 then | |
191 | + Xlist.fold valence l (fun l (selectors,schema) -> | |
192 | + try | |
193 | + let cats = apply_selectors cats selectors in | |
194 | + (cats,(bracket,quant,ENIAM_LCGrenderer.substitute_schema "schema" schema syntax),semantics) :: l | |
195 | + with Not_found -> l) | |
196 | + else (cats,(bracket,quant,syntax),semantics) :: l) | |
197 | + | |
198 | +type labels = { | |
199 | + number: string; | |
200 | + case: string; | |
201 | + gender: string; | |
202 | + person: string; | |
203 | + aspect: string; | |
204 | +} | |
205 | + | |
206 | +let get_label e = function | |
207 | + Number -> e.number | |
208 | + | Case -> e.case | |
209 | + | Gender -> e.gender | |
210 | + | Person -> e.person | |
211 | + | Aspect -> e.aspect | |
212 | + | _ -> ENIAM_LCGreductions.get_variant_label () | |
213 | + | |
214 | +let get_labels () = { | |
215 | + number=ENIAM_LCGreductions.get_variant_label (); | |
216 | + case=ENIAM_LCGreductions.get_variant_label (); | |
217 | + gender=ENIAM_LCGreductions.get_variant_label (); | |
218 | + person=ENIAM_LCGreductions.get_variant_label (); | |
219 | + aspect=ENIAM_LCGreductions.get_variant_label (); | |
220 | +} | |
221 | + | |
222 | +let make_quantification e rules = | |
223 | + Xlist.map rules (fun (cats,(bracket,quant,syntax),semantics) -> | |
224 | + let syntax = Xlist.fold (List.rev quant) syntax (fun syntax (cat,t) -> | |
225 | + let t = if t = Top then ENIAM_LCGrenderer.make_quant_restriction (match_selector cats cat) else t in | |
226 | + let category = string_of_selector cat in | |
227 | + WithVar(category,t,get_label e cat,syntax)) in | |
228 | + let syntax = if bracket then ENIAM_LCGtypes.Bracket(true,true,syntax) else ENIAM_LCGtypes.Bracket(false,false,syntax) in | |
229 | + cats,syntax,semantics) | |
230 | + | |
231 | +let make_node id orth lemma pos syntax weight cat_list is_raised = | |
232 | + let attrs = Xlist.fold cat_list [] (fun attrs -> function | |
233 | + | Lemma -> attrs | |
234 | + | Pos -> attrs | |
235 | + | Pos2 -> attrs | |
236 | + | Cat -> ("CAT",SubstVar "cat") :: attrs | |
237 | + | Coerced -> ("COERCED",SubstVar "coerced") :: attrs | |
238 | + | Number -> ("NUM",SubstVar "number") :: attrs | |
239 | + | Case -> ("CASE",SubstVar "case") :: attrs | |
240 | + | Gender -> ("GEND",SubstVar "gender") :: attrs | |
241 | + | Person -> ("PERS",SubstVar "person") :: attrs | |
242 | + | Grad -> ("GRAD",SubstVar "grad") :: attrs | |
243 | + | Praep -> attrs | |
244 | + | Acm -> ("ACM",SubstVar "acm") :: attrs | |
245 | + | Aspect -> ("ASPECT", SubstVar "aspect") :: attrs | |
246 | + | Negation -> ("NEGATION",SubstVar "negation") :: attrs | |
247 | + | Mood -> ("MOOD", SubstVar "mood") :: attrs | |
248 | + | Tense -> ("TENSE", SubstVar "tense") :: attrs | |
249 | + | Nsyn -> ("NSYN", SubstVar "nsyn") :: attrs | |
250 | + | Nsem -> ("NSEM", SubstVar "nsem") :: attrs | |
251 | + | Ctype -> ("CTYPE", SubstVar "ctype") :: attrs | |
252 | + | Mode -> ("MODE", SubstVar "mode") :: attrs | |
253 | + | Psem -> ("PSEM", SubstVar "psem") :: attrs | |
254 | + | Icat -> attrs | |
255 | + | Inumber -> attrs | |
256 | + | Igender -> attrs | |
257 | + | Iperson -> attrs | |
258 | + | Nperson -> attrs | |
259 | + | Ncat -> attrs | |
260 | + | Plemma -> attrs | |
261 | + | Unumber -> attrs | |
262 | + | Ucase -> attrs | |
263 | + | Ugender -> attrs | |
264 | + | Uperson -> attrs | |
265 | + | Amode -> attrs) in | |
266 | + (* | s -> (string_of_selector s, Dot) :: attrs) in *) | |
267 | + (* | "lex" -> ("LEX",Val "+") :: attrs *) | |
268 | + (* | s -> failwith ("make_node: " ^ (string_of_selector s))) in *) | |
269 | + let symbol = if is_raised then | |
270 | + ENIAM_LCGrenderer.make_raised_symbol syntax | |
271 | + else ENIAM_LCGrenderer.make_symbol syntax in | |
272 | + {ENIAM_LCGrenderer.empty_node with | |
273 | + orth=orth; lemma=lemma; pos=pos; symbol=symbol; | |
274 | + weight=weight; id=id; attrs=List.rev attrs; args=Dot} | |
275 | + | |
276 | +let or_frame node = | |
277 | + (*Imp(Imp(Imp(Tensor[Atom "<root>"],Forward, | |
278 | + Tensor[Atom "</speaker>"]),Forward, | |
279 | + Imp(Tensor[Atom "ip"; Top; Top; Top],Forward,Tensor[Atom "or"])),Forward, | |
280 | + Tensor[Atom "or2"]),*) | |
281 | + (* Lambda("x",Lambda("y",Lambda("z",Node{node with gs=make_gs [] ["<root>"]; args=Tuple[ | |
282 | + Cut(SetAttr("AROLE",Val "Clause",SetAttr("GF",Gf CLAUSE,App(Var "y",Var "x"))))]}))) *) | |
283 | + VariantVar("lemma",Lambda("x",Lambda("y",Lambda("z",Node{node with args=Tuple[ | |
284 | + Cut(SetAttr("ARG_SYMBOL",Tuple[Val "TODO"],App(Var "y",Var "x")))]})))) | |
285 | + | |
286 | +let make_term id orth rules = | |
287 | + Xlist.map rules (fun (cats,syntax,(semantics,weight)) -> | |
288 | + ENIAM_LCGrenderer.reset_variable_names (); | |
289 | + ENIAM_LCGrenderer.add_variable_numbers (); | |
290 | + (* print_endline ("make_term 0: " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax); *) | |
291 | + match semantics with | |
292 | + BasicSem cat_list -> | |
293 | + let node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list false in | |
294 | + (* print_endline ("make_term 1: " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax); *) | |
295 | + let semantics = ENIAM_LCGrenderer.make_term node syntax in | |
296 | + ENIAM_LCGrenderer.simplify (syntax,semantics) | |
297 | + | RaisedSem(cat_list,outer_cat_list) -> | |
298 | + (* FIXME: jakie atrybuty powinien mieć outer node (w szczególności jaką wagę?) *) | |
299 | + let node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list true in | |
300 | + let outer_node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) outer_cat_list false in | |
301 | + (* print_endline ("make_term 2: " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax); *) | |
302 | + let semantics = ENIAM_LCGrenderer.make_raised_term node outer_node syntax in | |
303 | + ENIAM_LCGrenderer.simplify (syntax,semantics) | |
304 | + | TermSem(cat_list,"λxλyλz.NODE(yx,z)") -> | |
305 | + let node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list false in | |
306 | + (* print_endline ("make_term 3: " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax); *) | |
307 | + let semantics = or_frame node in | |
308 | + ENIAM_LCGrenderer.simplify (syntax,semantics) | |
309 | + | _ -> failwith "make_term: ni") | |
310 | + | |
311 | +let create_entries rules id orth cats valence lex_entries = | |
312 | + Xlist.fold cats [] (fun l cats -> | |
313 | + (* Printf.printf "create_entries: orth=%s lemma=%s pos=%s\n" orth cats.lemma cats.pos; *) | |
314 | + (* variable_name_ref := []; *) | |
315 | + if cats.pos="interp" && cats.lemma="<clause>" then (BracketSet(Forward),Dot) :: l else | |
316 | + if cats.pos="interp" && cats.lemma="</clause>" then (BracketSet(Backward),Dot) :: l else | |
317 | + if (cats.pos2="noun" || cats.pos2="verb" || cats.pos2="adj" || cats.pos2="adv") && cats.cat="X" && not !default_category_flag && cats.pos <> "aglt" then l else | |
318 | + let e = get_labels () in | |
319 | + (* print_endline "create_entries 1"; *) | |
320 | + let rules = find_rules rules cats in | |
321 | + let rules = prepare_lex_entries rules lex_entries cats in | |
322 | + (* Printf.printf "create_entries 2: %s %s |rules|=%d\n" cats.lemma cats.pos (Xlist.size rules); *) | |
323 | + let rules = assign_valence valence rules in | |
324 | + (* print_endline "create_entries 3"; *) | |
325 | + let rules = make_quantification e rules in | |
326 | + (* print_endline "create_entries 4"; *) | |
327 | + let rules = make_term id orth rules in | |
328 | + (* print_endline "create_entries 5"; *) | |
329 | + rules @ l) | |
... | ... |
LCGlexicon/ENIAMcategoriesPL.ml
... | ... | @@ -29,18 +29,27 @@ let all_persons = ["pri";"sec";"ter"] |
29 | 29 | |
30 | 30 | let selector_values = Xlist.fold [ |
31 | 31 | Lemma, []; |
32 | - Pos, ["subst";"depr";"ppron12";"ppron3";"siebie";"prep";"num";"intnum"; | |
32 | + IncludeLemmata, []; | |
33 | + Pos, ["subst";"depr";"ppron12";"ppron3";"siebie";"prep";"fixed";"num";"numcomp";"intnum"; | |
33 | 34 | "realnum";"intnum-interval";"realnum-interval";"symbol";"ordnum"; |
34 | 35 | "date";"date-interval";"hour-minute";"hour";"hour-minute-interval"; |
35 | 36 | "hour-interval";"year";"year-interval";"day";"day-interval";"day-month"; |
36 | 37 | "day-month-interval";"month-interval";"roman";"roman-interval";"roman-ordnum"; |
37 | - "match-result";"url";"email";"obj-id";"building-number";"adj";"adjc";"adjp";"adja"; | |
38 | + "match-result";"url";"email";"phone-number";"postal-code";"obj-id";"building-number";"list-item";"adj";"adjc";"adjp";"adja"; | |
38 | 39 | "adv";"ger";"pact";"ppas";"fin";"bedzie";"praet";"winien";"impt"; |
39 | - "imps";"pred";"aglt";"inf";"pcon";"pant";"qub";"part";"comp";"conj";"interj"; | |
40 | + "imps";"pred";"aglt";"inf";"pcon";"pant";"pacta";"qub";"part";"comp";"conj";"interj"; | |
40 | 41 | "sinterj";"burk";"interp";"xxx";"unk";"html-tag";"apron";"compar"]; |
41 | 42 | Pos2, []; |
42 | 43 | Cat, []; |
43 | - Proj, []; | |
44 | + Coerced, []; | |
45 | + Role, []; | |
46 | + Irole, []; | |
47 | + Prole, []; | |
48 | + Nrole, []; | |
49 | + SNode, ["concept";"sit";"dot";"relations"]; | |
50 | + Inode, ["concept";"sit";"dot";"relations"]; | |
51 | + Pnode, ["concept";"sit";"dot";"relations"]; | |
52 | + Nnode, ["concept";"sit";"dot";"relations"]; | |
44 | 53 | Number, all_numbers; |
45 | 54 | Case, "postp" :: "pred" :: all_cases; |
46 | 55 | Gender, all_genders; |
... | ... | @@ -124,7 +133,7 @@ let noun_type proper lemma pos = |
124 | 133 | if pos = "ppron12" || pos = "ppron3" || pos = "siebie" then "pronoun" else |
125 | 134 | if pos = "symbol" || pos = "date" || pos = "date-interval" || pos = "hour" || pos = "hour-minute" || pos = "hour-interval" || pos = "hour-minute-interval" || |
126 | 135 | pos = "year" || pos = "year-interval" || pos = "day" || pos = "day-interval" || pos = "day-month" || pos = "day-month-interval" || |
127 | - pos = "match-result" || pos = "month-interval" || pos = "roman" || pos = "roman-interval" || pos = "url" || pos = "email" || pos = "obj-id" || pos = "building-number" || pos = "date" then "proper" else | |
136 | + pos = "match-result" || pos = "month-interval" || pos = "roman" || pos = "roman-interval" || pos = "url" || pos = "email" || pos = "phone-number" || pos = "postal-code" || pos = "obj-id" || pos = "building-number" || pos = "date" then "proper" else | |
128 | 137 | if StringSet.mem subst_pronoun_lexemes lemma then "pronoun" else |
129 | 138 | "common" in |
130 | 139 | let nsem = |
... | ... | @@ -150,7 +159,9 @@ let num_nsem lemma = |
150 | 159 | |
151 | 160 | let part_set = StringSet.of_list ["się"; "nie"; "by"; "niech"; "niechaj"; "niechże"; "niechajże"; "czy"; "gdyby"] |
152 | 161 | |
153 | -let clarify_categories proper cat proj = function | |
162 | +let snode = SelectorMap.find selector_values SNode | |
163 | + | |
164 | +let clarify_categories proper cat coerced (*snode*) = function | |
154 | 165 | lemma,"subst",[numbers;cases;genders] -> |
155 | 166 | let numbers = expand_numbers numbers in |
156 | 167 | let cases = expand_cases cases in |
... | ... | @@ -158,9 +169,9 @@ let clarify_categories proper cat proj = function |
158 | 169 | let cases,voc = split_voc cases in |
159 | 170 | let nsyn,nsem = noun_type proper lemma "subst" in |
160 | 171 | (if cases = [] then [] else |
161 | - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; proj=proj; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ | |
172 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ | |
162 | 173 | (if voc = [] then [] else |
163 | - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; proj=proj; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | |
174 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | |
164 | 175 | | lemma,"subst",[numbers;cases;genders;_] -> |
165 | 176 | let numbers = expand_numbers numbers in |
166 | 177 | let cases = expand_cases cases in |
... | ... | @@ -168,9 +179,9 @@ let clarify_categories proper cat proj = function |
168 | 179 | let cases,voc = split_voc cases in |
169 | 180 | let nsyn,nsem = noun_type proper lemma "subst" in |
170 | 181 | (if cases = [] then [] else |
171 | - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; proj=proj; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ | |
182 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ | |
172 | 183 | (if voc = [] then [] else |
173 | - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; proj=proj; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | |
184 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | |
174 | 185 | | lemma,"depr",[numbers;cases;genders] -> |
175 | 186 | let numbers = expand_numbers numbers in |
176 | 187 | let cases = expand_cases cases in |
... | ... | @@ -178,29 +189,29 @@ let clarify_categories proper cat proj = function |
178 | 189 | let cases,voc = split_voc cases in |
179 | 190 | let nsyn,nsem = noun_type proper lemma "depr" in |
180 | 191 | (if cases = [] then [] else |
181 | - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; proj=proj; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ | |
192 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ | |
182 | 193 | (if voc = [] then [] else |
183 | - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; proj=proj; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | |
194 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | |
184 | 195 | | lemma,"ppron12",[numbers;cases;genders;persons] -> |
185 | 196 | let numbers = expand_numbers numbers in |
186 | 197 | let cases = expand_cases cases in |
187 | 198 | let genders = expand_genders genders in |
188 | - [{empty_cats with lemma=lemma; pos="ppron12"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons}] | |
199 | + [{empty_cats with lemma=lemma; pos="ppron12"; pos2="pron"; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=persons}] | |
189 | 200 | | lemma,"ppron12",[numbers;cases;genders;persons;akcs] -> |
190 | 201 | let numbers = expand_numbers numbers in |
191 | 202 | let cases = expand_cases cases in |
192 | 203 | let genders = expand_genders genders in |
193 | - [{empty_cats with lemma=lemma; pos="ppron12"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons}] | |
204 | + [{empty_cats with lemma=lemma; pos="ppron12"; pos2="pron"; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=persons}] | |
194 | 205 | | lemma,"ppron3",[numbers;cases;genders;persons] -> |
195 | 206 | let numbers = expand_numbers numbers in |
196 | 207 | let cases = expand_cases cases in |
197 | 208 | let genders = expand_genders genders in |
198 | - [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=["praep-npraep"]}] | |
209 | + [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=["praep-npraep"]}] | |
199 | 210 | | lemma,"ppron3",[numbers;cases;genders;persons;akcs] -> |
200 | 211 | let numbers = expand_numbers numbers in |
201 | 212 | let cases = expand_cases cases in |
202 | 213 | let genders = expand_genders genders in |
203 | - [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=["praep-npraep"]}] | |
214 | + [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=["praep-npraep"]}] | |
204 | 215 | | lemma,"ppron3",[numbers;cases;genders;persons;akcs;praep] -> |
205 | 216 | let numbers = expand_numbers numbers in |
206 | 217 | let cases = expand_cases cases in |
... | ... | @@ -209,207 +220,216 @@ let clarify_categories proper cat proj = function |
209 | 220 | ["praep";"npraep"] -> ["praep-npraep"] |
210 | 221 | | ["npraep";"praep"] -> ["praep-npraep"] |
211 | 222 | | _ -> praep in |
212 | - [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=praep}] | |
223 | + [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=praep}] | |
213 | 224 | | lemma,"siebie",[cases] -> (* FIXME: czy tu określać numbers genders persons? *) |
214 | 225 | let cases = expand_cases cases in |
215 | - [{empty_cats with lemma=lemma; pos="siebie"; pos2="pron"; numbers=all_numbers; cases=cases; genders=all_genders; persons=["ter"]}] | |
226 | + [{empty_cats with lemma=lemma; pos="siebie"; pos2="pron"; snode=snode; numbers=all_numbers; cases=cases; genders=all_genders; persons=["ter"]}] | |
216 | 227 | | lemma,"prep",[cases;woks] -> |
217 | 228 | if StringSet.mem compar_lexemes lemma then |
218 | 229 | [{empty_cats with lemma=lemma; pos="compar"; pos2="prep"}] else |
219 | 230 | let cases = expand_cases cases in |
220 | - [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; cases=cases; psem=["sem";"nosem"]}] | |
231 | + [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; snode=snode; cases=cases; psem=["sem";"nosem"]}] | |
221 | 232 | | lemma,"prep",[cases] -> |
222 | 233 | if StringSet.mem compar_lexemes lemma then |
223 | 234 | [{empty_cats with lemma=lemma; pos="compar"; pos2="prep"}] else |
224 | 235 | let cases = expand_cases cases in |
225 | - [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; cases=cases; psem=["sem";"nosem"]}] | |
236 | + [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; snode=snode; cases=cases; psem=["sem";"nosem"]}] | |
226 | 237 | | lemma,"num",[numbers;cases;genders;acms] -> |
227 | 238 | let numbers = expand_numbers numbers in |
228 | 239 | let cases = expand_cases cases in |
229 | 240 | let genders = expand_genders genders in |
230 | 241 | let nsem = num_nsem lemma in |
231 | - [{empty_cats with lemma=lemma; pos="num"; pos2="num"; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms; nsem=nsem}] | |
242 | + [{empty_cats with lemma=lemma; pos="num"; pos2="num"; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms; nsem=nsem}] | |
232 | 243 | | lemma,"num",[numbers;cases;genders;acms;_] -> |
233 | 244 | let numbers = expand_numbers numbers in |
234 | 245 | let cases = expand_cases cases in |
235 | 246 | let genders = expand_genders genders in |
236 | 247 | let nsem = num_nsem lemma in |
237 | - [{empty_cats with lemma=lemma; pos="num"; pos2="num"; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms; nsem=nsem}] | |
238 | - | lemma,"numc",[] -> [] | |
248 | + [{empty_cats with lemma=lemma; pos="num"; pos2="num"; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms; nsem=nsem}] | |
249 | + | lemma,"numcomp",[] -> [{empty_cats with lemma=lemma; pos="numcomp"; pos2="numcomp"; snode=snode}] | |
239 | 250 | | lemma,"intnum",[] -> |
240 | 251 | let numbers,acms = |
241 | 252 | if lemma = "1" || lemma = "-1" then ["sg"],["congr"] else |
242 | 253 | let s = String.get lemma (String.length lemma - 1) in |
243 | 254 | ["pl"],if s = '2' || s = '3' || s = '4' then ["rec";"congr"] else ["rec"] in |
244 | - [{empty_cats with lemma=lemma; pos="intnum"; pos2="num"; numbers=numbers; cases=all_cases; genders=all_genders; persons=["ter"]; acms=acms; nsem=["count"]}] | |
255 | + [{empty_cats with lemma=lemma; pos="intnum"; pos2="num"; snode=snode; numbers=numbers; cases=all_cases; genders=all_genders; persons=["ter"]; acms=acms; nsem=["count"]}] | |
245 | 256 | | lemma,"realnum",[] -> |
246 | - [{empty_cats with lemma=lemma; pos="realnum"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}] | |
257 | + [{empty_cats with lemma=lemma; pos="realnum"; pos2="num"; snode=snode; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}] | |
247 | 258 | | lemma,"intnum-interval",[] -> |
248 | - [{empty_cats with lemma=lemma; pos="intnum-interval"; pos2="num"; numbers=["pl"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec";"congr"]; nsem=["count"]}] | |
259 | + [{empty_cats with lemma=lemma; pos="intnum-interval"; pos2="num"; snode=snode; numbers=["pl"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec";"congr"]; nsem=["count"]}] | |
249 | 260 | | lemma,"realnum-interval",[] -> |
250 | - [{empty_cats with lemma=lemma; pos="realnum-interval"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}] | |
261 | + [{empty_cats with lemma=lemma; pos="realnum-interval"; pos2="num"; snode=snode; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}] | |
251 | 262 | | lemma,"symbol",[] -> |
252 | - [{empty_cats with lemma=lemma; pos="symbol"; pos2="noun"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]}] | |
263 | + [{empty_cats with lemma=lemma; pos="symbol"; pos2="noun"; snode=snode; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]}] | |
253 | 264 | | lemma,"ordnum",[] -> |
254 | - [{empty_cats with lemma=lemma; pos="ordnum"; pos2="adj"; numbers=all_numbers; cases=all_cases; genders=all_genders; grads=["pos"]}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *) | |
265 | + [{empty_cats with lemma=lemma; pos="ordnum"; pos2="adj"; snode=snode; numbers=all_numbers; cases=all_cases; genders=all_genders; grads=["pos"]}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *) | |
255 | 266 | | lemma,"date",[] -> |
256 | 267 | let nsyn,nsem = noun_type proper lemma "date" in |
257 | - [{empty_cats with lemma=lemma; pos="date"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
268 | + [{empty_cats with lemma=lemma; pos="date"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
258 | 269 | | lemma,"date-interval",[] -> |
259 | 270 | let nsyn,nsem = noun_type proper lemma "date-interval" in |
260 | - [{empty_cats with lemma=lemma; pos="date-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
271 | + [{empty_cats with lemma=lemma; pos="date-interval"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
261 | 272 | | lemma,"hour-minute",[] -> |
262 | 273 | let nsyn,nsem = noun_type proper lemma "hour-minute" in |
263 | - [{empty_cats with lemma=lemma; pos="hour-minute"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
274 | + [{empty_cats with lemma=lemma; pos="hour-minute"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
264 | 275 | | lemma,"hour",[] -> |
265 | 276 | let nsyn,nsem = noun_type proper lemma "hour" in |
266 | - [{empty_cats with lemma=lemma; pos="hour"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
277 | + [{empty_cats with lemma=lemma; pos="hour"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
267 | 278 | | lemma,"hour-minute-interval",[] -> |
268 | 279 | let nsyn,nsem = noun_type proper lemma "hour-minute-interval" in |
269 | - [{empty_cats with lemma=lemma; pos="hour-minute-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
280 | + [{empty_cats with lemma=lemma; pos="hour-minute-interval"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
270 | 281 | | lemma,"hour-interval",[] -> |
271 | 282 | let nsyn,nsem = noun_type proper lemma "hour-interval" in |
272 | - [{empty_cats with lemma=lemma; pos="hour-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
283 | + [{empty_cats with lemma=lemma; pos="hour-interval"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
273 | 284 | | lemma,"year",[] -> |
274 | 285 | let nsyn,nsem = noun_type proper lemma "year" in |
275 | - [{empty_cats with lemma=lemma; pos="year"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
286 | + [{empty_cats with lemma=lemma; pos="year"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
276 | 287 | | lemma,"year-interval",[] -> |
277 | 288 | let nsyn,nsem = noun_type proper lemma "year-interval" in |
278 | - [{empty_cats with lemma=lemma; pos="year-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
289 | + [{empty_cats with lemma=lemma; pos="year-interval"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
279 | 290 | | lemma,"day",[] -> |
280 | 291 | let nsyn,nsem = noun_type proper lemma "day" in |
281 | - [{empty_cats with lemma=lemma; pos="day"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
292 | + [{empty_cats with lemma=lemma; pos="day"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
282 | 293 | | lemma,"day-interval",[] -> |
283 | 294 | let nsyn,nsem = noun_type proper lemma "day-interval" in |
284 | - [{empty_cats with lemma=lemma; pos="day-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
295 | + [{empty_cats with lemma=lemma; pos="day-interval"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
285 | 296 | | lemma,"day-month",[] -> |
286 | 297 | let nsyn,nsem = noun_type proper lemma "day-month" in |
287 | - [{empty_cats with lemma=lemma; pos="day-month"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
298 | + [{empty_cats with lemma=lemma; pos="day-month"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
288 | 299 | | lemma,"day-month-interval",[] -> |
289 | 300 | let nsyn,nsem = noun_type proper lemma "day-month-interval" in |
290 | - [{empty_cats with lemma=lemma; pos="day-month-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
301 | + [{empty_cats with lemma=lemma; pos="day-month-interval"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
291 | 302 | | lemma,"month-interval",[] -> |
292 | 303 | let nsyn,nsem = noun_type proper lemma "month-interval" in |
293 | - [{empty_cats with lemma=lemma; pos="month-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
304 | + [{empty_cats with lemma=lemma; pos="month-interval"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
294 | 305 | | lemma,"roman",[] -> |
295 | 306 | let nsyn,nsem = noun_type proper lemma "roman" in |
296 | - [{empty_cats with lemma=lemma; pos="roman-ordnum"; pos2="adj"; numbers=all_numbers; cases=all_cases; genders=all_genders; grads=["pos"]}; | |
297 | - {empty_cats with lemma=lemma; pos="roman"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
307 | + [{empty_cats with lemma=lemma; pos="roman-ordnum"; pos2="adj"; snode=snode; numbers=all_numbers; cases=all_cases; genders=all_genders; grads=["pos"]}; | |
308 | + {empty_cats with lemma=lemma; pos="roman"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
298 | 309 | | lemma,"roman-interval",[] -> |
299 | 310 | let nsyn,nsem = noun_type proper lemma "roman-interval" in |
300 | - [{empty_cats with lemma=lemma; pos="roman-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
311 | + [{empty_cats with lemma=lemma; pos="roman-interval"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
301 | 312 | | lemma,"match-result",[] -> |
302 | 313 | let nsyn,nsem = noun_type proper lemma "match-result" in |
303 | - [{empty_cats with lemma=lemma; pos="match-result"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
314 | + [{empty_cats with lemma=lemma; pos="match-result"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
304 | 315 | | lemma,"url",[] -> |
305 | 316 | let nsyn,nsem = noun_type proper lemma "url" in |
306 | - [{empty_cats with lemma=lemma; pos="url"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
317 | + [{empty_cats with lemma=lemma; pos="url"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
307 | 318 | | lemma,"email",[] -> |
308 | 319 | let nsyn,nsem = noun_type proper lemma "email" in |
309 | - [{empty_cats with lemma=lemma; pos="email"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
320 | + [{empty_cats with lemma=lemma; pos="email"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
321 | + | lemma,"phone-number",[] -> | |
322 | + let nsyn,nsem = noun_type proper lemma "phone-number" in | |
323 | + [{empty_cats with lemma=lemma; pos="phone-number"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
324 | + | lemma,"postal-code",[] -> | |
325 | + let nsyn,nsem = noun_type proper lemma "postal-code" in | |
326 | + [{empty_cats with lemma=lemma; pos="postal-code"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
310 | 327 | | lemma,"obj-id",[] -> |
311 | 328 | let nsyn,nsem = noun_type proper lemma "obj-id" in |
312 | - [{empty_cats with lemma=lemma; pos="obj-id"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
329 | + [{empty_cats with lemma=lemma; pos="obj-id"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
313 | 330 | | lemma,"building-number",[] -> |
314 | 331 | let nsyn,nsem = noun_type proper lemma "building-number" in |
315 | - [{empty_cats with lemma=lemma; pos="building-number"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
332 | + [{empty_cats with lemma=lemma; pos="building-number"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}] | |
333 | + | lemma,"fixed",[] -> [{empty_cats with lemma=lemma; pos="fixed"; pos2="fixed"; snode=snode}] | |
316 | 334 | | lemma,"adj",[numbers;cases;genders;grads] -> (* FIXME: adjsyn *) |
317 | 335 | let numbers = expand_numbers numbers in |
318 | 336 | let cases = expand_cases cases in |
319 | 337 | let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in |
320 | 338 | let genders = expand_genders genders in |
321 | 339 | let pos,pos2 = if StringSet.mem adj_pronoun_lexemes lemma then "apron","pron" else "adj","adj" in |
322 | - [{empty_cats with lemma=lemma; pos=pos; pos2=pos2; cat=cat; proj=proj; numbers=numbers; cases=cases; genders=genders; grads=grads}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *) | |
340 | + [{empty_cats with lemma=lemma; pos=pos; pos2=pos2; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; grads=grads}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *) | |
323 | 341 | | lemma,"adjc",[] -> |
324 | - [{empty_cats with lemma=lemma; pos="adjc"; pos2="adj"; cat=cat; proj=proj; numbers=["sg"]; cases=["pred"]; genders=["m1";"m2";"m3"]; grads=["pos"]}] | |
342 | + [{empty_cats with lemma=lemma; pos="adjc"; pos2="adj"; cat=cat; coerced=coerced; snode=snode; numbers=["sg"]; cases=["pred"]; genders=["m1";"m2";"m3"]; grads=["pos"]}] | |
325 | 343 | | lemma,"adjp",[] -> |
326 | - [{empty_cats with lemma=lemma; pos="adjp"; pos2="adj"; cat=cat; proj=proj; numbers=all_numbers; cases=["postp"]; genders=all_genders; grads=["pos"]}] | |
327 | - | lemma,"adja",[] -> [{empty_cats with lemma=lemma; cat=cat; proj=proj; pos="adja"; pos2="adja"}] | |
328 | - | lemma,"adv",[grads] -> [{empty_cats with lemma=lemma; cat=cat; proj=proj; pos="adv"; pos2="adv"; grads=grads; modes=adv_mode lemma}] | |
329 | - | lemma,"adv",[] -> [{empty_cats with lemma=lemma; cat=cat; proj=proj; pos="adv"; pos2="adv"; grads=["pos"]; modes=adv_mode lemma}] | |
344 | + [{empty_cats with lemma=lemma; pos="adjp"; pos2="adj"; cat=cat; coerced=coerced; snode=snode; numbers=all_numbers; cases=["postp"]; genders=all_genders; grads=["pos"]}] | |
345 | + | lemma,"adja",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; snode=snode; pos="adja"; pos2="adja"}] | |
346 | + | lemma,"adv",[grads] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; snode=snode; pos="adv"; pos2="adv"; grads=grads; modes=adv_mode lemma}] | |
347 | + | lemma,"adv",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; snode=snode; pos="adv"; pos2="adv"; grads=["pos"]; modes=adv_mode lemma}] | |
330 | 348 | | lemma,"ger",[numbers;cases;genders;aspects;negations] -> |
331 | 349 | let numbers = expand_numbers numbers in |
332 | 350 | let cases = expand_cases cases in |
333 | 351 | let genders = expand_genders genders in |
334 | - [{empty_cats with lemma=lemma; pos="ger"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; aspects=aspects; negations=negations}] (* FIXME: kwestia osoby przy voc *) | |
352 | + [{empty_cats with lemma=lemma; pos="ger"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; aspects=aspects; negations=negations}] (* FIXME: kwestia osoby przy voc *) | |
335 | 353 | | lemma,"pact",[numbers;cases;genders;aspects;negations] -> |
336 | 354 | let numbers = expand_numbers numbers in |
337 | 355 | let cases = expand_cases cases in |
338 | 356 | let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in |
339 | 357 | let genders = expand_genders genders in |
340 | - [{empty_cats with lemma=lemma; pos="pact"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}] | |
358 | + [{empty_cats with lemma=lemma; pos="pact"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}] | |
341 | 359 | | lemma,"ppas",[numbers;cases;genders;aspects;negations] -> |
342 | 360 | let numbers = expand_numbers numbers in |
343 | 361 | let cases = expand_cases cases in |
344 | 362 | let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in |
345 | 363 | let genders = expand_genders genders in |
346 | - [{empty_cats with lemma=lemma; pos="ppas"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}] | |
364 | + [{empty_cats with lemma=lemma; pos="ppas"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}] | |
347 | 365 | | lemma,"fin",[numbers;persons;aspects] -> (* FIXME: genders bez przymnogich *) |
348 | 366 | let numbers = expand_numbers numbers in |
349 | 367 | let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in |
350 | - let cats = {empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=all_genders; persons=persons; negations=["aff"; "neg"]; moods=["indicative"]} in | |
368 | + let cats = {empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; negations=["aff"; "neg"]; moods=["indicative"]} in | |
351 | 369 | (Xlist.map aspects (function |
352 | 370 | "imperf" -> {cats with aspects=["imperf"]; tenses=["pres"]} |
353 | 371 | | "perf" -> {cats with aspects=["perf"]; tenses=["fut"]} |
354 | 372 | | _ -> failwith "clarify_categories")) @ |
355 | 373 | (if persons2 = [] then [] else |
356 | - [{empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]) | |
374 | + [{empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]) | |
357 | 375 | | lemma,"bedzie",[numbers;persons;aspects] -> |
358 | 376 | let numbers = expand_numbers numbers in |
359 | 377 | let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in |
360 | - [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] @ | |
378 | + [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] @ | |
361 | 379 | (if persons2 = [] then [] else |
362 | - [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]) | |
380 | + [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]) | |
363 | 381 | | lemma,"praet",[numbers;genders;aspects;nagl] -> |
364 | 382 | let numbers = expand_numbers numbers in |
365 | 383 | let genders = expand_genders genders in |
366 | - [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @ | |
384 | + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @ | |
367 | 385 | (if Xlist.mem aspects "imperf" then |
368 | - [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] | |
386 | + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] | |
369 | 387 | else []) |
370 | 388 | | lemma,"praet",[numbers;genders;aspects] -> |
371 | 389 | let numbers = expand_numbers numbers in |
372 | 390 | let genders = expand_genders genders in |
373 | - [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @ | |
391 | + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @ | |
374 | 392 | (if Xlist.mem aspects "imperf" then |
375 | - [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] | |
393 | + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] | |
376 | 394 | else []) |
377 | 395 | | lemma,"winien",[numbers;genders;aspects] -> |
378 | 396 | let numbers = expand_numbers numbers in |
379 | 397 | let genders = expand_genders genders in |
380 | - [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["pres"]}; | |
381 | - {empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] @ | |
398 | + [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["pres"]}; | |
399 | + {empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] @ | |
382 | 400 | (if Xlist.mem aspects "imperf" then |
383 | - [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] | |
401 | + [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] | |
384 | 402 | else []) |
385 | 403 | | lemma,"impt",[numbers;persons;aspects] -> |
386 | 404 | let numbers = expand_numbers numbers in |
387 | - [{empty_cats with lemma=lemma; pos="impt"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}] | |
405 | + [{empty_cats with lemma=lemma; pos="impt"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}] | |
388 | 406 | | lemma,"imps",[aspects] -> |
389 | - [{empty_cats with lemma=lemma; pos="imps"; pos2="verb"; cat=cat; proj=proj; numbers=all_numbers; genders=all_genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] | |
407 | + [{empty_cats with lemma=lemma; pos="imps"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=all_numbers; genders=all_genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] | |
390 | 408 | | lemma,"pred",[] -> (* FIXME: czy predykatyw zawsze jest niedokonany? *) |
391 | - [{empty_cats with lemma=lemma; pos="pred"; pos2="verb"; cat=cat; proj=proj; numbers=["sg"]; genders=[(*"n2"*)"n"]; persons=["ter"]; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["pres";"past";"fut"]}] | |
409 | + [{empty_cats with lemma=lemma; pos="pred"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=["sg"]; genders=[(*"n2"*)"n"]; persons=["ter"]; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["pres";"past";"fut"]}] | |
392 | 410 | | lemma,"aglt",[numbers;persons;aspects;wok] -> |
393 | 411 | let numbers = expand_numbers numbers in |
394 | - [{empty_cats with lemma=lemma; pos="aglt"; pos2="verb"; numbers=numbers; persons=persons; aspects=aspects}] | |
395 | - | lemma,"inf",[aspects] -> [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; cat=cat; proj=proj; aspects=aspects; negations=["aff"; "neg"]}] | |
396 | - | lemma,"pcon",[aspects] -> [{empty_cats with lemma=lemma; pos="pcon"; pos2="verb"; cat=cat; proj=proj; aspects=aspects; negations=["aff"; "neg"]}] | |
397 | - | lemma,"pant",[aspects] -> [{empty_cats with lemma=lemma; pos="pant"; pos2="verb"; cat=cat; proj=proj; aspects=aspects; negations=["aff"; "neg"]}] | |
412 | + [{empty_cats with lemma=lemma; pos="aglt"; pos2="verb"; snode=snode; numbers=numbers; persons=persons; aspects=aspects}] | |
413 | + | lemma,"inf",[aspects] -> [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] | |
414 | + | lemma,"pcon",[aspects] -> [{empty_cats with lemma=lemma; pos="pcon"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] | |
415 | + | lemma,"pant",[aspects] -> [{empty_cats with lemma=lemma; pos="pant"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] | |
416 | + | lemma,"pacta",[] -> [{empty_cats with lemma=lemma; pos="pacta"; pos2="verb"; cat=cat; coerced=coerced; snode=snode}] | |
398 | 417 | | lemma,"qub",[] -> |
399 | - if StringSet.mem part_set lemma then [{empty_cats with lemma=lemma; pos="part"; pos2="qub"}] | |
400 | - else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"}] | |
401 | - | lemma,"comp",[] -> [{empty_cats with lemma=lemma; pos="comp"; pos2="comp"}] | |
402 | - | lemma,"conj",[] -> [{empty_cats with lemma=lemma; pos="conj"; pos2="conj"}] | |
403 | - | lemma,"interj",[] -> [{empty_cats with lemma=lemma; pos="interj"; pos2="interj"}] | |
404 | - | lemma,"sinterj",[] -> [{empty_cats with lemma=lemma; pos="sinterj"; pos2="sinterj"}] | |
405 | - | lemma,"burk",[] -> [{empty_cats with lemma=lemma; pos="burk"; pos2="burk"}] | |
406 | - | ",","interp",[] -> [{empty_cats with lemma=","; pos="conj"; pos2="conj"}] | |
407 | - | lemma,"interp",[] -> [{empty_cats with lemma=lemma; pos="interp"; pos2="interp"}] | |
418 | + if StringSet.mem part_set lemma then [{empty_cats with lemma=lemma; pos="part"; pos2="qub"; snode=snode}] | |
419 | + else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"; cat=cat; snode=snode}] | |
420 | + | lemma,"comp",[] -> [{empty_cats with lemma=lemma; pos="comp"; pos2="comp"; snode=snode}] | |
421 | + | lemma,"conj",[] -> [{empty_cats with lemma=lemma; pos="conj"; pos2="conj"; snode=snode}] | |
422 | + | lemma,"interj",[] -> [{empty_cats with lemma=lemma; pos="interj"; pos2="interj"; cat=cat; coerced=coerced; snode=snode}] | |
423 | + | lemma,"sinterj",[] -> [{empty_cats with lemma=lemma; pos="sinterj"; pos2="sinterj"; (*cat=cat; coerced=coerced;*) snode=snode}] | |
424 | + | lemma,"burk",[] -> [{empty_cats with lemma=lemma; pos="burk"; pos2="burk"; snode=snode}] | |
425 | + | ",","interp",[] -> [{empty_cats with lemma=","; pos="conj"; pos2="conj"; snode=snode}] | |
426 | + | lemma,"interp",[] -> [{empty_cats with lemma=lemma; pos="interp"; pos2="interp"; snode=snode}] | |
408 | 427 | | lemma,"unk",[] -> |
409 | - [{empty_cats with lemma=lemma; pos="unk"; pos2="noun"; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}] | |
428 | + [{empty_cats with lemma=lemma; pos="unk"; pos2="noun"; snode=snode; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}] | |
410 | 429 | | lemma,"xxx",[] -> |
411 | - [{empty_cats with lemma=lemma; pos="xxx"; pos2="noun"; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}] | |
412 | - | lemma,"html-tag",[] -> [{empty_cats with lemma=lemma; pos="html-tag"; pos2="html-tag"}] | |
430 | + [{empty_cats with lemma=lemma; pos="xxx"; pos2="noun"; snode=snode; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}] | |
431 | + | lemma,"html-tag",[] -> [{empty_cats with lemma=lemma; pos="html-tag"; pos2="html-tag"; snode=snode}] | |
432 | + | lemma,"list-item",[] -> [{empty_cats with lemma=lemma; pos="list-item"; pos2="list-item"; snode=snode}] | |
413 | 433 | | lemma,c,l -> failwith ("clarify_categories: " ^ lemma ^ ":" ^ c ^ ":" ^ (String.concat ":" (Xlist.map l (String.concat ".")))) |
414 | 434 | |
415 | 435 | (* FIXME: przenieść gdzieś indziej *) |
... | ... | @@ -421,19 +441,28 @@ let clarify_categories proper cat proj = function |
421 | 441 | | _ -> [] *) |
422 | 442 | |
423 | 443 | let selector_names = StringSet.of_list [ |
424 | - "lemma";"pos";"pos2";"cat";"proj";"number";"case";"gender";"person";"grad"; | |
444 | + "lemma";"pos";"pos2";"cat";"coerced";"role";"irole";"prole";"nrole";"node";"inode";"pnode";"nnode";"number";"case";"gender";"person";"grad"; | |
425 | 445 | "praep";"acm";"aspect";"negation";"mood";"tense";"nsyn";"nsem";"ctype";"mode";"psem"; |
426 | - "inumber";"igender";"iperson";"nperson";"plemma"; | |
446 | + "icat";"inumber";"igender";"iperson";"nperson";"ncat";"plemma"; | |
427 | 447 | "unumber";"ucase";"ugender";"uperson";"amode"] |
428 | 448 | |
429 | 449 | |
430 | 450 | let string_of_selector = function |
431 | 451 | Lemma -> "lemma" |
452 | + | IncludeLemmata -> "include-lemmata" | |
432 | 453 | (* | NewLemma -> "newlemma" *) |
433 | 454 | | Pos -> "pos" |
434 | 455 | | Pos2 -> "pos2" |
435 | 456 | | Cat -> "cat" |
436 | - | Proj -> "proj" | |
457 | + | Coerced -> "coerced" | |
458 | + | Role -> "role" | |
459 | + | Irole -> "irole" | |
460 | + | Prole -> "prole" | |
461 | + | Nrole -> "nrole" | |
462 | + | SNode -> "node" | |
463 | + | Inode -> "inode" | |
464 | + | Pnode -> "pnode" | |
465 | + | Nnode -> "nnode" | |
437 | 466 | | Number -> "number" |
438 | 467 | | Case -> "case" |
439 | 468 | | Gender -> "gender" |
... | ... | @@ -450,10 +479,12 @@ let string_of_selector = function |
450 | 479 | | Ctype -> "ctype" |
451 | 480 | | Mode -> "mode" |
452 | 481 | | Psem -> "psem" |
482 | + | Icat -> "icat" | |
453 | 483 | | Inumber -> "inumber" |
454 | 484 | | Igender -> "igender" |
455 | 485 | | Iperson -> "iperson" |
456 | 486 | | Nperson -> "nperson" |
487 | + | Ncat -> "ncat" | |
457 | 488 | | Plemma -> "plemma" |
458 | 489 | | Unumber -> "unumber" |
459 | 490 | | Ucase -> "ucase" |
... | ... | @@ -468,11 +499,20 @@ let string_of_selectors selectors = |
468 | 499 | |
469 | 500 | let selector_of_string = function |
470 | 501 | "lemma" -> Lemma |
502 | + | "include-lemmata" -> IncludeLemmata | |
471 | 503 | (* | NewLemma -> "newlemma" *) |
472 | 504 | | "pos" -> Pos |
473 | 505 | | "pos2" -> Pos2 |
474 | 506 | | "cat" -> Cat |
475 | - | "proj" -> Proj | |
507 | + | "coerced" -> Coerced | |
508 | + | "role" -> Role | |
509 | + | "irole" -> Irole | |
510 | + | "prole" -> Prole | |
511 | + | "nrole" -> Nrole | |
512 | + | "node" -> SNode | |
513 | + | "inode" -> Inode | |
514 | + | "pnode" -> Pnode | |
515 | + | "nnode" -> Nnode | |
476 | 516 | | "number" -> Number |
477 | 517 | | "case" -> Case |
478 | 518 | | "gender" -> Gender |
... | ... | @@ -489,10 +529,12 @@ let selector_of_string = function |
489 | 529 | | "ctype" -> Ctype |
490 | 530 | | "mode" -> Mode |
491 | 531 | | "psem" -> Psem |
532 | + | "icat" -> Icat | |
492 | 533 | | "inumber" -> Inumber |
493 | 534 | | "igender" -> Igender |
494 | 535 | | "iperson" -> Iperson |
495 | 536 | | "nperson" -> Nperson |
537 | + | "ncat" -> Ncat | |
496 | 538 | | "plemma" -> Plemma |
497 | 539 | | "unumber" -> Unumber |
498 | 540 | | "ucase" -> Ucase |
... | ... | @@ -506,7 +548,9 @@ let match_selector cats = function |
506 | 548 | (* | NewLemma -> [] *) |
507 | 549 | | Pos -> [cats.pos] |
508 | 550 | | Cat -> [cats.cat] |
509 | - | Proj -> cats.proj | |
551 | + | Coerced -> cats.coerced | |
552 | + | Role -> cats.roles | |
553 | + | SNode -> cats.snode | |
510 | 554 | | Number -> cats.numbers |
511 | 555 | | Case -> cats.cases |
512 | 556 | | Gender -> cats.genders |
... | ... | @@ -543,7 +587,9 @@ let set_selector cats vals = function |
543 | 587 | | Lemma -> (match vals with [v] -> {cats with lemma=v} | _ -> failwith "set_selector: Lemma") |
544 | 588 | | Pos -> (match vals with [v] -> {cats with pos=v} | _ -> failwith "set_selector: Pos") |
545 | 589 | | Cat -> (match vals with [v] -> {cats with cat=v} | _ -> failwith "set_selector: Cat") |
546 | - | Proj -> {cats with proj=vals} | |
590 | + | Coerced -> {cats with coerced=vals} | |
591 | + | Role -> {cats with roles=vals} | |
592 | + | SNode -> {cats with snode=vals} | |
547 | 593 | | c -> failwith ("set_selector: " ^ string_of_selector c) |
548 | 594 | |
549 | 595 | let rec apply_selectors cats = function |
... | ... | @@ -558,70 +604,84 @@ let rec apply_selectors cats = function |
558 | 604 | apply_selectors (set_selector cats (StringSet.to_list vals) sel) l |
559 | 605 | |
560 | 606 | let pos_categories = Xlist.fold [ |
561 | - "subst",[Lemma;Cat;Proj;Number;Case;Gender;Person;Nsyn;Nsem;]; | |
562 | - "depr",[Lemma;Cat;Proj;Number;Case;Gender;Person;Nsyn;Nsem;]; | |
563 | - "ppron12",[Lemma;Number;Case;Gender;Person;]; | |
564 | - "ppron3",[Lemma;Number;Case;Gender;Person;Praep;]; | |
565 | - "siebie",[Lemma;Number;Case;Gender;Person;]; | |
566 | - "prep",[Lemma;Cat;Proj;Psem;Case;]; | |
567 | - "compar",[Lemma;Cat;Proj;Case;]; | |
568 | - "num",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | |
569 | - "intnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | |
570 | - "realnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | |
571 | - "intnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | |
572 | - "realnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | |
573 | - "symbol",[Lemma;Number;Case;Gender;Person;]; | |
574 | - "ordnum",[Lemma;Number;Case;Gender;Grad;]; | |
575 | - "date",[Lemma;Nsyn;Nsem;]; | |
576 | - "date-interval",[Lemma;Nsyn;Nsem;]; | |
577 | - "hour-minute",[Lemma;Nsyn;Nsem;]; | |
578 | - "hour",[Lemma;Nsyn;Nsem;]; | |
579 | - "hour-minute-interval",[Lemma;Nsyn;Nsem;]; | |
580 | - "hour-interval",[Lemma;Nsyn;Nsem;]; | |
581 | - "year",[Lemma;Nsyn;Nsem;]; | |
582 | - "year-interval",[Lemma;Nsyn;Nsem;]; | |
583 | - "day",[Lemma;Nsyn;Nsem;]; | |
584 | - "day-interval",[Lemma;Nsyn;Nsem;]; | |
585 | - "day-month",[Lemma;Nsyn;Nsem;]; | |
586 | - "day-month-interval",[Lemma;Nsyn;Nsem;]; | |
587 | - "month-interval",[Lemma;Nsyn;Nsem;]; | |
588 | - "roman-ordnum",[Lemma;Number;Case;Gender;Grad;]; | |
589 | - "roman",[Lemma;Nsyn;Nsem;]; | |
590 | - "roman-interval",[Lemma;Nsyn;Nsem;]; | |
591 | - "match-result",[Lemma;Nsyn;Nsem;]; | |
592 | - "url",[Lemma;Nsyn;Nsem;]; | |
593 | - "email",[Lemma;Nsyn;Nsem;]; | |
594 | - "obj-id",[Lemma;Nsyn;Nsem;]; | |
595 | - "building-number",[Lemma;Nsyn;Nsem;]; | |
596 | - "adj",[Lemma;Cat;Proj;Number;Case;Gender;Grad;]; | |
597 | - "adjc",[Lemma;Cat;Proj;Number;Case;Gender;Grad;]; | |
598 | - "adjp",[Lemma;Cat;Proj;Number;Case;Gender;Grad;]; | |
599 | - "apron",[Lemma;Number;Case;Gender;Grad;]; | |
600 | - "adja",[Lemma;Cat;Proj;]; | |
601 | - "adv",[Lemma;Cat;Proj;Grad;Mode];(* ctype *) | |
602 | - "ger",[Lemma;(*NewLemma;*)Cat;Proj;Number;Case;Gender;Person;Aspect;Negation;]; | |
603 | - "pact",[Lemma;(*NewLemma;*)Cat;Proj;Number;Case;Gender;Aspect;Negation;]; | |
604 | - "ppas",[Lemma;(*NewLemma;*)Cat;Proj;Number;Case;Gender;Aspect;Negation;]; | |
605 | - "fin",[Lemma;(*NewLemma;*)Cat;Proj;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
606 | - "bedzie",[Lemma;(*NewLemma;*)Cat;Proj;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
607 | - "praet",[Lemma;(*NewLemma;*)Cat;Proj;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
608 | - "winien",[Lemma;(*NewLemma;*)Cat;Proj;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
609 | - "impt",[Lemma;(*NewLemma;*)Cat;Proj;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
610 | - "imps",[Lemma;(*NewLemma;*)Cat;Proj;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
611 | - "pred",[Lemma;(*NewLemma;*)Cat;Proj;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
612 | - "aglt",[Lemma;Number;Person;Aspect;]; | |
613 | - "inf",[Lemma;(*NewLemma;*)Cat;Proj;Aspect;Negation;]; | |
614 | - "pcon",[Lemma;(*NewLemma;*)Cat;Proj;Aspect;Negation;]; | |
615 | - "pant",[Lemma;(*NewLemma;*)Cat;Proj;Aspect;Negation;]; | |
616 | - "qub",[Lemma;]; | |
617 | - "part",[Lemma;]; | |
618 | - "comp",[Lemma;];(* ctype *) | |
619 | - "conj",[Lemma;];(* ctype *) | |
620 | - "interj",[Lemma;]; | |
621 | - "sinterj",[Lemma;]; | |
622 | - "burk",[Lemma;]; | |
623 | - "interp",[Lemma;]; | |
624 | - "unk",[Lemma;Number;Case;Gender;Person;]; | |
625 | - "xxx",[Lemma;Number;Case;Gender;Person;]; | |
626 | - "html-tag",[Lemma;]; | |
607 | + "subst",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Person;Nsyn;Nsem;]; | |
608 | + "depr",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Person;Nsyn;Nsem;]; | |
609 | + "ppron12",[Lemma;SNode;Number;Case;Gender;Person;]; | |
610 | + "ppron3",[Lemma;SNode;Number;Case;Gender;Person;Praep;]; | |
611 | + "siebie",[Lemma;SNode;Number;Case;Gender;Person;]; | |
612 | + "prep",[Lemma;Cat;Coerced;Role;SNode;Psem;Case;]; | |
613 | + "compar",[Lemma;Cat;Coerced;Role;SNode;Case;]; | |
614 | + "num",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;]; | |
615 | + "numcomp",[Lemma;SNode]; | |
616 | + "intnum",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;]; | |
617 | + "realnum",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;]; | |
618 | + "intnum-interval",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;]; | |
619 | + "realnum-interval",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;]; | |
620 | + "symbol",[Lemma;SNode;Number;Case;Gender;Person;]; | |
621 | + "ordnum",[Lemma;SNode;Number;Case;Gender;Grad;]; | |
622 | + "date",[Lemma;SNode;Nsyn;Nsem;]; | |
623 | + "date-interval",[Lemma;SNode;Nsyn;Nsem;]; | |
624 | + "hour-minute",[Lemma;SNode;Nsyn;Nsem;]; | |
625 | + "hour",[Lemma;SNode;Nsyn;Nsem;]; | |
626 | + "hour-minute-interval",[Lemma;SNode;Nsyn;Nsem;]; | |
627 | + "hour-interval",[Lemma;SNode;Nsyn;Nsem;]; | |
628 | + "year",[Lemma;SNode;Nsyn;Nsem;]; | |
629 | + "year-interval",[Lemma;SNode;Nsyn;Nsem;]; | |
630 | + "day",[Lemma;SNode;Nsyn;Nsem;]; | |
631 | + "day-interval",[Lemma;SNode;Nsyn;Nsem;]; | |
632 | + "day-month",[Lemma;SNode;Nsyn;Nsem;]; | |
633 | + "day-month-interval",[Lemma;SNode;Nsyn;Nsem;]; | |
634 | + "month-interval",[Lemma;SNode;Nsyn;Nsem;]; | |
635 | + "roman-ordnum",[Lemma;SNode;Number;Case;Gender;Grad;]; | |
636 | + "roman",[Lemma;SNode;Nsyn;Nsem;]; | |
637 | + "roman-interval",[Lemma;SNode;Nsyn;Nsem;]; | |
638 | + "match-result",[Lemma;SNode;Nsyn;Nsem;]; | |
639 | + "url",[Lemma;SNode;Nsyn;Nsem;]; | |
640 | + "email",[Lemma;SNode;Nsyn;Nsem;]; | |
641 | + "phone-number",[Lemma;SNode;Nsyn;Nsem;]; | |
642 | + "postal-code",[Lemma;SNode;Nsyn;Nsem;]; | |
643 | + "obj-id",[Lemma;SNode;Nsyn;Nsem;]; | |
644 | + "building-number",[Lemma;SNode;Nsyn;Nsem;]; | |
645 | + "fixed",[Lemma;SNode;]; | |
646 | + "adj",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Grad;]; | |
647 | + "adjc",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Grad;]; | |
648 | + "adjp",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Grad;]; | |
649 | + "apron",[Lemma;Cat;Role;SNode;Number;Case;Gender;Grad;]; | |
650 | + "adja",[Lemma;Cat;Coerced;Role;SNode;]; | |
651 | + "adv",[Lemma;Cat;Coerced;Role;SNode;Grad;Mode];(* ctype *) | |
652 | + "ger",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Case;Gender;Person;Aspect;Negation;]; | |
653 | + "pact",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Case;Gender;Aspect;Negation;]; | |
654 | + "ppas",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Case;Gender;Aspect;Negation;]; | |
655 | + "fin",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
656 | + "bedzie",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
657 | + "praet",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
658 | + "winien",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
659 | + "impt",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
660 | + "imps",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
661 | + "pred",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; | |
662 | + "aglt",[Lemma;SNode;Number;Person;Aspect;]; | |
663 | + "inf",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; | |
664 | + "pcon",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; | |
665 | + "pant",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; | |
666 | + "pacta",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;]; | |
667 | + "qub",[Lemma;Cat;Role;SNode;]; | |
668 | + "part",[Lemma;SNode]; | |
669 | + "comp",[Lemma;SNode;];(* ctype *) | |
670 | + "conj",[Lemma;SNode;];(* ctype *) | |
671 | + "interj",[Lemma;Cat;Coerced;Role;SNode;]; | |
672 | + "sinterj",[Lemma;Cat;Coerced;Role;SNode;]; | |
673 | + "burk",[Lemma;SNode;]; | |
674 | + "interp",[Lemma;SNode;]; | |
675 | + "unk",[Lemma;SNode;Number;Case;Gender;Person;]; | |
676 | + "xxx",[Lemma;SNode;Number;Case;Gender;Person;]; | |
677 | + "html-tag",[Lemma;SNode;]; | |
678 | + "list-item",[Lemma;SNode;]; | |
627 | 679 | ] StringMap.empty (fun map (k,l) -> StringMap.add map k l) |
680 | + | |
681 | +let string_of_cats cats = | |
682 | + String.concat ", " (SelectorMap.fold selector_values [] (fun l sel _ -> | |
683 | + try | |
684 | + let s = String.concat "|" (match_selector cats sel) in | |
685 | + if s = "" then l else | |
686 | + (string_of_selector sel ^ "=" ^ s) :: l | |
687 | + with _ -> l)) | |
... | ... |
LCGlexicon/ENIAMcategoriesPL_old.ml
0 → 100644
1 | +(* | |
2 | + * ENIAM_LCGlexicon is a library that provides LCG lexicon form Polish | |
3 | + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | + * | |
6 | + * This library is free software: you can redistribute it and/or modify | |
7 | + * it under the terms of the GNU Lesser General Public License as published by | |
8 | + * the Free Software Foundation, either version 3 of the License, or | |
9 | + * (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + * GNU Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public License | |
17 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | + *) | |
19 | + | |
20 | +open ENIAM_LCGlexiconTypes | |
21 | +open Xstd | |
22 | + | |
23 | +let all_numbers = ["sg";"pl"] | |
24 | +let all_cases = ["nom";"gen";"dat";"acc";"inst";"loc";"voc"] | |
25 | +(* let all_genders = ["m1";"m2";"m3";"f";"n1";"n2";"p1";"p2";"p3"] *) | |
26 | +let all_genders = ["m1";"m2";"m3";"f";"n"] | |
27 | +let all_persons = ["pri";"sec";"ter"] | |
28 | +(* FIXME: zamiast wszystkich możliwych wartości można używać Zero gdy nie ma uzgodnienia *) | |
29 | + | |
30 | +let selector_values = Xlist.fold [ | |
31 | + Lemma, []; | |
32 | + Pos, ["subst";"depr";"ppron12";"ppron3";"siebie";"prep";"fixed";"num";"numcomp";"intnum"; | |
33 | + "realnum";"intnum-interval";"realnum-interval";"symbol";"ordnum"; | |
34 | + "date";"date-interval";"hour-minute";"hour";"hour-minute-interval"; | |
35 | + "hour-interval";"year";"year-interval";"day";"day-interval";"day-month"; | |
36 | + "day-month-interval";"month-interval";"roman";"roman-interval";"roman-ordnum"; | |
37 | + "match-result";"url";"email";"phone-number";"postal-code";"obj-id";"building-number";"list-item";"adj";"adjc";"adjp";"adja"; | |
38 | + "adv";"ger";"pact";"ppas";"fin";"bedzie";"praet";"winien";"impt"; | |
39 | + "imps";"pred";"aglt";"inf";"pcon";"pant";"qub";"part";"comp";"conj";"interj"; | |
40 | + "sinterj";"burk";"interp";"xxx";"unk";"html-tag";"apron";"compar"]; | |
41 | + Pos2, []; | |
42 | + Cat, []; | |
43 | + Coerced, []; | |
44 | + Number, all_numbers; | |
45 | + Case, "postp" :: "pred" :: all_cases; | |
46 | + Gender, all_genders; | |
47 | + Person, all_persons; | |
48 | + Grad, ["pos";"com";"sup"]; | |
49 | + Praep, ["praep";"npraep";"praep-npraep"]; | |
50 | + Acm, ["congr";"rec"]; | |
51 | + Ctype, ["int";"rel";"sub";"coord"]; | |
52 | + Mode, ["abl";"adl";"locat";"perl";"dur";"temp";"mod"]; | |
53 | + Aspect, ["perf";"imperf"]; | |
54 | + Negation, ["neg";"aff"]; | |
55 | + Mood, ["indicative";"imperative";"conditional"]; | |
56 | + Tense, ["past";"pres";"fut"]; | |
57 | + Nsyn, ["proper";"pronoun";"common"]; | |
58 | + Nsem, ["count";"time";"mass";"measure"]; | |
59 | + Psem, ["sem";"nosem"]; | |
60 | + Ucase, all_cases; | |
61 | +] SelectorMap.empty (fun map (selector,vals) -> SelectorMap.add map selector vals) | |
62 | + | |
63 | + | |
64 | +let expand_numbers numbers = | |
65 | + if Xlist.mem numbers "_" then all_numbers else numbers | |
66 | + | |
67 | +let expand_genders genders = | |
68 | + if Xlist.mem genders "_" then all_genders else genders | |
69 | + | |
70 | +let expand_cases cases = | |
71 | + if Xlist.mem cases "_" || Xlist.mem cases "$C" then all_cases else cases | |
72 | + | |
73 | +let expand_akcs akcs = | |
74 | + if Xlist.mem akcs "_" then ["akc";"nakc"] else akcs | |
75 | + | |
76 | +let split_voc cases = | |
77 | + Xlist.fold cases ([],[]) (fun (cases,voc) -> function | |
78 | + "voc" -> cases, "voc" :: voc | |
79 | + | s -> s :: cases, voc) | |
80 | + | |
81 | +let load_subst_data filename _ = | |
82 | + StringSet.of_list (File.load_lines filename) | |
83 | + | |
84 | +let subst_uncountable_lexemes = ref StringSet.empty | |
85 | +let subst_uncountable_lexemes2 = ref StringSet.empty | |
86 | +let subst_container_lexemes = ref StringSet.empty | |
87 | +let subst_numeral_lexemes = ref StringSet.empty | |
88 | +let subst_time_lexemes = ref StringSet.empty | |
89 | + | |
90 | +let subst_pronoun_lexemes = StringSet.of_list ["co"; "kto"; "cokolwiek"; "ktokolwiek"; "nic"; "nikt"; "coś"; "ktoś"; "to"] | |
91 | +let adj_pronoun_lexemes = StringSet.of_list ["czyj"; "jaki"; "który"; "jakiś"; "ten"; "taki"] | |
92 | +let compar_lexemes = StringSet.of_list ["jak"; "jako"; "niż"; "niczym"; "niby"; "co"; "zamiast"] | |
93 | + | |
94 | +(* let adj_quant_lexemes = StringSet.of_list ["każdy"; "wszelki"; "wszystek"; "żaden"; "jakiś"; "pewien"; "niektóry"; "jedyny"; "sam"] *) | |
95 | + | |
96 | +let load_adv_modes filename adv_modes = | |
97 | + File.fold_tab filename adv_modes (fun adv_modes -> function | |
98 | + [adv;mode] -> StringMap.add_inc adv_modes adv [mode] (fun l -> mode :: l) | |
99 | + | _ -> failwith "load_adv_modes") | |
100 | + | |
101 | +let load_num_nsems filename num_nsems = | |
102 | + File.fold_tab filename num_nsems (fun num_nsems -> function | |
103 | + lemma :: _ :: nsems :: _ -> | |
104 | + Xlist.fold (Xstring.split "," nsems) num_nsems (fun num_nsems nsem -> | |
105 | + StringMap.add_inc num_nsems lemma [nsem] (fun l -> nsem :: l)) | |
106 | + | _ -> failwith "load_num_nsems") | |
107 | + | |
108 | +let adv_modes = ref (StringMap.empty : string list StringMap.t) | |
109 | +let num_nsems = ref (StringMap.empty : string list StringMap.t) | |
110 | + | |
111 | +let initialize () = | |
112 | + subst_uncountable_lexemes := File.catch_no_file (load_subst_data subst_uncountable_lexemes_filename) StringSet.empty; | |
113 | + subst_uncountable_lexemes2 := File.catch_no_file (load_subst_data subst_uncountable_lexemes_filename2) StringSet.empty; | |
114 | + subst_container_lexemes := File.catch_no_file (load_subst_data subst_container_lexemes_filename) StringSet.empty; | |
115 | + subst_numeral_lexemes := File.catch_no_file (load_subst_data subst_numeral_lexemes_filename) StringSet.empty; | |
116 | + subst_time_lexemes := File.catch_no_file (load_subst_data subst_time_lexemes_filename) StringSet.empty; | |
117 | + adv_modes := File.catch_no_file (load_adv_modes adv_modes_filename) StringMap.empty; | |
118 | + num_nsems := File.catch_no_file (load_num_nsems num_nsems_filename) StringMap.empty; | |
119 | + () | |
120 | + | |
121 | +let noun_type proper lemma pos = | |
122 | + let nsyn = | |
123 | + if proper then "proper" else | |
124 | + if pos = "ppron12" || pos = "ppron3" || pos = "siebie" then "pronoun" else | |
125 | + if pos = "symbol" || pos = "date" || pos = "date-interval" || pos = "hour" || pos = "hour-minute" || pos = "hour-interval" || pos = "hour-minute-interval" || | |
126 | + pos = "year" || pos = "year-interval" || pos = "day" || pos = "day-interval" || pos = "day-month" || pos = "day-month-interval" || | |
127 | + pos = "match-result" || pos = "month-interval" || pos = "roman" || pos = "roman-interval" || pos = "url" || pos = "email" || pos = "phone-number" || pos = "postal-code" || pos = "obj-id" || pos = "building-number" || pos = "date" then "proper" else | |
128 | + if StringSet.mem subst_pronoun_lexemes lemma then "pronoun" else | |
129 | + "common" in | |
130 | + let nsem = | |
131 | + if pos = "ppron12" || pos = "ppron3" || pos = "siebie" then ["count"] else | |
132 | + if StringSet.mem !subst_time_lexemes lemma then ["time"] else | |
133 | + let l = ["count"] in | |
134 | + let l = if StringSet.mem !subst_uncountable_lexemes lemma || StringSet.mem !subst_uncountable_lexemes2 lemma then "mass" :: l else l in | |
135 | + if StringSet.mem !subst_container_lexemes lemma then "measure" :: l else l in | |
136 | + [nsyn],nsem | |
137 | + | |
138 | +let adv_mode lemma = | |
139 | + try | |
140 | + StringMap.find !adv_modes lemma | |
141 | + with Not_found -> ["mod"] | |
142 | + | |
143 | +let num_nsem lemma = | |
144 | + try | |
145 | + StringMap.find !num_nsems lemma | |
146 | + with Not_found -> (*try | |
147 | + StringMap.find !num_nsems (String.lowercase lemma) | |
148 | + with Not_found ->*) failwith ("num_nsem: " ^ lemma) | |
149 | + | |
150 | + | |
151 | +let part_set = StringSet.of_list ["się"; "nie"; "by"; "niech"; "niechaj"; "niechże"; "niechajże"; "czy"; "gdyby"] | |
152 | + | |
153 | +let clarify_categories proper cat coerced = function | |
154 | + lemma,"subst",[numbers;cases;genders] -> | |
155 | + let numbers = expand_numbers numbers in | |
156 | + let cases = expand_cases cases in | |
157 | + let genders = expand_genders genders in | |
158 | + let cases,voc = split_voc cases in | |
159 | + let nsyn,nsem = noun_type proper lemma "subst" in | |
160 | + (if cases = [] then [] else | |
161 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ | |
162 | + (if voc = [] then [] else | |
163 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | |
164 | + | lemma,"subst",[numbers;cases;genders;_] -> | |
165 | + let numbers = expand_numbers numbers in | |
166 | + let cases = expand_cases cases in | |
167 | + let genders = expand_genders genders in | |
168 | + let cases,voc = split_voc cases in | |
169 | + let nsyn,nsem = noun_type proper lemma "subst" in | |
170 | + (if cases = [] then [] else | |
171 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ | |
172 | + (if voc = [] then [] else | |
173 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | |
174 | + | lemma,"depr",[numbers;cases;genders] -> | |
175 | + let numbers = expand_numbers numbers in | |
176 | + let cases = expand_cases cases in | |
177 | + let genders = expand_genders genders in | |
178 | + let cases,voc = split_voc cases in | |
179 | + let nsyn,nsem = noun_type proper lemma "depr" in | |
180 | + (if cases = [] then [] else | |
181 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ | |
182 | + (if voc = [] then [] else | |
183 | + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | |
184 | + | lemma,"ppron12",[numbers;cases;genders;persons] -> | |
185 | + let numbers = expand_numbers numbers in | |
186 | + let cases = expand_cases cases in | |
187 | + let genders = expand_genders genders in | |
188 | + [{empty_cats with lemma=lemma; pos="ppron12"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons}] | |
189 | + | lemma,"ppron12",[numbers;cases;genders;persons;akcs] -> | |
190 | + let numbers = expand_numbers numbers in | |
191 | + let cases = expand_cases cases in | |
192 | + let genders = expand_genders genders in | |
193 | + [{empty_cats with lemma=lemma; pos="ppron12"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons}] | |
194 | + | lemma,"ppron3",[numbers;cases;genders;persons] -> | |
195 | + let numbers = expand_numbers numbers in | |
196 | + let cases = expand_cases cases in | |
197 | + let genders = expand_genders genders in | |
198 | + [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=["praep-npraep"]}] | |
199 | + | lemma,"ppron3",[numbers;cases;genders;persons;akcs] -> | |
200 | + let numbers = expand_numbers numbers in | |
201 | + let cases = expand_cases cases in | |
202 | + let genders = expand_genders genders in | |
203 | + [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=["praep-npraep"]}] | |
204 | + | lemma,"ppron3",[numbers;cases;genders;persons;akcs;praep] -> | |
205 | + let numbers = expand_numbers numbers in | |
206 | + let cases = expand_cases cases in | |
207 | + let genders = expand_genders genders in | |
208 | + let praep = match praep with | |
209 | + ["praep";"npraep"] -> ["praep-npraep"] | |
210 | + | ["npraep";"praep"] -> ["praep-npraep"] | |
211 | + | _ -> praep in | |
212 | + [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=praep}] | |
213 | + | lemma,"siebie",[cases] -> (* FIXME: czy tu określać numbers genders persons? *) | |
214 | + let cases = expand_cases cases in | |
215 | + [{empty_cats with lemma=lemma; pos="siebie"; pos2="pron"; numbers=all_numbers; cases=cases; genders=all_genders; persons=["ter"]}] | |
216 | + | lemma,"prep",[cases;woks] -> | |
217 | + if StringSet.mem compar_lexemes lemma then | |
218 | + [{empty_cats with lemma=lemma; pos="compar"; pos2="prep"}] else | |
219 | + let cases = expand_cases cases in | |
220 | + [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; cases=cases; psem=["sem";"nosem"]}] | |
221 | + | lemma,"prep",[cases] -> | |
222 | + if StringSet.mem compar_lexemes lemma then | |
223 | + [{empty_cats with lemma=lemma; pos="compar"; pos2="prep"}] else | |
224 | + let cases = expand_cases cases in | |
225 | + [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; cases=cases; psem=["sem";"nosem"]}] | |
226 | + | lemma,"num",[numbers;cases;genders;acms] -> | |
227 | + let numbers = expand_numbers numbers in | |
228 | + let cases = expand_cases cases in | |
229 | + let genders = expand_genders genders in | |
230 | + let nsem = num_nsem lemma in | |
231 | + [{empty_cats with lemma=lemma; pos="num"; pos2="num"; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms; nsem=nsem}] | |
232 | + | lemma,"num",[numbers;cases;genders;acms;_] -> | |
233 | + let numbers = expand_numbers numbers in | |
234 | + let cases = expand_cases cases in | |
235 | + let genders = expand_genders genders in | |
236 | + let nsem = num_nsem lemma in | |
237 | + [{empty_cats with lemma=lemma; pos="num"; pos2="num"; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms; nsem=nsem}] | |
238 | + | lemma,"numcomp",[] -> [{empty_cats with lemma=lemma; pos="numcomp"; pos2="numcomp"}] | |
239 | + | lemma,"intnum",[] -> | |
240 | + let numbers,acms = | |
241 | + if lemma = "1" || lemma = "-1" then ["sg"],["congr"] else | |
242 | + let s = String.get lemma (String.length lemma - 1) in | |
243 | + ["pl"],if s = '2' || s = '3' || s = '4' then ["rec";"congr"] else ["rec"] in | |
244 | + [{empty_cats with lemma=lemma; pos="intnum"; pos2="num"; numbers=numbers; cases=all_cases; genders=all_genders; persons=["ter"]; acms=acms; nsem=["count"]}] | |
245 | + | lemma,"realnum",[] -> | |
246 | + [{empty_cats with lemma=lemma; pos="realnum"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}] | |
247 | + | lemma,"intnum-interval",[] -> | |
248 | + [{empty_cats with lemma=lemma; pos="intnum-interval"; pos2="num"; numbers=["pl"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec";"congr"]; nsem=["count"]}] | |
249 | + | lemma,"realnum-interval",[] -> | |
250 | + [{empty_cats with lemma=lemma; pos="realnum-interval"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}] | |
251 | + | lemma,"symbol",[] -> | |
252 | + [{empty_cats with lemma=lemma; pos="symbol"; pos2="noun"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]}] | |
253 | + | lemma,"ordnum",[] -> | |
254 | + [{empty_cats with lemma=lemma; pos="ordnum"; pos2="adj"; numbers=all_numbers; cases=all_cases; genders=all_genders; grads=["pos"]}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *) | |
255 | + | lemma,"date",[] -> | |
256 | + let nsyn,nsem = noun_type proper lemma "date" in | |
257 | + [{empty_cats with lemma=lemma; pos="date"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
258 | + | lemma,"date-interval",[] -> | |
259 | + let nsyn,nsem = noun_type proper lemma "date-interval" in | |
260 | + [{empty_cats with lemma=lemma; pos="date-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
261 | + | lemma,"hour-minute",[] -> | |
262 | + let nsyn,nsem = noun_type proper lemma "hour-minute" in | |
263 | + [{empty_cats with lemma=lemma; pos="hour-minute"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
264 | + | lemma,"hour",[] -> | |
265 | + let nsyn,nsem = noun_type proper lemma "hour" in | |
266 | + [{empty_cats with lemma=lemma; pos="hour"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
267 | + | lemma,"hour-minute-interval",[] -> | |
268 | + let nsyn,nsem = noun_type proper lemma "hour-minute-interval" in | |
269 | + [{empty_cats with lemma=lemma; pos="hour-minute-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
270 | + | lemma,"hour-interval",[] -> | |
271 | + let nsyn,nsem = noun_type proper lemma "hour-interval" in | |
272 | + [{empty_cats with lemma=lemma; pos="hour-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
273 | + | lemma,"year",[] -> | |
274 | + let nsyn,nsem = noun_type proper lemma "year" in | |
275 | + [{empty_cats with lemma=lemma; pos="year"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
276 | + | lemma,"year-interval",[] -> | |
277 | + let nsyn,nsem = noun_type proper lemma "year-interval" in | |
278 | + [{empty_cats with lemma=lemma; pos="year-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
279 | + | lemma,"day",[] -> | |
280 | + let nsyn,nsem = noun_type proper lemma "day" in | |
281 | + [{empty_cats with lemma=lemma; pos="day"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
282 | + | lemma,"day-interval",[] -> | |
283 | + let nsyn,nsem = noun_type proper lemma "day-interval" in | |
284 | + [{empty_cats with lemma=lemma; pos="day-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
285 | + | lemma,"day-month",[] -> | |
286 | + let nsyn,nsem = noun_type proper lemma "day-month" in | |
287 | + [{empty_cats with lemma=lemma; pos="day-month"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
288 | + | lemma,"day-month-interval",[] -> | |
289 | + let nsyn,nsem = noun_type proper lemma "day-month-interval" in | |
290 | + [{empty_cats with lemma=lemma; pos="day-month-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
291 | + | lemma,"month-interval",[] -> | |
292 | + let nsyn,nsem = noun_type proper lemma "month-interval" in | |
293 | + [{empty_cats with lemma=lemma; pos="month-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
294 | + | lemma,"roman",[] -> | |
295 | + let nsyn,nsem = noun_type proper lemma "roman" in | |
296 | + [{empty_cats with lemma=lemma; pos="roman-ordnum"; pos2="adj"; numbers=all_numbers; cases=all_cases; genders=all_genders; grads=["pos"]}; | |
297 | + {empty_cats with lemma=lemma; pos="roman"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
298 | + | lemma,"roman-interval",[] -> | |
299 | + let nsyn,nsem = noun_type proper lemma "roman-interval" in | |
300 | + [{empty_cats with lemma=lemma; pos="roman-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
301 | + | lemma,"match-result",[] -> | |
302 | + let nsyn,nsem = noun_type proper lemma "match-result" in | |
303 | + [{empty_cats with lemma=lemma; pos="match-result"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
304 | + | lemma,"url",[] -> | |
305 | + let nsyn,nsem = noun_type proper lemma "url" in | |
306 | + [{empty_cats with lemma=lemma; pos="url"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
307 | + | lemma,"email",[] -> | |
308 | + let nsyn,nsem = noun_type proper lemma "email" in | |
309 | + [{empty_cats with lemma=lemma; pos="email"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
310 | + | lemma,"phone-number",[] -> | |
311 | + let nsyn,nsem = noun_type proper lemma "phone-number" in | |
312 | + [{empty_cats with lemma=lemma; pos="phone-number"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
313 | + | lemma,"postal-code",[] -> | |
314 | + let nsyn,nsem = noun_type proper lemma "postal-code" in | |
315 | + [{empty_cats with lemma=lemma; pos="postal-code"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
316 | + | lemma,"obj-id",[] -> | |
317 | + let nsyn,nsem = noun_type proper lemma "obj-id" in | |
318 | + [{empty_cats with lemma=lemma; pos="obj-id"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
319 | + | lemma,"building-number",[] -> | |
320 | + let nsyn,nsem = noun_type proper lemma "building-number" in | |
321 | + [{empty_cats with lemma=lemma; pos="building-number"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] | |
322 | + | lemma,"fixed",[] -> [{empty_cats with lemma=lemma; pos="fixed"; pos2="fixed"}] | |
323 | + | lemma,"adj",[numbers;cases;genders;grads] -> (* FIXME: adjsyn *) | |
324 | + let numbers = expand_numbers numbers in | |
325 | + let cases = expand_cases cases in | |
326 | + let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in | |
327 | + let genders = expand_genders genders in | |
328 | + let pos,pos2 = if StringSet.mem adj_pronoun_lexemes lemma then "apron","pron" else "adj","adj" in | |
329 | + [{empty_cats with lemma=lemma; pos=pos; pos2=pos2; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; grads=grads}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *) | |
330 | + | lemma,"adjc",[] -> | |
331 | + [{empty_cats with lemma=lemma; pos="adjc"; pos2="adj"; cat=cat; coerced=coerced; numbers=["sg"]; cases=["pred"]; genders=["m1";"m2";"m3"]; grads=["pos"]}] | |
332 | + | lemma,"adjp",[] -> | |
333 | + [{empty_cats with lemma=lemma; pos="adjp"; pos2="adj"; cat=cat; coerced=coerced; numbers=all_numbers; cases=["postp"]; genders=all_genders; grads=["pos"]}] | |
334 | + | lemma,"adja",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; pos="adja"; pos2="adja"}] | |
335 | + | lemma,"adv",[grads] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; pos="adv"; pos2="adv"; grads=grads; modes=adv_mode lemma}] | |
336 | + | lemma,"adv",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; pos="adv"; pos2="adv"; grads=["pos"]; modes=adv_mode lemma}] | |
337 | + | lemma,"ger",[numbers;cases;genders;aspects;negations] -> | |
338 | + let numbers = expand_numbers numbers in | |
339 | + let cases = expand_cases cases in | |
340 | + let genders = expand_genders genders in | |
341 | + [{empty_cats with lemma=lemma; pos="ger"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; aspects=aspects; negations=negations}] (* FIXME: kwestia osoby przy voc *) | |
342 | + | lemma,"pact",[numbers;cases;genders;aspects;negations] -> | |
343 | + let numbers = expand_numbers numbers in | |
344 | + let cases = expand_cases cases in | |
345 | + let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in | |
346 | + let genders = expand_genders genders in | |
347 | + [{empty_cats with lemma=lemma; pos="pact"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}] | |
348 | + | lemma,"ppas",[numbers;cases;genders;aspects;negations] -> | |
349 | + let numbers = expand_numbers numbers in | |
350 | + let cases = expand_cases cases in | |
351 | + let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in | |
352 | + let genders = expand_genders genders in | |
353 | + [{empty_cats with lemma=lemma; pos="ppas"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}] | |
354 | + | lemma,"fin",[numbers;persons;aspects] -> (* FIXME: genders bez przymnogich *) | |
355 | + let numbers = expand_numbers numbers in | |
356 | + let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in | |
357 | + let cats = {empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; negations=["aff"; "neg"]; moods=["indicative"]} in | |
358 | + (Xlist.map aspects (function | |
359 | + "imperf" -> {cats with aspects=["imperf"]; tenses=["pres"]} | |
360 | + | "perf" -> {cats with aspects=["perf"]; tenses=["fut"]} | |
361 | + | _ -> failwith "clarify_categories")) @ | |
362 | + (if persons2 = [] then [] else | |
363 | + [{empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]) | |
364 | + | lemma,"bedzie",[numbers;persons;aspects] -> | |
365 | + let numbers = expand_numbers numbers in | |
366 | + let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in | |
367 | + [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] @ | |
368 | + (if persons2 = [] then [] else | |
369 | + [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]) | |
370 | + | lemma,"praet",[numbers;genders;aspects;nagl] -> | |
371 | + let numbers = expand_numbers numbers in | |
372 | + let genders = expand_genders genders in | |
373 | + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @ | |
374 | + (if Xlist.mem aspects "imperf" then | |
375 | + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] | |
376 | + else []) | |
377 | + | lemma,"praet",[numbers;genders;aspects] -> | |
378 | + let numbers = expand_numbers numbers in | |
379 | + let genders = expand_genders genders in | |
380 | + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @ | |
381 | + (if Xlist.mem aspects "imperf" then | |
382 | + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] | |
383 | + else []) | |
384 | + | lemma,"winien",[numbers;genders;aspects] -> | |
385 | + let numbers = expand_numbers numbers in | |
386 | + let genders = expand_genders genders in | |
387 | + [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["pres"]}; | |
388 | + {empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] @ | |
389 | + (if Xlist.mem aspects "imperf" then | |
390 | + [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] | |
391 | + else []) | |
392 | + | lemma,"impt",[numbers;persons;aspects] -> | |
393 | + let numbers = expand_numbers numbers in | |
394 | + [{empty_cats with lemma=lemma; pos="impt"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}] | |
395 | + | lemma,"imps",[aspects] -> | |
396 | + [{empty_cats with lemma=lemma; pos="imps"; pos2="verb"; cat=cat; coerced=coerced; numbers=all_numbers; genders=all_genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] | |
397 | + | lemma,"pred",[] -> (* FIXME: czy predykatyw zawsze jest niedokonany? *) | |
398 | + [{empty_cats with lemma=lemma; pos="pred"; pos2="verb"; cat=cat; coerced=coerced; numbers=["sg"]; genders=[(*"n2"*)"n"]; persons=["ter"]; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["pres";"past";"fut"]}] | |
399 | + | lemma,"aglt",[numbers;persons;aspects;wok] -> | |
400 | + let numbers = expand_numbers numbers in | |
401 | + [{empty_cats with lemma=lemma; pos="aglt"; pos2="verb"; numbers=numbers; persons=persons; aspects=aspects}] | |
402 | + | lemma,"inf",[aspects] -> [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; cat=cat; coerced=coerced; aspects=aspects; negations=["aff"; "neg"]}] | |
403 | + | lemma,"pcon",[aspects] -> [{empty_cats with lemma=lemma; pos="pcon"; pos2="verb"; cat=cat; coerced=coerced; aspects=aspects; negations=["aff"; "neg"]}] | |
404 | + | lemma,"pant",[aspects] -> [{empty_cats with lemma=lemma; pos="pant"; pos2="verb"; cat=cat; coerced=coerced; aspects=aspects; negations=["aff"; "neg"]}] | |
405 | + | lemma,"qub",[] -> | |
406 | + if StringSet.mem part_set lemma then [{empty_cats with lemma=lemma; pos="part"; pos2="qub"}] | |
407 | + else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"; cat=cat}] | |
408 | + | lemma,"comp",[] -> [{empty_cats with lemma=lemma; pos="comp"; pos2="comp"}] | |
409 | + | lemma,"conj",[] -> [{empty_cats with lemma=lemma; pos="conj"; pos2="conj"}] | |
410 | + | lemma,"interj",[] -> [{empty_cats with lemma=lemma; pos="interj"; pos2="interj"; cat=cat; coerced=coerced}] | |
411 | + | lemma,"sinterj",[] -> [{empty_cats with lemma=lemma; pos="sinterj"; pos2="sinterj"; (*cat=cat; coerced=coerced*)}] | |
412 | + | lemma,"burk",[] -> [{empty_cats with lemma=lemma; pos="burk"; pos2="burk"}] | |
413 | + | ",","interp",[] -> [{empty_cats with lemma=","; pos="conj"; pos2="conj"}] | |
414 | + | lemma,"interp",[] -> [{empty_cats with lemma=lemma; pos="interp"; pos2="interp"}] | |
415 | + | lemma,"unk",[] -> | |
416 | + [{empty_cats with lemma=lemma; pos="unk"; pos2="noun"; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}] | |
417 | + | lemma,"xxx",[] -> | |
418 | + [{empty_cats with lemma=lemma; pos="xxx"; pos2="noun"; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}] | |
419 | + | lemma,"html-tag",[] -> [{empty_cats with lemma=lemma; pos="html-tag"; pos2="html-tag"}] | |
420 | + | lemma,"list-item",[] -> [{empty_cats with lemma=lemma; pos="list-item"; pos2="list-item"}] | |
421 | + | lemma,c,l -> failwith ("clarify_categories: " ^ lemma ^ ":" ^ c ^ ":" ^ (String.concat ":" (Xlist.map l (String.concat ".")))) | |
422 | + | |
423 | +(* FIXME: move this somewhere else *) | |
424 | +(* let assign token = | |
425 | + match token.ENIAMtokenizerTypes.token with | |
426 | + ENIAMtokenizerTypes.Lemma(lemma,pos,interp) -> List.flatten (Xlist.map interp (fun interp -> clarify_categories false (lemma,pos,interp))) | |
427 | + | ENIAMtokenizerTypes.Proper(lemma,pos,interp,_) -> List.flatten (Xlist.map interp (fun interp -> clarify_categories true (lemma,pos,interp))) | |
428 | + | ENIAMtokenizerTypes.Interp lemma -> clarify_categories false (lemma,"interp",[]) | |
429 | + | _ -> [] *) | |
430 | + | |
(* The set of selector names that may appear in lexicon rule files;
   used to validate attribute names before they are parsed into selectors. *)
let selector_names = StringSet.of_list [
    (* core lexical attributes *)
    "lemma";"pos";"pos2";"cat";"coerced";
    (* morphosyntactic attributes *)
    "number";"case";"gender";"person";"grad";"praep";"acm";
    "aspect";"negation";"mood";"tense";
    (* semantic/syntactic noun and clause typing *)
    "nsyn";"nsem";"ctype";"mode";"psem";
    (* attributes of the incorporated/governed element *)
    "icat";"inumber";"igender";"iperson";"nperson";"ncat";"plemma";
    "unumber";"ucase";"ugender";"uperson";"amode"]
436 | + | |
437 | + | |
(* Printable name of a selector attribute; inverse of selector_of_string. *)
let string_of_selector s =
  match s with
  | Lemma -> "lemma"
  (* | NewLemma -> "newlemma" *)
  | Pos -> "pos"
  | Pos2 -> "pos2"
  | Cat -> "cat"
  | Coerced -> "coerced"
  | Number -> "number"
  | Case -> "case"
  | Gender -> "gender"
  | Person -> "person"
  | Grad -> "grad"
  | Praep -> "praep"
  | Acm -> "acm"
  | Aspect -> "aspect"
  | Negation -> "negation"
  | Mood -> "mood"
  | Tense -> "tense"
  | Nsyn -> "nsyn"
  | Nsem -> "nsem"
  | Ctype -> "ctype"
  | Mode -> "mode"
  | Psem -> "psem"
  | Icat -> "icat"
  | Inumber -> "inumber"
  | Igender -> "igender"
  | Iperson -> "iperson"
  | Nperson -> "nperson"
  | Ncat -> "ncat"
  | Plemma -> "plemma"
  | Unumber -> "unumber"
  | Ucase -> "ucase"
  | Ugender -> "ugender"
  | Uperson -> "uperson"
  | Amode -> "amode"
473 | + | |
(* Renders a selector list as "name=v1|v2, name2!=v3, ...". *)
let string_of_selectors selectors =
  let render (sel,rel,vals) =
    let op = match rel with Eq -> "=" | _ -> "!=" in
    string_of_selector sel ^ op ^ String.concat "|" vals in
  String.concat ", " (Xlist.map selectors render)
478 | + | |
(* Parses a selector name back into its variant; inverse of
   string_of_selector. Raises Failure on an unknown name. *)
let selector_of_string s =
  try
    List.assoc s [
      "lemma",Lemma; (* "newlemma",NewLemma; *)
      "pos",Pos; "pos2",Pos2; "cat",Cat; "coerced",Coerced;
      "number",Number; "case",Case; "gender",Gender; "person",Person;
      "grad",Grad; "praep",Praep; "acm",Acm; "aspect",Aspect;
      "negation",Negation; "mood",Mood; "tense",Tense;
      "nsyn",Nsyn; "nsem",Nsem; "ctype",Ctype; "mode",Mode; "psem",Psem;
      "icat",Icat; "inumber",Inumber; "igender",Igender; "iperson",Iperson;
      "nperson",Nperson; "ncat",Ncat; "plemma",Plemma;
      "unumber",Unumber; "ucase",Ucase; "ugender",Ugender; "uperson",Uperson;
      "amode",Amode]
  with Not_found -> failwith ("selector_of_string: " ^ s)
515 | + | |
(* Returns the list of values stored in [cats] for the given selector.
   Single-valued attributes (Lemma, Pos, Pos2, Cat) are returned as
   singleton lists; selectors with no stored attribute raise Failure. *)
let match_selector cats = function
    Lemma -> [cats.lemma]
(* | NewLemma -> [] *)
  | Pos -> [cats.pos]
  (* Pos2 was missing although "pos2" is a declared selector name and
     cats.pos2 is populated; a pos2= rule previously hit the failwith. *)
  | Pos2 -> [cats.pos2]
  | Cat -> [cats.cat]
  | Coerced -> cats.coerced
  | Number -> cats.numbers
  | Case -> cats.cases
  | Gender -> cats.genders
  | Person -> cats.persons
  | Grad -> cats.grads
  | Praep -> cats.praeps
  | Acm -> cats.acms
  | Aspect -> cats.aspects
  | Negation -> cats.negations
  | Mood -> cats.moods
  | Tense -> cats.tenses
  | Nsyn -> cats.nsyn
  | Nsem -> cats.nsem
  | Mode -> cats.modes
  | Psem -> cats.psem
  | c -> failwith ("match_selector: " ^ string_of_selector c)
538 | + | |
(* Replaces the values stored in [cats] for the given selector.
   Single-valued attributes (Lemma, Pos, Pos2, Cat) require exactly one
   value; selectors with no stored attribute raise Failure. *)
let set_selector cats vals = function
    Number -> {cats with numbers=vals}
  | Case -> {cats with cases=vals}
  | Gender -> {cats with genders=vals}
  | Person -> {cats with persons=vals}
  | Grad -> {cats with grads=vals}
  | Praep -> {cats with praeps=vals}
  | Acm -> {cats with acms=vals}
  | Aspect -> {cats with aspects=vals}
  | Negation -> {cats with negations=vals}
  | Mood -> {cats with moods=vals}
  | Tense -> {cats with tenses=vals}
  | Nsyn -> {cats with nsyn=vals}
  | Nsem -> {cats with nsem=vals}
  | Mode -> {cats with modes=vals}
  | Psem -> {cats with psem=vals}
  | Lemma -> (match vals with [v] -> {cats with lemma=v} | _ -> failwith "set_selector: Lemma")
  | Pos -> (match vals with [v] -> {cats with pos=v} | _ -> failwith "set_selector: Pos")
  (* Pos2 was missing; added for symmetry with Pos and with match_selector,
     since "pos2" is a declared selector name. *)
  | Pos2 -> (match vals with [v] -> {cats with pos2=v} | _ -> failwith "set_selector: Pos2")
  | Cat -> (match vals with [v] -> {cats with cat=v} | _ -> failwith "set_selector: Cat")
  | Coerced -> {cats with coerced=vals}
  | c -> failwith ("set_selector: " ^ string_of_selector c)
560 | + | |
(* Narrows [cats] by each selector constraint in turn: Eq keeps only the
   intersection with the allowed values, Neq removes them. Raises
   Not_found as soon as some constraint leaves no admissible value. *)
let rec apply_selectors cats = function
    [] -> cats
  | (sel,rel,vals) :: rest ->
      let current = StringSet.of_list (match_selector cats sel) in
      let constraint_set = StringSet.of_list vals in
      let narrowed = match rel with
          Eq -> StringSet.intersection current constraint_set
        | Neq -> StringSet.difference current constraint_set in
      if StringSet.is_empty narrowed then raise Not_found else
      apply_selectors (set_selector cats (StringSet.to_list narrowed) sel) rest
571 | + | |
(* For each part-of-speech tag, the ordered list of selector attributes
   that categories of that tag carry; stored as a map from tag to list. *)
let pos_categories = Xlist.fold [
  "subst",[Lemma;Cat;Coerced;Number;Case;Gender;Person;Nsyn;Nsem;];
  "depr",[Lemma;Cat;Coerced;Number;Case;Gender;Person;Nsyn;Nsem;];
  "ppron12",[Lemma;Number;Case;Gender;Person;];
  "ppron3",[Lemma;Number;Case;Gender;Person;Praep;];
  "siebie",[Lemma;Number;Case;Gender;Person;];
  "prep",[Lemma;Cat;Coerced;Psem;Case;];
  "compar",[Lemma;Cat;Coerced;Case;];
  "num",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
  "numcomp",[Lemma];
  "intnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
  "realnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
  "intnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
  "realnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
  "symbol",[Lemma;Number;Case;Gender;Person;];
  "ordnum",[Lemma;Number;Case;Gender;Grad;];
  "date",[Lemma;Nsyn;Nsem;];
  "date-interval",[Lemma;Nsyn;Nsem;];
  "hour-minute",[Lemma;Nsyn;Nsem;];
  "hour",[Lemma;Nsyn;Nsem;];
  "hour-minute-interval",[Lemma;Nsyn;Nsem;];
  "hour-interval",[Lemma;Nsyn;Nsem;];
  "year",[Lemma;Nsyn;Nsem;];
  "year-interval",[Lemma;Nsyn;Nsem;];
  "day",[Lemma;Nsyn;Nsem;];
  "day-interval",[Lemma;Nsyn;Nsem;];
  "day-month",[Lemma;Nsyn;Nsem;];
  "day-month-interval",[Lemma;Nsyn;Nsem;];
  "month-interval",[Lemma;Nsyn;Nsem;];
  "roman-ordnum",[Lemma;Number;Case;Gender;Grad;];
  "roman",[Lemma;Nsyn;Nsem;];
  "roman-interval",[Lemma;Nsyn;Nsem;];
  "match-result",[Lemma;Nsyn;Nsem;];
  "url",[Lemma;Nsyn;Nsem;];
  "email",[Lemma;Nsyn;Nsem;];
  "phone-number",[Lemma;Nsyn;Nsem;];
  "postal-code",[Lemma;Nsyn;Nsem;];
  "obj-id",[Lemma;Nsyn;Nsem;];
  "building-number",[Lemma;Nsyn;Nsem;];
  "fixed",[Lemma;];
  "adj",[Lemma;Cat;Coerced;Number;Case;Gender;Grad;];
  "adjc",[Lemma;Cat;Coerced;Number;Case;Gender;Grad;];
  "adjp",[Lemma;Cat;Coerced;Number;Case;Gender;Grad;];
  "apron",[Lemma;Number;Case;Gender;Grad;];
  "adja",[Lemma;Cat;Coerced;];
  "adv",[Lemma;Cat;Coerced;Grad;Mode];(* ctype *)
  "ger",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Case;Gender;Person;Aspect;Negation;];
  "pact",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Case;Gender;Aspect;Negation;];
  "ppas",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Case;Gender;Aspect;Negation;];
  "fin",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  "bedzie",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  "praet",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  "winien",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  "impt",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  "imps",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  "pred",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  "aglt",[Lemma;Number;Person;Aspect;];
  "inf",[Lemma;(*NewLemma;*)Cat;Coerced;Aspect;Negation;];
  "pcon",[Lemma;(*NewLemma;*)Cat;Coerced;Aspect;Negation;];
  "pant",[Lemma;(*NewLemma;*)Cat;Coerced;Aspect;Negation;];
  "qub",[Lemma;Cat;];
  "part",[Lemma;];
  "comp",[Lemma;];(* ctype *)
  "conj",[Lemma;];(* ctype *)
  "interj",[Lemma;Cat;Coerced;];
  "sinterj",[Lemma;];
  "burk",[Lemma;];
  "interp",[Lemma;];
  "unk",[Lemma;Number;Case;Gender;Person;];
  "xxx",[Lemma;Number;Case;Gender;Person;];
  "html-tag",[Lemma;];
  "list-item",[Lemma;];
  ] StringMap.empty (fun acc (pos,sels) -> StringMap.add acc pos sels)
... | ... |
LCGlexicon/resources/lexicon-pl.dic
1 | 1 | @PHRASE_NAMES |
2 | 2 | lex infp np prepnp adjp ip cp ncp advp padvp |
3 | - adja prepadjp comprepnp compar measure num aglt aux-fut | |
3 | + adja prepadjp comprepnp comparp measure num aglt aux-fut | |
4 | 4 | aux-past aux-imp qub interj hyphen int |
5 | 5 | rparen rparen2 rquot rquot2 rquot3 inclusion |
6 | 6 | day-interval day-lex day-month-interval date-interval |
... | ... | @@ -153,8 +153,8 @@ lemma=w,pos=prep,case=loc: prepnp*lemma*case{\(1+advp*T),/(day-month+day+ye |
153 | 153 | |
154 | 154 | # komparatywy |
155 | 155 | # FIXME: trzeba poprawić comparnp i comparpp w walencji |
156 | -pos=compar: QUANT[case=nom&gen&dat&acc&inst] compar*lemma*case{\(1+advp*T),/(np*T*case*T*T+adjp*T*case*T)}{\(1+qub),/(1+inclusion)}; | |
157 | -pos=compar: QUANT[case=postp] compar*lemma*case{\(1+advp*T),/(prepnp*T*T+prepadjp*T*T)}{\(1+qub),/(1+inclusion)}; | |
156 | +pos=compar: QUANT[case=nom&gen&dat&acc&inst] comparp*lemma*case{\(1+advp*T),/(np*T*case*T*T+adjp*T*case*T)}{\(1+qub),/(1+inclusion)}; | |
157 | +pos=compar: QUANT[case=postp] comparp*lemma*case{\(1+advp*T),/(prepnp*T*T+prepadjp*T*T)}{\(1+qub),/(1+inclusion)}; | |
158 | 158 | |
159 | 159 | # frazy przymiotnikowe |
160 | 160 | # FIXME: let grad = match grads with [grad] -> grad | _ -> failwith "make_adjp: grad" in |
... | ... |
LCGlexicon/resources/subst_container.dat
LCGparser/ENIAM_LCG_XMLof.ml
LCGparser/ENIAM_LCGgraphOf.ml
... | ... | @@ -102,7 +102,8 @@ let rec print_simplified_dependency_tree_rec2 file edge upper = function |
102 | 102 | |
103 | 103 | let rec print_simplified_dependency_tree_rec file edge upper id = function |
104 | 104 | Node t -> |
105 | - fprintf file " %s [label=\"%s\\n%s:%s\\n%s\\n%f\"]\n" id (escape_string t.orth) (escape_string t.lemma) t.pos (escape_string (ENIAM_LCGstringOf.linear_term 0 t.symbol)) t.weight; | |
105 | + (* fprintf file " %s [label=\"%s\\n%s:%s\\n%s\\n%f\"]\n" id (escape_string t.orth) (escape_string t.lemma) t.pos (escape_string (ENIAM_LCGstringOf.linear_term 0 t.symbol)) t.weight; *) | |
106 | + fprintf file " %s [label=\"%s\\n%s:%s\\n%s\"]\n" id (escape_string t.orth) (escape_string t.lemma) t.pos (escape_string (ENIAM_LCGstringOf.linear_term 0 t.symbol)); | |
106 | 107 | print_edge file edge upper id; |
107 | 108 | print_simplified_dependency_tree_rec2 file "" id t.args |
108 | 109 | | Variant(e,l) -> |
... | ... |
LCGparser/ENIAM_LCGlatexOf.ml
... | ... | @@ -213,7 +213,7 @@ let chart page text_fragments g = |
213 | 213 | String.concat "" (List.rev (IntMap.fold layers [] (fun l layer nodes -> |
214 | 214 | IntMap.fold nodes l (fun l node1 contents -> |
215 | 215 | Xlist.fold contents l (fun l (node2,symbol,sem) -> |
216 | - let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in | |
216 | + let s = try Xlatex.escape_string (IntMap.find text_fragments.(node1) node2) with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in | |
217 | 217 | (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$ & $%s$\\\\\n\\hline\n" layer node1 node2 s symbol sem) :: l))))) ^ |
218 | 218 | "\\end{longtable}" |
219 | 219 | |
... | ... | @@ -221,7 +221,7 @@ let chart2 page text_fragments g = |
221 | 221 | let n = match page with "a4" -> "4" | "a1" -> "10" | _ -> "6" in |
222 | 222 | "\\begin{longtable}{|l|p{" ^ n ^ "cm}|l|}\n\\hline\n" ^ |
223 | 223 | String.concat "" (List.rev (ENIAM_LCGchart.fold g [] (fun l (symbol,node1,node2,sem,layer) -> |
224 | - let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in | |
224 | + let s = try Xlatex.escape_string (IntMap.find text_fragments.(node1) node2) with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in | |
225 | 225 | (Printf.sprintf "%d--%d & %s & $\\begin{array}{l}%s\\end{array}$\\\\\n\\hline\n" node1 node2 s (grammar_symbol 0 symbol)) :: l))) ^ |
226 | 226 | "\\end{longtable}" |
227 | 227 | |
... | ... |
LCGparser/ENIAM_LCGrules.ml
... | ... | @@ -392,6 +392,24 @@ let forward_application references functs args = |
392 | 392 | | BracketSet(Forward),_ -> Xlist.fold args l (fun l -> function Bracket(false,rf,arg),arg_sem -> (Bracket(true,rf,arg),arg_sem) :: l | _ -> l) |
393 | 393 | | _ -> l) |
394 | 394 | |
395 | +let forward_application_ignore_brackets references functs args = | |
396 | + Xlist.fold functs [] (fun l -> function | |
397 | + Bracket(lf,false,funct),sem -> | |
398 | + let argst,argsf = Xlist.fold args ([],[]) (fun (argst,argsf) -> function | |
399 | + Bracket(_,true,arg),arg_sem -> (arg,arg_sem) :: argst, argsf | |
400 | + | Bracket(_,false,arg),arg_sem -> argst, (arg,arg_sem) :: argsf | |
401 | + | _ -> argst,argsf) in | |
402 | + let l = Xlist.fold (deduce_app references Forward (funct,sem) argst) l (fun l (t,sem) -> | |
403 | + (Bracket(lf,true,t), (*LCGreductions.linear_term_beta_reduction2*) sem) :: l) in | |
404 | + Xlist.fold (deduce_app references Forward (funct,sem) argsf) l (fun l (t,sem) -> | |
405 | + (Bracket(lf,false,t), (*LCGreductions.linear_term_beta_reduction2*) sem) :: l) | |
406 | + | Bracket(lf,true,funct),sem -> | |
407 | + let args = Xlist.fold args [] (fun args -> function Bracket(_,_,arg),arg_sem -> (arg,arg_sem) :: args | _ -> args) in | |
408 | + Xlist.fold (deduce_app references Forward (funct,sem) args) l (fun l (t,sem) -> | |
409 | + (Bracket(lf,true,t), (*LCGreductions.linear_term_beta_reduction2*) sem) :: l) | |
410 | + | BracketSet(Forward),_ -> Xlist.fold args l (fun l -> function Bracket(_,rf,arg),arg_sem -> (Bracket(true,rf,arg),arg_sem) :: l | _ -> l) | |
411 | + | _ -> l) | |
412 | + | |
395 | 413 | let forward_application_conll references functs args = |
396 | 414 | Xlist.fold functs [] (fun l -> function |
397 | 415 | Bracket(_,_,funct),sem -> |
... | ... | @@ -436,6 +454,27 @@ let backward_application references args functs = |
436 | 454 | | BracketSet(Backward),_ -> (*print_endline "tt";*) Xlist.fold args l (fun l -> function Bracket(lf,false,arg),arg_sem -> (Bracket(lf,true,arg),arg_sem) :: l | _ -> l) |
437 | 455 | | _ -> l) |
438 | 456 | |
457 | +let backward_application_ignore_brackets references args functs = | |
458 | + (* Printf.printf "backward_application: [%s] [%s]\n%!" | |
459 | + (String.concat "; " (Xlist.map args (fun (arg,_) -> "'" ^ ENIAM_LCGstringOf.grammar_symbol 1 arg ^ "'"))) | |
460 | + (String.concat "; " (Xlist.map functs (fun (arg,_) -> "'" ^ ENIAM_LCGstringOf.grammar_symbol 1 arg ^ "'"))); *) | |
461 | + Xlist.fold functs [] (fun l -> function | |
462 | + Bracket(false,rf,funct),sem -> | |
463 | + let argst,argsf = Xlist.fold args ([],[]) (fun (argst,argsf) -> function | |
464 | + Bracket(true,_,arg),arg_sem -> (arg,arg_sem) :: argst, argsf | |
465 | + | Bracket(false,_,arg),arg_sem -> argst, (arg,arg_sem) :: argsf | |
466 | + | _ -> argst,argsf) in | |
467 | + let l = Xlist.fold (deduce_app references Backward (funct,sem) argst) l (fun l (t,sem) -> | |
468 | + (Bracket(true,rf,t), (*LCGreductions.linear_term_beta_reduction2*) sem) :: l) in | |
469 | + Xlist.fold (deduce_app references Backward (funct,sem) argsf) l (fun l (t,sem) -> | |
470 | + (Bracket(false,rf,t), (*LCGreductions.linear_term_beta_reduction2*) sem) :: l) | |
471 | + | Bracket(true,rf,funct),sem -> | |
472 | + let args = Xlist.fold args [] (fun args -> function Bracket(_,_,arg),arg_sem -> (arg,arg_sem) :: args | _ -> args) in | |
473 | + Xlist.fold (deduce_app references Backward (funct,sem) args) l (fun l (t,sem) -> | |
474 | + (Bracket(true,rf,t), (*LCGreductions.linear_term_beta_reduction2*) sem) :: l) | |
475 | + | BracketSet(Backward),_ -> (*print_endline "tt";*) Xlist.fold args l (fun l -> function Bracket(lf,_,arg),arg_sem -> (Bracket(lf,true,arg),arg_sem) :: l | _ -> l) | |
476 | + | _ -> l) | |
477 | + | |
439 | 478 | let backward_application_conll references args functs = |
440 | 479 | (* Printf.printf "backward_application: [%s] [%s]\n%!" |
441 | 480 | (String.concat "; " (Xlist.map args (fun (arg,_) -> "'" ^ ENIAM_LCGstringOf.grammar_symbol 1 arg ^ "'"))) |
... | ... | @@ -469,6 +508,7 @@ let backward_cross_composition references args functs = |
469 | 508 | (* FIXME: błąd przy redukcji "Jan chce iść spać" *) |
470 | 509 | |
471 | 510 | let application_rules = [0,backward_application; 0,forward_application] |
511 | +let application_rules_ignore_brackets = [0,backward_application_ignore_brackets; 0,forward_application_ignore_brackets] | |
472 | 512 | let cross_composition_rules = [1,backward_cross_composition;1,forward_cross_composition] |
473 | 513 | |
474 | 514 | let rec flatten_functor2 l seml = function |
... | ... |
exec/ENIAMexec.ml
... | ... | @@ -33,6 +33,8 @@ let translate_mode = function |
33 | 33 | | ENIAMsubsyntaxTypes.Mate -> Mate |
34 | 34 | | ENIAMsubsyntaxTypes.Swigra -> Swigra |
35 | 35 | | ENIAMsubsyntaxTypes.POLFIE -> POLFIE |
36 | + | ENIAMsubsyntaxTypes.Error -> Error | |
37 | + | ENIAMsubsyntaxTypes.Name -> Name | |
36 | 38 | |
37 | 39 | let rec translate_sentence = function |
38 | 40 | ENIAMsubsyntaxTypes.RawSentence s -> RawSentence s |
... | ... | @@ -53,6 +55,7 @@ let rec translate_paragraph = function |
53 | 55 | sentence=translate_sentence p.ENIAMsubsyntaxTypes.sentence})) |
54 | 56 | | ENIAMsubsyntaxTypes.AltParagraph l -> AltParagraph(Xlist.map l (fun (mode,paragraph) -> |
55 | 57 | translate_mode mode, translate_paragraph paragraph)) |
58 | + | ENIAMsubsyntaxTypes.ErrorParagraph s -> ErrorParagraph s | |
56 | 59 | |
57 | 60 | let rec translate_text = function |
58 | 61 | ENIAMsubsyntaxTypes.RawText s -> RawText s |
... | ... | @@ -61,14 +64,16 @@ let rec translate_text = function |
61 | 64 | | ENIAMsubsyntaxTypes.AltText l -> AltText(Xlist.map l (fun (mode,text) -> |
62 | 65 | translate_mode mode, translate_text text)) |
63 | 66 | |
64 | -let clarify_categories cats token = | |
67 | +let clarify_categories cats (*snode*) token = | |
65 | 68 | match token.ENIAMtokenizerTypes.token with |
66 | 69 | ENIAMtokenizerTypes.Lemma(lemma,pos,interp) -> |
67 | - List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,proj) -> ENIAMcategoriesPL.clarify_categories false cat proj (lemma,pos,interp))))) | |
70 | + List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,coerced) -> | |
71 | + (* Printf.printf "lemma=%s pos=%s cat=%s coerced=%s\n%!" lemma pos cat (String.concat "," coerced); *) | |
72 | + ENIAMcategoriesPL.clarify_categories false cat coerced (*snode*) (lemma,pos,interp))))) | |
68 | 73 | | ENIAMtokenizerTypes.Proper(lemma,pos,interp,senses2) -> |
69 | - List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,proj) -> ENIAMcategoriesPL.clarify_categories true cat proj (lemma,pos,interp))))) | |
74 | + List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,coerced) -> ENIAMcategoriesPL.clarify_categories true cat coerced (*snode*) (lemma,pos,interp))))) | |
70 | 75 | | ENIAMtokenizerTypes.Interp lemma -> |
71 | - List.flatten (Xlist.map cats (fun (cat,proj) -> ENIAMcategoriesPL.clarify_categories false cat proj (lemma,"interp",[]))) | |
76 | + List.flatten (Xlist.map cats (fun (cat,coerced) -> ENIAMcategoriesPL.clarify_categories false cat coerced (*snode*) (lemma,"interp",[]))) | |
72 | 77 | | _ -> [] |
73 | 78 | |
74 | 79 | let create_chart rules tokens lex_sems paths last = |
... | ... | @@ -79,9 +84,9 @@ let create_chart rules tokens lex_sems paths last = |
79 | 84 | let s = ExtArray.get lex_sems id in |
80 | 85 | ENIAM_LCGrenderer.reset_variable_names (); |
81 | 86 | ENIAM_LCGrenderer.add_variable_numbers (); |
82 | - if s.ENIAMlexSemanticsTypes.schemata = [] then failwith ("create_chart: no schema for token=" ^ t.ENIAMtokenizerTypes.orth ^ " lemma=" ^ ENIAMtokens.get_lemma t.ENIAMtokenizerTypes.token) else | |
83 | - Xlist.fold s.ENIAMlexSemanticsTypes.schemata chart (fun chart (selectors,cats,schema) -> | |
84 | - let cats = clarify_categories cats t in | |
87 | + (* if s.ENIAMlexSemanticsTypes.schemata = [] then failwith ("create_chart: no schema for token=" ^ t.ENIAMtokenizerTypes.orth ^ " lemma=" ^ ENIAMtokens.get_lemma t.ENIAMtokenizerTypes.token) else *) | |
88 | + Xlist.fold s.ENIAMlexSemanticsTypes.schemata chart (fun chart (selectors,cats,(*snode,*)schema) -> | |
89 | + let cats = clarify_categories cats (*snode*) t in | |
85 | 90 | (* let chart = ENIAM_LCGchart.add_inc_list chart lnode rnode s.ENIAMlexSemanticsTypes.lex_entries 0 in *) |
86 | 91 | let l = ENIAM_LCGlexicon.create_entries rules id t.ENIAMtokenizerTypes.orth cats [selectors,schema] s.ENIAMlexSemanticsTypes.lex_entries in |
87 | 92 | ENIAM_LCGchart.add_inc_list chart lnode rnode l 0)) in |
... | ... | @@ -110,10 +115,11 @@ let create_dep_chart dep_rules tokens lex_sems paths = |
110 | 115 | let s = ExtArray.get lex_sems id in |
111 | 116 | ENIAM_LCGrenderer.reset_variable_names (); |
112 | 117 | ENIAM_LCGrenderer.add_variable_numbers (); |
113 | - let cats = clarify_categories ["X",["X"]] t in | |
114 | - let schemata = Xlist.map s.ENIAMlexSemanticsTypes.schemata (fun (selectors,_,schema) -> selectors,schema) in | |
115 | - let l = ENIAM_LCGlexicon.create_entries dep_rules id t.ENIAMtokenizerTypes.orth cats schemata s.ENIAMlexSemanticsTypes.lex_entries in | |
116 | - IntMap.add nodes i l) in | |
118 | + Xlist.fold s.ENIAMlexSemanticsTypes.schemata nodes (fun nodes (selectors,cats,(*snode,*)schema) -> | |
119 | + let cats = clarify_categories ["X",["X"]] (*snode*) t in | |
120 | + (* let chart = ENIAM_LCGchart.add_inc_list chart lnode rnode s.ENIAMlexSemanticsTypes.lex_entries 0 in *) | |
121 | + let l = ENIAM_LCGlexicon.create_entries dep_rules id t.ENIAMtokenizerTypes.orth cats [selectors,schema] s.ENIAMlexSemanticsTypes.lex_entries in | |
122 | + IntMap.add_inc nodes i l (fun l2 -> l @ l2))) in | |
117 | 123 | (* print_endline "create_dep_chart 3"; *) |
118 | 124 | let x = dep_create_rec nodes sons 0 in |
119 | 125 | (* print_endline "create_dep_chart 4"; *) |
... | ... | @@ -134,10 +140,27 @@ let create_text_fragments tokens paths last = |
134 | 140 | text_fragments.(i) <- map); |
135 | 141 | text_fragments |
136 | 142 | |
143 | +(*let create_beg_positions tokens paths last = | |
144 | + let beg_positions = Array.make last (-1) in | |
145 | + Xlist.iter paths (fun (id,lnode,rnode) -> | |
146 | + let t = ExtArray.get tokens id in | |
147 | + beg_positions.(lnode) <- t.ENIAMtokenizerTypes.beg); | |
148 | + beg_positions | |
149 | + | |
150 | +let create_end_positions tokens paths last = | |
151 | + let end_positions = Array.make last (-1) in | |
152 | + Xlist.iter paths (fun (id,lnode,rnode) -> | |
153 | + let t = ExtArray.get tokens id in | |
154 | + end_positions.(rnode) <- t.ENIAMtokenizerTypes.beg + t.ENIAMtokenizerTypes.len); | |
155 | + end_positions*) | |
156 | + | |
137 | 157 | let eniam_parse_sentence timeout verbosity rules tokens lex_sems paths last = |
138 | 158 | ENIAM_LCGreductions.reset_variant_label (); |
139 | 159 | let result = {empty_eniam_parse_result with paths_size = Xlist.size paths} in |
140 | - let result = if verbosity = 0 then result else {result with text_fragments=create_text_fragments tokens paths last} in | |
160 | + let result = if verbosity = 0 then result else {result with | |
161 | + text_fragments=create_text_fragments tokens paths last; | |
162 | + (*beg_positions=create_beg_positions tokens paths last; | |
163 | + end_positions=create_end_positions tokens paths last;*)} in | |
141 | 164 | let time1 = time_fun () in |
142 | 165 | try |
143 | 166 | (* print_endline "eniam_parse_sentence 1"; *) |
... | ... | @@ -469,6 +492,7 @@ let eniam_semantic_processing verbosity tokens lex_sems (result : eniam_parse_re |
469 | 492 | let graph = ENIAMsemGraph.greater_simplify graph in |
470 | 493 | (* let graph = ENIAMsemGraph.manage_quantification graph in *) |
471 | 494 | let graph = ENIAMsemGraph.simplify_gender graph in |
495 | + let graph = ENIAMsemGraph.manage_variant_labels graph in | |
472 | 496 | let result = (*if verbosity = 0 then result else*) {result with semantic_graph11=graph; semantic_graph12=graph} in |
473 | 497 | graph,result |
474 | 498 | with e -> ENIAMsemTypes.Dot,{result with status=SemGraphError; msg=string_of_exn e} in |
... | ... |
exec/ENIAMexecTypes.ml
... | ... | @@ -78,7 +78,7 @@ type semantic_processing_result = { |
78 | 78 | } |
79 | 79 | *) |
80 | 80 | type mode = |
81 | - Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE | |
81 | + Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE | Error | Name | |
82 | 82 | |
83 | 83 | type sentence = |
84 | 84 | RawSentence of string |
... | ... | @@ -98,6 +98,7 @@ and paragraph = |
98 | 98 | RawParagraph of string |
99 | 99 | | StructParagraph of paragraph_record list (* zdania *) |
100 | 100 | | AltParagraph of (mode * paragraph) list |
101 | + | ErrorParagraph of string | |
101 | 102 | |
102 | 103 | type text = |
103 | 104 | RawText of string |
... | ... | @@ -267,6 +268,7 @@ let rec map_paragraph mode f = function |
267 | 268 | let l = Xlist.rev_map l (fun (mode,paragraph) -> |
268 | 269 | mode, map_paragraph mode f paragraph) in |
269 | 270 | AltParagraph(List.rev l) |
271 | + | ErrorParagraph s -> ErrorParagraph s | |
270 | 272 | |
271 | 273 | let rec map_text mode f = function |
272 | 274 | RawText s -> RawText s |
... | ... | @@ -295,6 +297,7 @@ let rec fold_paragraph mode s f = function |
295 | 297 | | AltParagraph l -> |
296 | 298 | Xlist.fold l s (fun s (mode,paragraph) -> |
297 | 299 | fold_paragraph mode s f paragraph) |
300 | + | ErrorParagraph _ -> s | |
298 | 301 | |
299 | 302 | let rec fold_text mode s f = function |
300 | 303 | RawText _ -> s |
... | ... | @@ -306,6 +309,7 @@ let rec fold_text mode s f = function |
306 | 309 | fold_text mode s f text) |
307 | 310 | |
308 | 311 | let rules_filename = ENIAM_LCGlexiconTypes.resource_path ^ "/LCGlexicon/lexicon-pl.dic" |
312 | +let colours_filename = ENIAMwalTypes.data_path ^ "/colours.tab" | |
309 | 313 | |
310 | 314 | let lcg_rules = ref ([] : (int * (ENIAM_LCGtypes.linear_term ExtArray.t -> |
311 | 315 | (ENIAM_LCGtypes.SymbolMap.key * ENIAM_LCGtypes.linear_term) list -> |
... | ... |
exec/ENIAMexecXMLof.ml
... | ... | @@ -64,6 +64,7 @@ let rec paragraph m = function |
64 | 64 | Xml.Element("StructParagraph",set_mode m,Xlist.map sentences (fun p -> |
65 | 65 | Xml.Element("Sentence",["id",p.id;"beg",string_of_int p.beg;"len",string_of_int p.len;"next",string_of_int p.next],[sentence "" p.sentence]))) |
66 | 66 | | AltParagraph l -> Xml.Element("AltParagraph",set_mode m,Xlist.map l (fun (m,t) -> paragraph (ENIAMvisualization.string_of_mode m) t)) |
67 | + | ErrorParagraph s -> Xml.Element("ErrorParagraph",set_mode m,[Xml.PCData s]) | |
67 | 68 | |
68 | 69 | let rec text m = function |
69 | 70 | RawText s -> Xml.Element("RawText",set_mode m,[Xml.PCData s]) |
... | ... |
exec/ENIAMselectSent.ml
... | ... | @@ -69,6 +69,7 @@ let rec select_sentence_modes_paragraph = function |
69 | 69 | let l = Xlist.rev_map l (fun (mode,paragraph) -> |
70 | 70 | mode, select_sentence_modes_paragraph paragraph) in |
71 | 71 | AltParagraph(List.rev l) |
72 | + | ErrorParagraph s -> ErrorParagraph s | |
72 | 73 | |
73 | 74 | let rec select_sentence_modes_text = function |
74 | 75 | RawText s -> RawText s |
... | ... | @@ -148,6 +149,7 @@ let rec select_sentences_paragraph mode = function |
148 | 149 | let l = Xlist.rev_map l (fun (mode,paragraph) -> |
149 | 150 | mode, select_sentences_paragraph mode paragraph) in |
150 | 151 | AltParagraph(List.rev l) |
152 | + | ErrorParagraph s -> ErrorParagraph s | |
151 | 153 | |
152 | 154 | let rec select_sentences_text mode = function |
153 | 155 | RawText s -> RawText s |
... | ... |
exec/ENIAMvisualization.ml
... | ... | @@ -24,7 +24,7 @@ open ENIAMtokenizerTypes |
24 | 24 | open ENIAMexecTypes |
25 | 25 | |
26 | 26 | let string_of_status = function |
27 | - Idle -> "Idle" | |
27 | + Idle -> "Idle" | |
28 | 28 | | PreprocessingError -> "PreprocessingError" |
29 | 29 | | LexiconError -> "LexiconError" |
30 | 30 | | ParseError -> "ParseError" |
... | ... | @@ -657,6 +657,8 @@ let string_of_mode = function |
657 | 657 | | Mate -> "Mate" |
658 | 658 | | Swigra -> "Swigra" |
659 | 659 | | POLFIE -> "POLFIE" |
660 | + | Error -> "Error" | |
661 | + | Name -> "Name" | |
660 | 662 | (* |
661 | 663 | (*let rec string_of_sentence = function |
662 | 664 | RawSentence s -> sprintf "RawSentence(%s)" s |
... | ... | @@ -775,6 +777,94 @@ let create_latex_dep_chart path name dep_chart = |
775 | 777 | LatexMain.latex_compile_and_clean path name |
776 | 778 | *) |
777 | 779 | |
780 | +let rec extract_pos_cat_internal vars = function | |
781 | + | Atom x -> x | |
782 | + | AVar x -> (try extract_pos_cat_internal vars (Xlist.assoc vars x) with Not_found -> failwith "extract_pos_cat_internal") | |
783 | + | With l -> String.concat "&" (Xlist.map l (extract_pos_cat_internal vars)) | |
784 | + | Zero -> "0" | |
785 | + | Top -> "T" | |
786 | + | |
787 | +let rec extract_pos_cat vars = function | |
788 | + | Tensor [] -> failwith "extract_pos_cat: ni" | |
789 | + | Tensor [pos] -> extract_pos_cat_internal vars pos | |
790 | + | Tensor [pos;_] -> extract_pos_cat_internal vars pos | |
791 | + | Tensor [pos;_;_] -> extract_pos_cat_internal vars pos | |
792 | + | Tensor (Atom "num" :: _) -> "Number" | |
793 | + | Tensor (Atom "aglt" :: _) -> "Aglt" | |
794 | + | Tensor (Atom "prepnp" :: _) -> "Prep" | |
795 | + | Tensor (Atom "comparp" :: _) -> "Compar" | |
796 | + | Tensor (Atom "cp" :: _) -> "Comp" | |
797 | + | Tensor [_;cat;_;_] -> extract_pos_cat_internal vars cat | |
798 | + | Tensor [_;_;cat;_;_] -> extract_pos_cat_internal vars cat | |
799 | + | Tensor [_;_;_;cat;_;_] -> extract_pos_cat_internal vars cat | |
800 | + | Tensor [_;_;_;_;cat;_;_] -> extract_pos_cat_internal vars cat | |
801 | + | Tensor [_;_;_;_;_;cat;_;_] -> extract_pos_cat_internal vars cat | |
802 | + | Tensor [_;_;_;_;_;_;cat;_;_] -> extract_pos_cat_internal vars cat | |
803 | + (* | Tensor (pos :: cat :: _) -> (*extract_pos_cat_internal vars pos ^ "*" ^*) extract_pos_cat_internal vars cat *) | |
804 | + | Tensor _ as t -> print_endline ("Unknown symbol " ^ ENIAM_LCGstringOf.grammar_symbol 0 t); "Unknown" | |
805 | + | Plus l -> failwith "extract_pos_cat: ni" | |
806 | + | Imp(s,d,t2) -> extract_pos_cat vars s | |
807 | + | One -> failwith "extract_pos_cat: ni" | |
808 | + | ImpSet(s,l) -> extract_pos_cat vars s | |
809 | + | WithVar(v,g,e,s) -> extract_pos_cat ((v,g) :: vars) s | |
810 | + | Star s -> failwith "extract_pos_cat: ni" | |
811 | + | Bracket(lf,rf,s) -> extract_pos_cat vars s | |
812 | + | BracketSet d -> "BracketSet" | |
813 | + | Maybe s -> failwith "extract_pos_cat: ni" | |
814 | + | |
815 | +let get_text_fragment text_fragments node1 node2 = | |
816 | + try IntMap.find text_fragments.(node1) node2 | |
817 | + with (*Not_found*)_ -> "???"(*failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2)*) | |
818 | + | |
819 | +let omited = StringSet.of_list ["<subst>";"<depr>";"<ppron12>";"<ppron3>";"<siebie>";"<prep>"; | |
820 | + "<num>";"<intnum>";"<realnum>";"<intnum-interval>";"<realnum-interval>";"<symbol>";"<ordnum>"; | |
821 | + "<date>";"<date-interval>";"<hour-minute>";"<hour>";"<hour-minute-interval>";"<hour-interval>"; | |
822 | + "<year>";"<year-interval>";"<day>";"<day-interval>";"<day-month>";"<day-month-interval>"; | |
823 | + "<month-interval>";"<roman>";"<roman-interval>";"<roman-ordnum>";"<match-result>";"<url>"; | |
824 | + "<email>";"<obj-id>";"<adj>";"<apron>";"<adjc>";"<adjp>";"<adja>";"<adv>";"<ger>";"<pact>"; | |
825 | + "<ppas>";"<fin>";"<bedzie>";"<praet>";"<winien>";"<impt>";"<imps>";"<pred>";"<aglt>";"<inf>"; | |
826 | + "<pcon>";"<pant>";"<qub>";"<comp>";"<compar>";"<conj>";"<interj>";"<sinterj>";"<burk>"; | |
827 | + "<interp>";"<part>";"<unk>";"<building-number>";"<html-tag>";"<list-item>";"<numcomp>"; | |
828 | + "<phone-number>";"<postal-code>";"<sentence>";"<paragraph>"] | |
829 | + | |
830 | +let cat_tokens_sequence text_fragments g = | |
831 | + let _,_,l = ENIAM_LCGchart.fold g (0,0,[]) (fun (m,n,l) (symbol,node1,node2,sem,layer) -> | |
832 | + node1,node2, | |
833 | + (if m < node1 then | |
834 | + if n < node1 then [n, node1, get_text_fragment text_fragments n node1, "null"] | |
835 | + else if n = node1 then [] | |
836 | + else [node1, n, get_text_fragment text_fragments node1 n, "overlap"] | |
837 | + else if m = node1 then | |
838 | + if n < node2 then [m, n, get_text_fragment text_fragments m n, "overlap"] | |
839 | + else if n = node2 then [] | |
840 | + else [node1, node2, get_text_fragment text_fragments node1 node2, "overlap"] | |
841 | + else failwith "cat_tokens_sequence") @ | |
842 | + [node1, node2, get_text_fragment text_fragments node1 node2, extract_pos_cat [] symbol] @ l) in | |
843 | + let map = Xlist.fold l IntMap.empty (fun map (m,n,text,symbol) -> | |
844 | + IntMap.add_inc map (1000000*m+n) [text,symbol] (fun l -> (text,symbol) :: l)) in | |
845 | + let map = IntMap.map map (fun l -> | |
846 | + let t,ov,set = Xlist.fold l ("",false,StringSet.empty) (fun (t,ov,set) (text,symbol) -> | |
847 | + if symbol = "null" then text,ov,set | |
848 | + else if symbol = "overlap" then t,true,set | |
849 | + else if StringSet.mem omited symbol then text,ov,set | |
850 | + else t,ov,StringSet.add set symbol) in | |
851 | + let l = if StringSet.is_empty set then [t] else StringSet.to_list set in | |
852 | + if ov then "OVERLAP{" ^ String.concat " " l ^ "}" else | |
853 | + match l with | |
854 | + [t] -> t | |
855 | + | _ -> "{" ^ String.concat " " l ^ "}") in | |
856 | + let l = List.sort compare (IntMap.fold map [] (fun l k texts -> (k,texts) :: l)) in | |
857 | +(* let l = Xlist.sort l (fun (m1,n1,text1,symbol1) (m2,n2,text2,symbol2) -> | |
858 | + if m1 <> m2 then compare m1 m2 else | |
859 | + if n1 <> n2 then compare n1 n2 else | |
860 | + compare symbol1 symbol2) in | |
861 | + let l = if l = [] then l else | |
862 | + Xlist.fold (List.tl l) [List.hd l] (fun l a -> | |
863 | + match l with | |
864 | + [] -> failwith "cat_tokens_sequence" | |
865 | + | b :: l -> if a = b then b :: l else a :: b :: l) in*) | |
866 | + String.concat " " (Xlist.map l (fun (n,texts) -> texts)) | |
867 | + | |
778 | 868 | |
779 | 869 | (* verbosity: |
780 | 870 | 0 -> jedynie informacja o statusie zdania |
... | ... | @@ -785,13 +875,13 @@ let create_latex_dep_chart path name dep_chart = |
785 | 875 | let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam_parse_result) = |
786 | 876 | match result.status with |
787 | 877 | Idle -> "<font color=\"red\">idle</font>\n" |
788 | - | LexiconError -> sprintf "<font color=\"red\">error_lex</font>: %s paths_size=%d\n" result.msg result.paths_size | |
878 | + | LexiconError -> sprintf "<font color=\"red\">error_lex</font>: %s paths_size=%d\n" (escape_html result.msg) result.paths_size | |
789 | 879 | | ParseError -> |
790 | 880 | if verbosity = 0 then () else ( |
791 | 881 | ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.text_fragments result.chart1; |
792 | 882 | ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.text_fragments result.chart2; |
793 | 883 | ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2); |
794 | - sprintf "<font color=\"red\">error_parse</font>: %s paths_size=%d\n" result.msg result.paths_size ^ | |
884 | + sprintf "<font color=\"red\">error_parse</font>: %s paths_size=%d\n" (escape_html result.msg) result.paths_size ^ | |
795 | 885 | (if verbosity = 0 then "" else |
796 | 886 | sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix ^ |
797 | 887 | sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix ^ |
... | ... | @@ -803,7 +893,7 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam |
803 | 893 | ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2); |
804 | 894 | if verbosity = 0 then () else ( |
805 | 895 | ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.text_fragments result.chart2); |
806 | - sprintf "<font color=\"red\">timeout</font>: %s paths_size=%d\n" result.msg result.paths_size ^ | |
896 | + sprintf "<font color=\"red\">timeout</font>: %s paths_size=%d\n" (escape_html result.msg) result.paths_size ^ | |
807 | 897 | (if verbosity < 2 then "" else |
808 | 898 | sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix ^ |
809 | 899 | sprintf "<BR><A HREF=\"%s_2_references.pdf\">References 2</A>\n" file_prefix) ^ |
... | ... | @@ -829,6 +919,7 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam |
829 | 919 | sprintf "<BR><A HREF=\"%s_3_references.pdf\">References 3</A>\n" file_prefix ^ |
830 | 920 | sprintf "<BR><A HREF=\"%s_3_chart.pdf\">Chart 3</A>\n" file_prefix) ^ |
831 | 921 | (if verbosity = 0 then "" else |
922 | + sprintf "<BR>%s\n" (escape_html (cat_tokens_sequence result.text_fragments (ENIAM_LCGchart.select_maximal result.chart1))) ^ | |
832 | 923 | sprintf "<BR><A HREF=\"%s_3_chart_selection.pdf\">Chart 3 Selection</A>\n" file_prefix) ^ |
833 | 924 | "" |
834 | 925 | | ReductionError -> |
... | ... | @@ -840,7 +931,7 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam |
840 | 931 | ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.text_fragments result.chart1; |
841 | 932 | ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3); |
842 | 933 | (if verbosity < 2 then "" else |
843 | - sprintf "<font color=\"red\">error_reduction</font>: %s paths_size=%d chart_size=%d\n" result.msg result.paths_size result.chart_size ^ | |
934 | + sprintf "<font color=\"red\">error_reduction</font>: %s paths_size=%d chart_size=%d\n" (escape_html result.msg) result.paths_size result.chart_size ^ | |
844 | 935 | sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix ^ |
845 | 936 | sprintf "<BR><A HREF=\"%s_2_references.pdf\">References 2</A>\n" file_prefix ^ |
846 | 937 | sprintf "<BR><A HREF=\"%s_3_chart.pdf\">Chart 3</A>\n" file_prefix) ^ |
... | ... | @@ -898,7 +989,7 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam |
898 | 989 | Printf.fprintf file "\\[%s\\]\n" (ENIAM_LCGlatexOf.linear_term 0 result.term4)); |
899 | 990 | Xlatex.latex_compile_and_clean path (file_prefix ^ "_4_term"); |
900 | 991 | ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_4_dependency_tree") "a0" result.dependency_tree4); |
901 | - sprintf "<font color=\"red\">error_reduction2</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.msg result.paths_size result.chart_size result.dependency_tree_size ^ | |
992 | + sprintf "<font color=\"red\">error_reduction2</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" (escape_html result.msg) result.paths_size result.chart_size result.dependency_tree_size ^ | |
902 | 993 | (if verbosity < 2 then "" else |
903 | 994 | sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix ^ |
904 | 995 | sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix ^ |
... | ... | @@ -928,7 +1019,7 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam |
928 | 1019 | ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_6b_dependency_tree") result.dependency_tree6b; |
929 | 1020 | ENIAM_LCGgraphOf.print_simplified_dependency_tree path (file_prefix ^ "_6a_simple_dependency_tree") result.dependency_tree6a; |
930 | 1021 | ENIAM_LCGgraphOf.print_simplified_dependency_tree path (file_prefix ^ "_6b_simple_dependency_tree") result.dependency_tree6b); |
931 | - sprintf "<font color=\"red\">error_reduction3</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.msg result.paths_size result.chart_size result.dependency_tree_size ^ | |
1022 | + sprintf "<font color=\"red\">error_reduction3</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" (escape_html result.msg) result.paths_size result.chart_size result.dependency_tree_size ^ | |
932 | 1023 | (if verbosity < 2 then "" else |
933 | 1024 | sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix ^ |
934 | 1025 | sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix ^ |
... | ... | @@ -966,8 +1057,9 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam |
966 | 1057 | ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_6b_dependency_tree") "a4" result.dependency_tree6b); |
967 | 1058 | if verbosity = 0 then () else ( |
968 | 1059 | ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_6a_dependency_tree") result.dependency_tree6a; |
1060 | + ENIAM_LCGgraphOf.print_simplified_dependency_tree path (file_prefix ^ "_6a_simple_dependency_tree") result.dependency_tree6a); | |
1061 | + if verbosity < 2 then () else ( | |
969 | 1062 | ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_6b_dependency_tree") result.dependency_tree6b; |
970 | - ENIAM_LCGgraphOf.print_simplified_dependency_tree path (file_prefix ^ "_6a_simple_dependency_tree") result.dependency_tree6a; | |
971 | 1063 | ENIAM_LCGgraphOf.print_simplified_dependency_tree path (file_prefix ^ "_6b_simple_dependency_tree") result.dependency_tree6b); |
972 | 1064 | sprintf "parsed: paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.paths_size result.chart_size result.dependency_tree_size ^ |
973 | 1065 | (if verbosity < 2 then "" else |
... | ... | @@ -984,10 +1076,11 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam |
984 | 1076 | (if verbosity = 0 then "" else |
985 | 1077 | (if img <> 2 then sprintf "<BR><A HREF=\"%s_6a_dependency_tree.png\">Dependency Tree 6a</A>\n" file_prefix |
986 | 1078 | else sprintf "<BR><IMG SRC=\"%s_6a_dependency_tree.png\">\n" file_prefix) ^ |
1079 | + (if img <> 1 then sprintf "<BR><A HREF=\"%s_6a_simple_dependency_tree.png\">Simplified Dependency Tree 6a</A>\n" file_prefix | |
1080 | + else sprintf "<BR><IMG SRC=\"%s_6a_simple_dependency_tree.png\">\n" file_prefix)) ^ | |
1081 | + (if verbosity < 2 then "" else | |
987 | 1082 | (if img <> 2 then sprintf "<BR><A HREF=\"%s_6b_dependency_tree.png\">Dependency Tree 6b</A>\n" file_prefix |
988 | 1083 | else sprintf "<BR><IMG SRC=\"%s_6b_dependency_tree.png\">\n" file_prefix) ^ |
989 | - (if img <> 1 then sprintf "<BR><A HREF=\"%s_6a_simple_dependency_tree.png\">Simplified Dependency Tree 6a</A>\n" file_prefix | |
990 | - else sprintf "<BR><IMG SRC=\"%s_6a_simple_dependency_tree.png\">\n" file_prefix) ^ | |
991 | 1084 | (if img <> 1 then sprintf "<BR><A HREF=\"%s_6b_simple_dependency_tree.png\">Simplified Dependency Tree 6b</A>\n" file_prefix |
992 | 1085 | else sprintf "<BR><IMG SRC=\"%s_6b_simple_dependency_tree.png\">\n" file_prefix)) ^ |
993 | 1086 | "" |
... | ... | @@ -999,7 +1092,7 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam |
999 | 1092 | if ExtArray.size result.dependency_tree8 <> 0 then ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_8_dependency_tree") "a3" result.dependency_tree8; |
1000 | 1093 | if result.dependency_tree9 <> [| |] then ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") "a3" result.dependency_tree9; |
1001 | 1094 | if result.dependency_tree9 <> [| |] then ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") result.dependency_tree9); |
1002 | - sprintf "<font color=\"red\">error_sem_valence</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.msg result.paths_size result.chart_size result.dependency_tree_size ^ | |
1095 | + sprintf "<font color=\"red\">error_sem_valence</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" (escape_html result.msg) result.paths_size result.chart_size result.dependency_tree_size ^ | |
1003 | 1096 | (if verbosity = 0 then "" else |
1004 | 1097 | sprintf "<BR><A HREF=\"%s_6b_dependency_tree.pdf\">Dependency Tree References 6b</A>\n" file_prefix ^ |
1005 | 1098 | (if result.dependency_tree7 <> [| |] then sprintf "<BR><A HREF=\"%s_7_dependency_tree.pdf\">Dependency Tree References 7</A>\n" file_prefix else "") ^ |
... | ... | @@ -1027,7 +1120,7 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam |
1027 | 1120 | if ExtArray.size result.dependency_tree8 <> 0 then ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_8_dependency_tree") "a3" result.dependency_tree8; |
1028 | 1121 | if result.dependency_tree9 <> [| |] then ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") "a3" result.dependency_tree9; |
1029 | 1122 | if result.dependency_tree9 <> [| |] then ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") result.dependency_tree9)); |
1030 | - sprintf "<font color=\"red\">error_sem_graph</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.msg result.paths_size result.chart_size result.dependency_tree_size ^ | |
1123 | + sprintf "<font color=\"red\">error_sem_graph</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" (escape_html result.msg) result.paths_size result.chart_size result.dependency_tree_size ^ | |
1031 | 1124 | (if verbosity = 2 then |
1032 | 1125 | sprintf "<BR><A HREF=\"%s_6b_dependency_tree.pdf\">Dependency Tree References 6b</A>\n" file_prefix ^ |
1033 | 1126 | (if result.semantic_graph10 <> [| |] then sprintf "<BR><A HREF=\"%s_10_semantic_graph.pdf\">Semantic Graph References 10</A>\n" file_prefix else "") ^ |
... | ... | @@ -1050,14 +1143,32 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam |
1050 | 1143 | | SemGraphError2 -> |
1051 | 1144 | if verbosity = 0 then () else ( |
1052 | 1145 | ENIAMsemGraphOf.print_semantic_graph2 path (file_prefix ^ "_11_semantic_graph") "" result.semantic_graph11); |
1053 | - sprintf "<font color=\"red\">error_sem_graph2</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.msg result.paths_size result.chart_size result.dependency_tree_size ^ | |
1146 | + sprintf "<font color=\"red\">error_sem_graph2</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" (escape_html result.msg) result.paths_size result.chart_size result.dependency_tree_size ^ | |
1054 | 1147 | (if verbosity = 0 then "" else |
1055 | 1148 | sprintf "<BR><IMG SRC=\"%s_11_semantic_graph.png\">\n" file_prefix) ^ |
1056 | 1149 | "" |
1057 | 1150 | | SemNotValidated -> |
1151 | + if verbosity < 2 then () else ( | |
1152 | + ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_6b_dependency_tree") result.dependency_tree6b; | |
1153 | + ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") result.dependency_tree9; | |
1154 | + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_6b_dependency_tree") "a3" result.dependency_tree6b; | |
1155 | + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_7_dependency_tree") "a2" result.dependency_tree7; | |
1156 | + ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_8_dependency_tree") "a3" result.dependency_tree8; | |
1157 | + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") "a3" result.dependency_tree9; | |
1158 | + ENIAMsemLatexOf.print_semantic_graph path (file_prefix ^ "_10_semantic_graph") "a3" result.semantic_graph10; | |
1159 | + ENIAMsemGraphOf.print_semantic_graph2 path (file_prefix ^ "_11_semantic_graph") "" result.semantic_graph11); | |
1058 | 1160 | if verbosity = 0 then () else ( |
1059 | 1161 | ENIAMsemGraphOf.print_semantic_graph2 path (file_prefix ^ "_12_semantic_graph") "" result.semantic_graph12); |
1060 | - sprintf "<font color=\"red\">sem_not_validated</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.msg result.paths_size result.chart_size result.dependency_tree_size ^ | |
1162 | + sprintf "<font color=\"red\">sem_not_validated</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" (escape_html result.msg) result.paths_size result.chart_size result.dependency_tree_size ^ | |
1163 | + (if verbosity < 2 then "" else | |
1164 | + sprintf "<BR><A HREF=\"%s_6b_dependency_tree.pdf\">Dependency Tree References 6b</A>\n" file_prefix ^ | |
1165 | + sprintf "<BR><A HREF=\"%s_7_dependency_tree.pdf\">Dependency Tree References 7</A>\n" file_prefix ^ | |
1166 | + sprintf "<BR><A HREF=\"%s_8_dependency_tree.pdf\">Dependency Tree References 8</A>\n" file_prefix ^ | |
1167 | + sprintf "<BR><A HREF=\"%s_9_dependency_tree.pdf\">Dependency Tree References 9</A>\n" file_prefix ^ | |
1168 | + sprintf "<BR><IMG SRC=\"%s_6b_dependency_tree.png\">\n" file_prefix ^ | |
1169 | + sprintf "<BR><IMG SRC=\"%s_9_dependency_tree.png\">\n" file_prefix ^ | |
1170 | + sprintf "<BR><A HREF=\"%s_10_semantic_graph.pdf\">Semantic Graph References 10</A>\n" file_prefix ^ | |
1171 | + sprintf "<BR><IMG SRC=\"%s_11_semantic_graph.png\">\n" file_prefix) ^ | |
1061 | 1172 | (if verbosity = 0 then "" else |
1062 | 1173 | sprintf "<BR><IMG SRC=\"%s_12_semantic_graph.png\">\n" file_prefix) ^ |
1063 | 1174 | "" |
... | ... | @@ -1164,6 +1275,8 @@ let file_prefix_of_mode = function |
1164 | 1275 | | Mate -> "M" |
1165 | 1276 | | Swigra -> "S" |
1166 | 1277 | | POLFIE -> "P" |
1278 | + | Error -> "Er" | |
1279 | + | Name -> "N" | |
1167 | 1280 | |
1168 | 1281 | let rec html_of_sentence path file_prefix mode img verbosity tokens = function |
1169 | 1282 | RawSentence s -> escape_html s |
... | ... | @@ -1196,6 +1309,7 @@ let rec html_of_paragraph path mode img verbosity tokens = function |
1196 | 1309 | String.concat "\n" (Xlist.map l (fun (mode,paragraph) -> |
1197 | 1310 | sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_paragraph path mode img verbosity tokens paragraph))) ^ |
1198 | 1311 | "</table>" |
1312 | + | ErrorParagraph s -> sprintf "<font color=\"red\">subsyntax_error</font>: %s\n" (escape_html s) | |
1199 | 1313 | |
1200 | 1314 | let rec html_of_text path mode img verbosity tokens = function |
1201 | 1315 | RawText s -> escape_html s |
... | ... | @@ -1229,6 +1343,7 @@ let rec find_prev_next_paragraph rev = function |
1229 | 1343 | | StructParagraph sentences -> |
1230 | 1344 | Xlist.fold sentences rev (fun rev p -> find_prev_next_sentence p.id p.file_prefix rev p.sentence) |
1231 | 1345 | | AltParagraph l -> Xlist.fold l rev (fun rev (mode,paragraph) -> find_prev_next_paragraph rev paragraph) |
1346 | + | ErrorParagraph s -> rev | |
1232 | 1347 | |
1233 | 1348 | let rec make_prev_next_map map prev = function |
1234 | 1349 | [x] -> StringMap.add map x (prev,"") |
... | ... | @@ -1288,7 +1403,6 @@ let rec print_main_result_sentence path cg_bin_path results_web_path id file_pre |
1288 | 1403 | | AltSentence((Raw,RawSentence query) :: sentences) -> |
1289 | 1404 | File.file_out (path ^ "page" ^ id ^ "_" ^ file_prefix ^ ".html") (fun file -> |
1290 | 1405 | print_sentence_to_file path cg_bin_path results_web_path true id file_prefix prev_next_map query sentences file) |
1291 | - (* | AltSentence[Raw,RawSentence query] -> print_not_parsed_main_result path cg_bin_path results_web_path id file_prefix query pid prev_next_map *) | |
1292 | 1406 | | _ -> failwith "print_main_result_sentence: ni" |
1293 | 1407 | |
1294 | 1408 | let rec print_main_result_paragraph path cg_bin_path results_web_path id tokens prev_next_map = function |
... | ... | @@ -1296,6 +1410,8 @@ let rec print_main_result_paragraph path cg_bin_path results_web_path id tokens |
1296 | 1410 | | StructParagraph sentences -> |
1297 | 1411 | Xlist.iter sentences (fun p -> print_main_result_sentence path cg_bin_path results_web_path id p.file_prefix tokens p.id prev_next_map p.sentence) |
1298 | 1412 | | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> print_main_result_paragraph path cg_bin_path results_web_path id tokens prev_next_map paragraph) |
1413 | + | ErrorParagraph s -> File.file_out (path ^ "page" ^ id ^ "_Er.html") (fun file -> | |
1414 | + print_sentence_to_file path cg_bin_path results_web_path false id "Er" prev_next_map ("ErrorParagraph: " ^ s) [] file) | |
1299 | 1415 | |
1300 | 1416 | let rec print_main_result_text path cg_bin_path results_web_path id tokens = function |
1301 | 1417 | RawText s -> () |
... | ... | @@ -1309,8 +1425,7 @@ let rec print_main_result_first_page_sentence path cg_bin_path results_web_path |
1309 | 1425 | AltSentence[Raw,_;Struct,QuotedSentences sentences] -> |
1310 | 1426 | let p = List.hd sentences in |
1311 | 1427 | print_main_result_first_page_sentence path cg_bin_path results_web_path id p.file_prefix tokens p.id prev_next_map p.sentence |
1312 | -(* | AltSentence[Raw,RawSentence query] -> print_not_parsed_main_result_first_page path cg_bin_path results_web_path id file_prefix query pid prev_next_map | |
1313 | -*) | AltSentence((Raw,RawSentence query) :: sentences) -> | |
1428 | + | AltSentence((Raw,RawSentence query) :: sentences) -> | |
1314 | 1429 | print_sentence_to_file path cg_bin_path results_web_path false id file_prefix prev_next_map query sentences stdout |
1315 | 1430 | | _ -> failwith "print_main_result_first_page_sentence: ni" |
1316 | 1431 | |
... | ... | @@ -1320,6 +1435,7 @@ let rec print_main_result_first_page_paragraph path cg_bin_path results_web_path |
1320 | 1435 | let p = List.hd sentences in |
1321 | 1436 | print_main_result_first_page_sentence path cg_bin_path results_web_path id p.file_prefix tokens p.id prev_next_map p.sentence |
1322 | 1437 | | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> print_main_result_first_page_paragraph path cg_bin_path results_web_path id tokens prev_next_map paragraph) |
1438 | + | ErrorParagraph s -> print_sentence_to_file path cg_bin_path results_web_path false id "Er" prev_next_map ("ErrorParagraph: " ^ s) [] stdout | |
1323 | 1439 | |
1324 | 1440 | let rec print_main_result_first_page_text path cg_bin_path results_web_path id tokens = function |
1325 | 1441 | RawText s -> () |
... | ... | @@ -1328,3 +1444,28 @@ let rec print_main_result_first_page_text path cg_bin_path results_web_path id t |
1328 | 1444 | (List.rev (Xlist.fold paragraphs [] find_prev_next_paragraph)) in |
1329 | 1445 | print_main_result_first_page_paragraph path cg_bin_path results_web_path id tokens prev_next_map (List.hd paragraphs) |
1330 | 1446 | | AltText l -> Xlist.iter l (fun (mode,text) -> print_main_result_first_page_text path cg_bin_path results_web_path id tokens text) |
1447 | + | |
1448 | +let to_string_eniam_sentence verbosity tokens (result : eniam_parse_result) = | |
1449 | + let status_string = string_of_status result.status in | |
1450 | + if result.status = NotParsed then | |
1451 | + [status_string ^ ": " ^ cat_tokens_sequence result.text_fragments (ENIAM_LCGchart.select_maximal result.chart1)] | |
1452 | + else [status_string] | |
1453 | + | |
1454 | +let rec to_string_sentence verbosity tokens = function | |
1455 | + RawSentence s -> [] | |
1456 | + | StructSentence(paths,last) -> [] | |
1457 | + | DepSentence paths -> [] | |
1458 | + | ENIAMSentence result -> to_string_eniam_sentence verbosity tokens result | |
1459 | + | QuotedSentences sentences -> List.flatten (Xlist.map sentences (fun p -> to_string_sentence verbosity tokens p.sentence)) | |
1460 | + | AltSentence l -> List.flatten (Xlist.map l (fun (mode,sentence) -> to_string_sentence verbosity tokens sentence)) | |
1461 | + | |
1462 | +let rec to_string_paragraph verbosity tokens = function | |
1463 | + RawParagraph s -> [] | |
1464 | + | StructParagraph sentences -> List.flatten (Xlist.map sentences (fun p -> to_string_sentence verbosity tokens p.sentence)) | |
1465 | + | AltParagraph l -> List.flatten (Xlist.map l (fun (mode,paragraph) -> to_string_paragraph verbosity tokens paragraph)) | |
1466 | + | ErrorParagraph s -> ["SubsyntaxError"] | |
1467 | + | |
1468 | +let rec to_string_text verbosity tokens = function | |
1469 | + RawText s -> [] | |
1470 | + | StructText paragraphs -> List.flatten (Xlist.map paragraphs (to_string_paragraph verbosity tokens)) | |
1471 | + | AltText l -> List.flatten (Xlist.map l (fun (mode,text) -> to_string_text verbosity tokens text)) | |
... | ... |
exec/parser.ml
... | ... | @@ -112,7 +112,7 @@ let rec main_loop sub_in sub_out in_chan out_chan = |
112 | 112 | if text = "" then () else ( |
113 | 113 | let text,tokens,lex_sems,msg = |
114 | 114 | if !lexSemantics_built_in then |
115 | - let text,tokens,msg = ENIAMsubsyntax.catch_parse_text text in | |
115 | + let text,tokens,msg = ENIAMsubsyntax.catch_parse_text true text in | |
116 | 116 | let text,msg = |
117 | 117 | if msg <> "" || not !perform_integration then text,msg else |
118 | 118 | ENIAMpreIntegration.catch_parse_text ENIAMsubsyntaxTypes.Struct tokens text in |
... | ... |
exec/semparser.ml
... | ... | @@ -30,7 +30,7 @@ let load_cats_map filename = |
30 | 30 | | l -> failwith ("load_cats_map: " ^ String.concat "\t" l)) |
31 | 31 | |
32 | 32 | let cats_map = load_cats_map ENIAM_LCGlexiconTypes.user_cats_filename |
33 | -let proj_map = load_cats_map ENIAM_LCGlexiconTypes.user_proj_filename | |
33 | +let coerced_map = load_cats_map ENIAM_LCGlexiconTypes.user_coerced_filename | |
34 | 34 | |
35 | 35 | let subsyntax_built_in = ref true |
36 | 36 | let subsyntax_host = ref "localhost" |
... | ... | @@ -96,19 +96,19 @@ let get_cats cats_map = function |
96 | 96 | | Proper(_,_,_,cats) -> if cats = [] then ["X"] else cats |
97 | 97 | | _ -> ["X"] |
98 | 98 | |
99 | -let expand_projections proj_map cats = | |
100 | - Xlist.rev_map cats (fun cat -> cat, cat :: (try StringMap.find proj_map cat with Not_found -> [])) | |
99 | +let expand_coercions coerced_map cats = | |
100 | + Xlist.rev_map cats (fun cat -> cat, cat :: (try StringMap.find coerced_map cat with Not_found -> [])) | |
101 | 101 | (* StringSet.to_list (Xlist.fold cats StringSet.empty (fun set cat -> |
102 | - let cats = try StringMap.find proj_map cat with Not_found -> [] in | |
102 | + let cats = try StringMap.find coerced_map cat with Not_found -> [] in | |
103 | 103 | Xlist.fold (cat :: cats) set StringSet.add))*) |
104 | 104 | |
105 | -let assign_lex_sems proj_map cats_map tokens = | |
105 | +let assign_lex_sems coerced_map cats_map tokens = | |
106 | 106 | let lex_sems = ExtArray.make (ExtArray.size tokens) ENIAMlexSemanticsTypes.empty_lex_sem in |
107 | 107 | let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in |
108 | 108 | Int.iter 1 (ExtArray.size tokens - 1) (fun i -> |
109 | 109 | let lemma = ENIAMtokens.get_lemma (ExtArray.get tokens i).token in |
110 | 110 | let pos = ENIAMtokens.get_pos (ExtArray.get tokens i).token in |
111 | - let cats = expand_projections proj_map (get_cats cats_map (ExtArray.get tokens i).token) in | |
111 | + let cats = expand_coercions coerced_map (get_cats cats_map (ExtArray.get tokens i).token) in | |
112 | 112 | let frames = |
113 | 113 | Xlist.rev_map (ENIAMvalence.get_aroles [] lemma pos) (fun (sel,arole,arole_attr,arev) -> |
114 | 114 | {ENIAMlexSemanticsTypes.empty_frame with ENIAMlexSemanticsTypes.selectors=sel; ENIAMlexSemanticsTypes.arole=arole; ENIAMlexSemanticsTypes.arole_attr=arole_attr; ENIAMlexSemanticsTypes.arev=arev}) in |
... | ... | @@ -125,7 +125,7 @@ let rec main_loop sub_in sub_out = |
125 | 125 | Printf.fprintf sub_out "%s\n\n%!" text; |
126 | 126 | (Marshal.from_channel sub_in : ENIAMsubsyntaxTypes.text * token_env ExtArray.t * string)) in |
127 | 127 | if msg <> "" then print_endline msg else ( |
128 | - let lex_sems = assign_lex_sems proj_map cats_map tokens in | |
128 | + let lex_sems = assign_lex_sems coerced_map cats_map tokens in | |
129 | 129 | let text = ENIAMexec.translate_text text in |
130 | 130 | (* let text = ENIAMexec.parse !timeout !verbosity rules tokens lex_sems text in *) |
131 | 131 | let text = ENIAMexec.parse !timeout !verbosity rules dep_rules tokens lex_sems text in |
... | ... |
integration/ENIAMpreIntegration.ml
... | ... | @@ -339,6 +339,7 @@ let rec parse_paragraph mode tokens = function |
339 | 339 | let l = Xlist.rev_map l (fun (mode,paragraph) -> |
340 | 340 | mode, parse_paragraph mode tokens paragraph) in |
341 | 341 | AltParagraph(List.rev l) |
342 | + | ErrorParagraph s -> ErrorParagraph s | |
342 | 343 | |
343 | 344 | let rec parse_text mode tokens = function |
344 | 345 | RawText s -> RawText s |
... | ... |
lexSemantics/ENIAMadjuncts.ml
... | ... | @@ -37,7 +37,7 @@ let simplify_position_verb mode l = function (* FIXME: dodać czyszczenie E Pro |
37 | 37 | | E Or -> l |
38 | 38 | | E (CP(CompTypeUndef,CompUndef)) -> l |
39 | 39 | | E (PrepNP(_,prep,Case case)) -> l |
40 | - | E (PrepNCP(prep,Case case,CompTypeUndef,CompUndef)) -> l | |
40 | + | E (PrepNCP(_,prep,Case case,CompTypeUndef,CompUndef)) -> l | |
41 | 41 | | NP(Case "gen") as t -> if mode = "temp" then l else t :: l |
42 | 42 | | NP(Case "acc") as t -> if mode = "dur" then l else t :: l |
43 | 43 | | t -> t :: l |
... | ... | @@ -253,19 +253,19 @@ let simplify_schemata lexemes pos pos2 lemma schemata = |
253 | 253 | "{" ^ String.concat ";" (PhraseSet.fold morfs [] (fun l m -> ENIAMwalStringOf.phrase m :: l)) ^ "}")))); *) |
254 | 254 | schemata |
255 | 255 | |
256 | -let add_adjuncts preps compreps compars pos2 (selectors,cat,schema) = | |
256 | +let add_adjuncts preps compreps compars pos2 (selectors,cat,(*has_context,*)schema) = | |
257 | 257 | let compreps = Xlist.rev_map compreps ENIAMwalRenderer.render_comprep in |
258 | 258 | let prepnps = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_prepnp prep cases) in |
259 | 259 | let prepadjps = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_prepadjp prep cases) in |
260 | 260 | let compars = Xlist.rev_map compars ENIAMwalRenderer.render_compar in |
261 | 261 | match pos2 with |
262 | - "verb" -> [selectors,cat,schema @ ENIAMwalRenderer.verb_adjuncts_simp @ prepnps @ prepadjps @ compreps @ compars] | |
262 | + "verb" -> [selectors,cat,(*has_context,*)schema @ ENIAMwalRenderer.verb_adjuncts_simp @ prepnps @ prepadjps @ compreps @ compars] | |
263 | 263 | | "noun" -> [ |
264 | - [Nsyn,Eq,["proper"]] @ selectors,cat,ENIAMwalRenderer.proper_noun_adjuncts_simp @ prepnps @ compreps @ compars; | |
265 | - [Nsyn,Eq,["common"];Nsem,Eq,["measure"]] @ selectors,cat,ENIAMwalRenderer.measure_noun_adjuncts_simp @ prepnps @ compreps @ compars; | |
266 | - [Nsyn,Eq,["common"];Nsem,Neq,["measure"]] @ selectors,cat,ENIAMwalRenderer.common_noun_adjuncts_simp @ prepnps @ compreps @ compars] | |
267 | - | "adj" -> [selectors,cat,schema @ ENIAMwalRenderer.adj_adjuncts_simp @ compars] | |
268 | - | "adv" -> [selectors,cat,schema @ ENIAMwalRenderer.adv_adjuncts_simp @ compars] | |
264 | + [Nsyn,Eq,["proper"]] @ selectors,cat,(*has_context,*)ENIAMwalRenderer.proper_noun_adjuncts_simp @ prepnps @ compreps @ compars; | |
265 | + [Nsyn,Eq,["common"];Nsem,Eq,["measure"]] @ selectors,cat,(*has_context,*)ENIAMwalRenderer.measure_noun_adjuncts_simp @ prepnps @ compreps @ compars; | |
266 | + [Nsyn,Eq,["common"];Nsem,Neq,["measure"]] @ selectors,cat,(*has_context,*)ENIAMwalRenderer.common_noun_adjuncts_simp @ prepnps @ compreps @ compars] | |
267 | + | "adj" -> [selectors,cat,(*has_context,*)schema @ ENIAMwalRenderer.adj_adjuncts_simp @ compars] | |
268 | + | "adv" -> [selectors,cat,(*has_context,*)schema @ ENIAMwalRenderer.adv_adjuncts_simp @ compars] | |
269 | 269 | | _ -> [] |
270 | 270 | |
271 | 271 | open ENIAMlexSemanticsTypes |
... | ... |
lexSemantics/ENIAMlexSemantics.ml
... | ... | @@ -23,20 +23,22 @@ open ENIAMlexSemanticsTypes |
23 | 23 | open ENIAMwalTypes |
24 | 24 | open Xstd |
25 | 25 | |
26 | -let find_meaning m = | |
26 | +(*let snode_values = ENIAM_LCGlexiconTypes.SelectorMap.find ENIAMcategoriesPL.selector_values ENIAM_LCGlexiconTypes.SNode*) | |
27 | + | |
28 | +let find_sense m = | |
27 | 29 | try |
28 | - ENIAMplWordnet.find_meaning m.plwnluid | |
30 | + ENIAMplWordnet.find_sense m.plwnluid | |
29 | 31 | with Not_found -> |
30 | - m.name ^ "-" ^ m.variant, [], unknown_meaning_weight | |
32 | + m.name ^ "-" ^ m.variant, [], unknown_sense_weight | |
31 | 33 | |
32 | -let find_prep_meaning lemma hipero = | |
34 | +let find_prep_sense lemma hipero = | |
33 | 35 | let hipero = match hipero with |
34 | 36 | [Predef hipero] -> hipero |
35 | - | _ -> failwith "find_prep_meaning" in | |
36 | - if hipero = "ALL" then lemma, [hipero,0], unknown_meaning_weight else | |
37 | + | _ -> failwith "find_prep_sense" in | |
38 | + if hipero = "ALL" then lemma, [hipero,0], unknown_sense_weight else | |
37 | 39 | let syn_id = StringMap.find !ENIAMplWordnet.predef hipero in |
38 | 40 | let hipero = IntMap.fold (ENIAMplWordnet.get_hipero syn_id) [] (fun hipero syn_id cost -> (ENIAMplWordnet.synset_name syn_id, cost) :: hipero) in |
39 | - lemma, hipero, unknown_meaning_weight | |
41 | + lemma, hipero, unknown_sense_weight | |
40 | 42 | |
41 | 43 | let lex_sie = LCG (ENIAMwalRenderer.render_morf (SimpleLexArg("się",QUB))) |
42 | 44 | |
... | ... | @@ -47,7 +49,7 @@ let rec has_lemma_sie = function |
47 | 49 | (* FIXME: naiwnie wierzymy, że jeśli leksem jest opisany semantycznie w walentym to zawiera ramy dla wszystkich sensów *) |
48 | 50 | let find_senses t s = |
49 | 51 | (*let set = Xlist.fold s.frames StringSet.empty (fun set frame -> |
50 | - Xlist.fold frame.meanings set (fun set (name,hipero,weight) -> | |
52 | + Xlist.fold frame.senses set (fun set (name,hipero,weight) -> | |
51 | 53 | StringSet.add set name)) in*) |
52 | 54 | let senses = match t.token with |
53 | 55 | Lemma(lemma,pos,_) -> ENIAMplWordnet.find_senses lemma pos |
... | ... | @@ -62,15 +64,15 @@ let find_senses t s = |
62 | 64 | | _ -> [] in |
63 | 65 | (* let senses_sie = Xlist.fold senses_sie [] (fun senses_sie (name,hipero,weight) -> |
64 | 66 | if StringSet.mem set name then senses_sie else (name,hipero,weight) :: senses_sie) in |
65 | - let frames = if senses = [] then s.frames else {empty_frame with meanings=senses} :: s.frames in | |
66 | - let frames = if senses_sie = [] then frames else {empty_frame with meanings=senses_sie; | |
67 | + let frames = if senses = [] then s.frames else {empty_frame with senses=senses} :: s.frames in | |
68 | + let frames = if senses_sie = [] then frames else {empty_frame with senses=senses_sie; | |
67 | 69 | positions=[{empty_position with role="Lemma"; mode=["lemma"]; morfs=[lex_sie]; is_necessary=Req}]} :: frames in*) (* FIXME: czy to nie usuwa elementów z ramy? *) |
68 | 70 | let frames = Xlist.rev_map s.frames (fun f -> |
69 | - if f.meanings <> [] then f else | |
71 | + if f.senses <> [] then f else | |
70 | 72 | if has_lemma_sie f.positions then |
71 | - if senses_sie = [] then {f with meanings=[ENIAMtokens.get_lemma t.token ^ " się", [], unknown_meaning_weight]} else {f with meanings=senses_sie} | |
73 | + if senses_sie = [] then {f with senses=[ENIAMtokens.get_lemma t.token ^ " się", [], unknown_sense_weight]} else {f with senses=senses_sie} | |
72 | 74 | else |
73 | - if senses = [] then {f with meanings=[ENIAMtokens.get_lemma t.token, [], unknown_meaning_weight]} else {f with meanings=senses}) in | |
75 | + if senses = [] then {f with senses=[ENIAMtokens.get_lemma t.token, [], unknown_sense_weight]} else {f with senses=senses}) in | |
74 | 76 | {s with frames=frames} |
75 | 77 | |
76 | 78 | let find_selprefs schema = (* FIXME: RelationRole *) |
... | ... | @@ -124,6 +126,7 @@ let rec split_tokens_into_groups_paragraph a = function |
124 | 126 | Xlist.iter sentences (fun p -> split_tokens_into_groups_sentence a p.sentence) |
125 | 127 | | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> |
126 | 128 | split_tokens_into_groups_paragraph a paragraph) |
129 | + | ErrorParagraph s -> () | |
127 | 130 | |
128 | 131 | let rec split_tokens_into_groups_text a = function |
129 | 132 | RawText s -> () |
... | ... | @@ -172,10 +175,10 @@ let semantize lemma pos (selectors,schema) = |
172 | 175 | |
173 | 176 | let load_num_sem filename (num_sem,num_sem_args) = |
174 | 177 | File.fold_tab filename (num_sem,num_sem_args) (fun (num_sem,num_sem_args) -> function |
175 | - [lemma;_;nsems;meaning;sem_args] -> | |
178 | + [lemma;_;nsems;sense;sem_args] -> | |
176 | 179 | let sem_args = Xstring.split "," sem_args in |
177 | 180 | Xlist.fold (Xstring.split "," nsems) num_sem (fun num_sem nsem -> |
178 | - StringMap.add_inc num_sem lemma [nsem,meaning] (fun l -> (nsem,meaning) ::l)), | |
181 | + StringMap.add_inc num_sem lemma [nsem,sense] (fun l -> (nsem,sense) ::l)), | |
179 | 182 | StringMap.add_inc num_sem_args lemma sem_args (fun _ -> failwith "load_num_sem") |
180 | 183 | | _ -> failwith "load_num_sem") |
181 | 184 | |
... | ... | @@ -208,13 +211,13 @@ let mark_nosem frame = |
208 | 211 | let assign_prep_semantics lemma = |
209 | 212 | if StringSet.mem ENIAMcategoriesPL.compar_lexemes lemma then |
210 | 213 | [{empty_frame with |
211 | - meanings = [find_prep_meaning lemma [Predef "ALL"]]; | |
214 | + senses = [find_prep_sense lemma [Predef "ALL"]]; | |
212 | 215 | positions= [{empty_position with |
213 | 216 | dir=Forward_; gf=CORE; |
214 | 217 | morfs=ENIAMwalRenderer.compar_morfs; is_necessary=Req}]; |
215 | 218 | agf="arg"}; |
216 | 219 | {empty_frame with |
217 | - meanings = [find_prep_meaning lemma [Predef "ALL"]]; | |
220 | + senses = [find_prep_sense lemma [Predef "ALL"]]; | |
218 | 221 | positions= [{empty_position with |
219 | 222 | sel_prefs=[SynsetName "ALL"]; dir=Forward_; gf=CORE; |
220 | 223 | morfs=ENIAMwalRenderer.compar_morfs; is_necessary=Req}]; |
... | ... | @@ -223,14 +226,14 @@ let assign_prep_semantics lemma = |
223 | 226 | let roles = try StringMap.find ENIAMlexSemanticsData.prep_roles lemma with Not_found -> [] in |
224 | 227 | (* Printf.printf "assign_prep_semantics: |roles|=%d\n%!" (Xlist.size roles); *) |
225 | 228 | {empty_frame with |
226 | - meanings = [find_prep_meaning lemma [Predef "ALL"]]; | |
229 | + senses = [find_prep_sense lemma [Predef "ALL"]]; | |
227 | 230 | positions= [{empty_position with |
228 | 231 | dir=if lemma="temu" then Backward_ else Forward_; gf=CORE; |
229 | 232 | morfs=ENIAMwalRenderer.prep_morfs; is_necessary=Req}]; |
230 | 233 | agf="arg"} :: |
231 | 234 | (if roles = [] then (* FIXME: zaślepka do usunięcia po stworzeniu listy przyimków *) |
232 | 235 | [{empty_frame with |
233 | - meanings = [find_prep_meaning lemma [Predef "ALL"]]; | |
236 | + senses = [find_prep_sense lemma [Predef "ALL"]]; | |
234 | 237 | positions= [{empty_position with |
235 | 238 | sel_prefs=[SynsetName "ALL"]; dir=if lemma="temu" then Backward_ else Forward_; gf=CORE; |
236 | 239 | morfs=ENIAMwalRenderer.prep_morfs; is_necessary=Req}]; |
... | ... | @@ -238,28 +241,28 @@ let assign_prep_semantics lemma = |
238 | 241 | else |
239 | 242 | Xlist.map roles (function (case,arole,arole_attr,hipero,sel_prefs) -> |
240 | 243 | (* Printf.printf "assign_prep_semantics: case=%s arole=%s arole_attr=%s\n%!" case arole arole_attr; *) |
241 | - let meaning = find_prep_meaning lemma hipero in (* FIXME: zaślepka dla meaning i weight *) | |
244 | + let sense = find_prep_sense lemma hipero in (* FIXME: zaślepka dla sense i weight *) | |
242 | 245 | (* print_endline "assign_prep_semantics 1"; *) |
243 | 246 | let positions = [{empty_position with |
244 | 247 | sel_prefs=sel_prefs; dir=if lemma="temu" then Backward_ else Forward_; gf=CORE; |
245 | 248 | morfs=ENIAMwalRenderer.prep_morfs(*ENIAMwalRenderer.assing_prep_morfs (lemma,case)*); is_necessary=Req}] in |
246 | 249 | (* print_endline "assign_prep_semantics 2"; *) |
247 | - {empty_frame with selectors=[ENIAM_LCGlexiconTypes.Case,ENIAM_LCGlexiconTypes.Eq,[case]]; meanings=[meaning]; positions=find_selprefs positions; | |
250 | + {empty_frame with selectors=[ENIAM_LCGlexiconTypes.Case,ENIAM_LCGlexiconTypes.Eq,[case]]; senses=[sense]; positions=find_selprefs positions; | |
248 | 251 | arole=arole; arole_attr=arole_attr; arev=false; agf="adjunct"})) |
249 | 252 | |
250 | 253 | let assign_num_semantics lemma = |
251 | 254 | let sems = try StringMap.find !num_sem lemma with Not_found -> [] in |
252 | - Xlist.map sems (fun (nsem,meaning) -> | |
253 | - let meaning,arole_attr = | |
254 | - if meaning = "" then (lemma, [], unknown_meaning_weight),"Approximate" | |
255 | - else (meaning, [], unknown_meaning_weight),"Exact" in | |
255 | + Xlist.map sems (fun (nsem,sense) -> | |
256 | + let sense,arole_attr = | |
257 | + if sense = "" then (lemma, [], unknown_sense_weight),"Approximate" | |
258 | + else (sense, [], unknown_sense_weight),"Exact" in | |
256 | 259 | let arole = match nsem with |
257 | 260 | "count" -> "Count" |
258 | 261 | | "mass" -> "Measure" |
259 | 262 | | _ -> failwith "assign_num_semantics" in |
260 | 263 | {empty_frame with |
261 | 264 | selectors=[ENIAM_LCGlexiconTypes.Nsem,ENIAM_LCGlexiconTypes.Eq,[nsem]]; |
262 | - meanings=[meaning]; arole=arole; arole_attr=arole_attr; arev=false}) | |
265 | + senses=[sense]; arole=arole; arole_attr=arole_attr; arev=false}) | |
263 | 266 | |
264 | 267 | let assign_symb_num_semantics lemma pos = |
265 | 268 | let arole_attr = match pos with |
... | ... | @@ -270,14 +273,14 @@ let assign_symb_num_semantics lemma pos = |
270 | 273 | | _ -> failwith "assign_symb_num_semantics" in |
271 | 274 | [{empty_frame with |
272 | 275 | selectors=[ENIAM_LCGlexiconTypes.Nsem,ENIAM_LCGlexiconTypes.Eq,["count"]]; |
273 | - meanings=[lemma, [], unknown_meaning_weight]; arole="Count"; arole_attr=arole_attr; arev=false}] | |
276 | + senses=[lemma, [], unknown_sense_weight]; arole="Count"; arole_attr=arole_attr; arev=false}] | |
274 | 277 | |
275 | 278 | (*let set_context lemma pos frame = |
276 | 279 | if pos = "fin" || pos = "praet" || pos = "winien" || pos = "inf" || pos = "pred" || pos = "impt" || pos = "imps" || pos = "ger" || pos = "pcon" || pos = "pant" then |
277 | 280 | [{frame with has_context=true}] else |
278 | 281 | if pos = "subst" then |
279 | - if frame.meanings = [] then failwith "set_context" else | |
280 | - let Xlist.fold frame.meanings (fun -> ) in | |
282 | + if frame.senses = [] then failwith "set_context" else | |
283 | + let Xlist.fold frame.senses (fun -> ) in | |
281 | 284 | else [{frame with has_context=true}](*wydarzenie 1 czynność 1*) (*czynności 1 czyn 1*)*) |
282 | 285 | |
283 | 286 | let assign_valence tokens lex_sems group = |
... | ... | @@ -302,17 +305,17 @@ let assign_valence tokens lex_sems group = |
302 | 305 | let schemata = ENIAMadjuncts.simplify_schemata lexemes pos pos2 lemma schemata1 in |
303 | 306 | (* Printf.printf "C %s |schemata|=%d\n" lemma (Xlist.size schemata); *) |
304 | 307 | let schemata = Xlist.rev_map schemata (fun (selectors,schema) -> |
305 | - selectors,["X",["X"]],ENIAMwalRenderer.render_simple_schema schema) in | |
308 | + selectors,["X",["X"]],(*snode_values,*)ENIAMwalRenderer.render_simple_schema schema) in | |
306 | 309 | let schemata = List.flatten (Xlist.rev_map schemata (ENIAMadjuncts.add_adjuncts preps compreps compars pos2)) in |
307 | - let schemata = if schemata = [] then [[],["X",["X"]],[]] else schemata in | |
310 | + let schemata = if schemata = [] then [[],["X",["X"]],(*snode_values,*)[]] else schemata in | |
308 | 311 | (* Printf.printf "D %s |schemata|=%d\n" lemma (Xlist.size schemata); *) |
309 | 312 | let entries = List.flatten (Xlist.rev_map entries (ENIAMvalence.transform_lex_entry pos lemma)) in |
310 | 313 | let entries = Xlist.map entries (fun (selectors,entry) -> |
311 | 314 | selectors,ENIAMwalRenderer.render_lex_entry entry) in |
312 | - let connected = List.flatten (Xlist.map connected (fun (sopinion,fopinion,meanings,neg,pred,aspect,schema1) -> | |
315 | + let connected = List.flatten (Xlist.map connected (fun (sopinion,fopinion,senses,neg,pred,aspect,schema1) -> | |
313 | 316 | List.flatten (Xlist.rev_map (ENIAMvalence.transform_entry pos lemma neg pred aspect schema1) (fun (selectors,schema) -> |
314 | 317 | Xlist.rev_map (ENIAMvalence.get_aroles schema1 lemma pos) (fun (sel,arole,arole_attr,arev) -> |
315 | - {selectors=sel @ selectors; meanings=Xlist.map meanings find_meaning; positions=schema; | |
318 | + {empty_frame with selectors=sel @ selectors; senses=Xlist.map senses find_sense; positions=schema; | |
316 | 319 | arole=arole; arole_attr=arole_attr; arev=arev; agf=""; rev_hipero=false; sem_args=[]; sopinion=sopinion; fopinion=fopinion}))))) in |
317 | 320 | (* Printf.printf "E %s |connected|=%d\n" lemma (Xlist.size connected); *) |
318 | 321 | let connected = if connected = [] then List.flatten (Xlist.rev_map (make_unique schemata1) (semantize lemma pos)) else connected in |
... | ... | @@ -344,7 +347,7 @@ let assign_valence tokens lex_sems group = |
344 | 347 | let connected = Xlist.rev_map connected mark_nosem in |
345 | 348 | let connected = if connected = [] then semantize lemma pos ([],[]) else connected in |
346 | 349 | let connected = Xlist.rev_map connected (fun f -> |
347 | - if f.meanings = [] then {f with meanings=[lemma, ["X",1], unknown_meaning_weight]} else f) in | |
350 | + if f.senses = [] then {f with senses=[lemma, ["X",1], unknown_sense_weight]} else f) in | |
348 | 351 | (* let connected = List.flatten (Xlist.rev_map connected (set_context lemma pos)) in *) |
349 | 352 | (* Printf.printf "K %s |connected|=%d\n" lemma (Xlist.size connected); *) |
350 | 353 | ExtArray.set lex_sems id {(*(ExtArray.get lex_sems id) with*) |
... | ... | @@ -404,11 +407,11 @@ let disambiguate_senses lex_sems group = |
404 | 407 | Xlist.iter group (fun id -> |
405 | 408 | let t = ExtArray.get lex_sems id in |
406 | 409 | ExtArray.set lex_sems id {t with frames=Xlist.map t.frames (fun frame -> |
407 | - let meanings = Xlist.map frame.meanings (fun (name,hipero,weight) -> | |
410 | + let senses = Xlist.map frame.senses (fun (name,hipero,weight) -> | |
408 | 411 | let hipero = Xlist.fold hipero ["ALL",0] (fun hipero (name,cost) -> |
409 | 412 | if StringSet.mem prefs name then (name,cost) :: hipero else hipero) in |
410 | 413 | name,hipero,weight) in |
411 | - {frame with meanings=meanings})}) | |
414 | + {frame with senses=senses})}) | |
412 | 415 | |
413 | 416 | let remove_unused_tokens tokens groups = |
414 | 417 | let set = Xlist.fold groups IntSet.empty (fun set group -> |
... | ... | @@ -459,9 +462,9 @@ let rec create_tokens_for_artificial_nodes_rec tokens lex_sems = function |
459 | 462 | Node t -> |
460 | 463 | let t = if t.id = 0 then ( |
461 | 464 | let id = ExtArray.add tokens empty_token_env in |
462 | - let lex_sem = {empty_lex_sem with frames=[{empty_frame with meanings=[t.lemma, [t.lemma,0], unknown_meaning_weight]}]} in | |
465 | + let lex_sem = {empty_lex_sem with frames=[{empty_frame with senses=[t.lemma, [t.lemma,0], unknown_sense_weight]}]} in | |
463 | 466 | let id2 = ExtArray.add lex_sems lex_sem in |
464 | - if id <>id2 then failwith "create_tokens_for_artificial_nodes_rec" else | |
467 | + if id <> id2 then failwith "create_tokens_for_artificial_nodes_rec: tokens inconsistent with lex_sems" else | |
465 | 468 | let t = if t.symbol = Dot then |
466 | 469 | {t with symbol = match t.pos with |
467 | 470 | "<root>" -> Tuple[Val "<root>"] |
... | ... |
lexSemantics/ENIAMlexSemanticsData.ml
... | ... | @@ -239,7 +239,7 @@ let qub_roles = Xlist.fold [ |
239 | 239 | ] StringMap.empty (fun map (k,r,a) -> StringMap.add_inc map k [r,a] (fun l -> (r,a) :: l)) |
240 | 240 | |
241 | 241 | |
242 | -let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_prefs *) | |
242 | +let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,sense/hipero,sel_prefs *) | |
243 | 243 | "od","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"]; |
244 | 244 | "spod","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"]; |
245 | 245 | "spomiędzy","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"]; |
... | ... |
lexSemantics/ENIAMlexSemanticsHTMLof.ml
... | ... | @@ -60,14 +60,15 @@ let html_of_lex_sems tokens lex_sems = |
60 | 60 | let core = Printf.sprintf "%3d %s %s" id orth lemma in |
61 | 61 | let lex_entries = Xlist.map t.lex_entries (fun (selectors,s) -> |
62 | 62 | "  [" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] " ^ ENIAM_LCGstringOf.grammar_symbol 0 s) in |
63 | - let schemata = Xlist.map t.schemata (fun (selectors,cat,l) -> | |
63 | + let schemata = Xlist.map t.schemata (fun (selectors,cat,(*snode,*)l) -> | |
64 | 64 | "  [" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "]" ^ |
65 | 65 | String.concat "," (Xlist.map cat (fun (m,l) -> m ^ "[" ^ String.concat "," l ^ "]")) ^ |
66 | + (*String.concat "|" snode ^*) | |
66 | 67 | " {" ^ String.concat ", " (Xlist.map l (fun (d,s) -> |
67 | 68 | ENIAM_LCGstringOf.direction d ^ ENIAM_LCGstringOf.grammar_symbol 0 s)) ^ "}") in |
68 | - (* let frames = Xlist.map t.frames (fun (selectors,meanings,schema) -> FIXME | |
69 | + (* let frames = Xlist.map t.frames (fun (selectors,senses,schema) -> FIXME | |
69 | 70 | "  [" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] {" ^ ENIAMwalStringOf.schema schema ^ "} " ^ |
70 | - String.concat ", " (Xlist.map meanings (fun m -> ENIAMwalStringOf.meaning m))) in *) | |
71 | + String.concat ", " (Xlist.map senses (fun m -> ENIAMwalStringOf.sense m))) in *) | |
71 | 72 | (String.concat "<br>\n " ([core] @ schemata (*@ frames*) @ lex_entries)) :: l))) ^ |
72 | 73 | "</P>" |
73 | 74 | |
... | ... | @@ -76,7 +77,7 @@ let html_of_lex_sems tokens lex_sems = |
76 | 77 | lex_entries: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * |
77 | 78 | ENIAM_LCGtypes.grammar_symbol) list; |
78 | 79 | frames: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * |
79 | - ENIAMwalTypes.meaning list * ENIAMwalTypes.position list) list;*) | |
80 | + ENIAMwalTypes.sense list * ENIAMwalTypes.position list) list;*) | |
80 | 81 | |
81 | 82 | let text_and_tokens_and_lex_sems text tokens lex_sems msg = |
82 | 83 | if msg = "" then sprintf "%s\n%s<BR>\n%s<BR>\n%s<BR>\n%s\n" html_header |
... | ... |
lexSemantics/ENIAMlexSemanticsStringOf.ml
... | ... | @@ -40,13 +40,14 @@ let string_of_lex_sems tokens lex_sems = |
40 | 40 | let core = Printf.sprintf "%3d %s %s" id orth lemma in |
41 | 41 | let lex_entries = Xlist.map t.lex_entries (fun (selectors,s) -> |
42 | 42 | "&[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] " ^ ENIAM_LCGstringOf.grammar_symbol 0 s) in |
43 | - let schemata = Xlist.map t.schemata (fun (selectors,cat,l) -> | |
43 | + let schemata = Xlist.map t.schemata (fun (selectors,cat,(*snode,*)l) -> | |
44 | 44 | "[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "]" ^ |
45 | 45 | String.concat "," (Xlist.map cat (fun (m,l) -> m ^ "[" ^ String.concat "," l ^ "]")) ^ |
46 | + (*String.concat "|" snode ^*) | |
46 | 47 | " {" ^ String.concat "," (Xlist.map l (fun (d,s) -> |
47 | 48 | ENIAM_LCGstringOf.direction d ^ ENIAM_LCGstringOf.grammar_symbol 0 s)) ^ "}") in |
48 | 49 | let frames = Xlist.map t.frames (fun f -> |
49 | 50 | "*" ^ arole f ^ "[" ^ ENIAMcategoriesPL.string_of_selectors f.selectors ^ "] {" ^ ENIAMwalStringOf.schema f.positions ^ "} " ^ |
50 | - String.concat "," (Xlist.map f.meanings (fun (sense,hipero,weight) -> | |
51 | + String.concat "," (Xlist.map f.senses (fun (sense,hipero,weight) -> | |
51 | 52 | Printf.sprintf "%s[%s]%.2f" sense (String.concat "," (Xlist.map hipero (fun (s,n) -> s ^ " " ^ string_of_int n))) weight))) in |
52 | 53 | (String.concat "\n " ([core] @ schemata @ frames @ lex_entries)) :: l))) |
... | ... |
lexSemantics/ENIAMlexSemanticsTypes.ml
... | ... | @@ -22,7 +22,8 @@ open Xstd |
22 | 22 | |
23 | 23 | type frame = { |
24 | 24 | selectors: (ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list; |
25 | - meanings: ((*ENIAMwalTypes.meaning **) string * (string * int) list * float) list; | |
25 | + senses: ((*ENIAMwalTypes.sense **) string * (string * int) list * float) list; | |
26 | + cats: (string * string list) list; | |
26 | 27 | positions: ENIAMwalTypes.position list; |
27 | 28 | arole: string; |
28 | 29 | arole_attr: string; |
... | ... | @@ -30,17 +31,18 @@ type frame = { |
30 | 31 | agf: string; |
31 | 32 | sem_args: string list; |
32 | 33 | rev_hipero: bool; |
33 | - (* has_context: bool; *) | |
34 | + (*snode: string list;*) | |
34 | 35 | sopinion: ENIAMwalTypes.opinion; |
35 | 36 | fopinion: ENIAMwalTypes.opinion; |
36 | 37 | } |
37 | 38 | |
38 | -let empty_frame = {selectors=[]; meanings=[]; positions=[]; arole=""; arole_attr=""; arev=false; agf=""; sem_args=[]; rev_hipero=false; (*has_context=false;*) | |
39 | +let empty_frame = {selectors=[]; senses=[]; cats=["X",["X"]]; positions=[]; arole=""; arole_attr=""; arev=false; agf=""; sem_args=[]; rev_hipero=false; (*snode=[];*) | |
39 | 40 | sopinion=ENIAMwalTypes.Nieokreslony; fopinion=ENIAMwalTypes.Nieokreslony} |
40 | 41 | |
41 | 42 | type lex_sem = { |
42 | 43 | schemata: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * |
43 | 44 | (string * string list) list * (* sensy *) |
45 | + (*string list **) (* has_context *) | |
44 | 46 | (ENIAM_LCGtypes.direction * ENIAM_LCGtypes.grammar_symbol) list) list; |
45 | 47 | lex_entries: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * |
46 | 48 | ENIAM_LCGtypes.grammar_symbol) list; |
... | ... | @@ -49,10 +51,10 @@ type lex_sem = { |
49 | 51 | } |
50 | 52 | |
51 | 53 | let empty_lex_sem = { |
52 | - schemata=[]; lex_entries=[]; frames=[]; (*cats=["X",["X"]]*)} | |
54 | + schemata=[]; lex_entries=[]; frames=[]} | |
53 | 55 | |
54 | 56 | let hipero_threshold = 3 |
55 | -let unknown_meaning_weight = -1. | |
57 | +let unknown_sense_weight = -1. | |
56 | 58 | |
57 | 59 | let lu_filename = resource_path ^ "/plWordnet/lu.tab" |
58 | 60 | let ex_hipo_filename = resource_path ^ "/plWordnet/ex_hipo.tab" |
... | ... | @@ -61,5 +63,5 @@ let syn_filename = resource_path ^ "/plWordnet/syn.tab" |
61 | 63 | let predef_filename = resource_path ^ "/lexSemantics/predef_prefs.tab" |
62 | 64 | let proper_classes_filename = resource_path ^ "/lexSemantics/proper_classes.tab" |
63 | 65 | |
64 | -let proj_filename = ENIAMwalTypes.data_path ^ "/projections.tab" | |
65 | -let proper_meanings_filename = ENIAMwalTypes.data_path ^ "/proper_meanings.tab" | |
66 | +let coercions_filename = ENIAMwalTypes.data_path ^ "/coercions.tab" | |
67 | +let proper_cats_filename = ENIAMwalTypes.data_path ^ "/proper_cats.tab" | |
... | ... |
lexSemantics/ENIAMplWordnet.ml
... | ... | @@ -155,7 +155,7 @@ let find_proper_senses senses = |
155 | 155 | List.flatten (Xlist.rev_map senses (fun sense -> |
156 | 156 | try StringMap.find !proper_classes sense with Not_found -> failwith ("find_proper_senses: " ^ sense))) |
157 | 157 | |
158 | -let find_meaning lu_id = | |
158 | +let find_sense lu_id = | |
159 | 159 | let lemma,variant,syn_id = IntMap.find !lu_names lu_id in |
160 | 160 | lemma ^ "-" ^ variant, |
161 | 161 | IntMap.fold (get_hipero syn_id) [] (fun hipero syn_id cost -> (synset_name syn_id, cost) :: hipero), |
... | ... |
lexSemantics/ENIAMvalence.ml
... | ... | @@ -56,6 +56,7 @@ let transform_gdy = function |
56 | 56 | | "imperative" -> [Comp "gdy"] |
57 | 57 | | "conditional" -> [Comp "gdyby"] |
58 | 58 | | "gerundial" -> [Comp "gdy"] |
59 | + | "no-subj" -> [Comp "gdy"] | |
59 | 60 | | "" -> [Comp "gdy";Comp "gdyby"] |
60 | 61 | | s -> failwith ("transform_gdy: " ^ s) |
61 | 62 | |
... | ... | @@ -232,9 +233,29 @@ let transform_qub_phrase lemma = function |
232 | 233 | | phrase -> failwith ("transform_qub_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase) |
233 | 234 | |
234 | 235 | let transform_qub_pos lemma = function |
235 | - | QUB as morf -> [morf] | |
236 | + | QUB as morf -> [morf] | |
236 | 237 | | pos -> failwith ("transform_qub_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos) |
237 | 238 | |
239 | +let transform_interj_phrase lemma = function | |
240 | + NP(Case "nom") as morf -> [morf] | |
241 | + | Null -> [Null] | |
242 | + | phrase -> failwith ("transform_interj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase) | |
243 | + | |
244 | +let transform_interj_pos lemma = function | |
245 | + | pos -> failwith ("transform_interj_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos) | |
246 | + | |
247 | +let transform_sinterj_phrase lemma = function | |
248 | + | phrase -> failwith ("transform_sinterj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase) | |
249 | + | |
250 | +let transform_sinterj_pos lemma = function | |
251 | + | pos -> failwith ("transform_sinterj_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos) | |
252 | + | |
253 | +let transform_aglt_phrase lemma = function | |
254 | + | phrase -> failwith ("transform_aglt_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase) | |
255 | + | |
256 | +let transform_aglt_pos lemma = function | |
257 | + | pos -> failwith ("transform_aglt_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos) | |
258 | + | |
238 | 259 | let transform_siebie_phrase lemma = function |
239 | 260 | | phrase -> failwith ("transform_siebie_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase) |
240 | 261 | |
... | ... | @@ -243,9 +264,9 @@ let transform_siebie_pos lemma = function |
243 | 264 | | pos -> failwith ("transform_siebie_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos) |
244 | 265 | |
245 | 266 | let transform_pers_subj_phrase lemma negation mood = function (* FIXME: prepnp(na,loc) *) |
246 | - | NP(Str) -> [NP(NomAgr)(*;NumP(NomAgr)*)] | |
267 | + | NP(Str) -> [NP(NomAgr);NP(VocAgr)(*;NumP(NomAgr)*)] | |
247 | 268 | | NP(Part) -> [NP(Case "gen");NP(Case "acc")(*;NumP(Case "gen");NumP(Case "acc")*)] (* tylko w 'nalewać', 'nalać', 'ponalewać', 'najechać','uzbierać' *) |
248 | - | NCP(Str,ctype,comp) -> [NCP(NomAgr,ctype,comp)] | |
269 | + | NCP(Str,ctype,comp) -> [NCP(NomAgr,ctype,comp);NCP(VocAgr,ctype,comp)] | |
249 | 270 | | CP(ctype,comp) as morf -> [morf] |
250 | 271 | | InfP _ as morf -> [morf] |
251 | 272 | | Or as morf -> [morf] |
... | ... | @@ -265,7 +286,7 @@ let transform_pers_subj_pos lemma negation mood = function |
265 | 286 | let transform_ger_subj_phrase lemma negation mood control = function |
266 | 287 | | NP(Str) -> [NP(Case "gen");PrepNP(Pnosem,"przez",Case "acc")(*;NumP(Case "gen")*)(*;PrepNumP("przez",Case "acc")*)] (* FIXME: czy przez:acc jest możliwe? *) |
267 | 288 | | NP(Part) -> [NP(Case "gen")(*;NP(Case "acc")*)(*;NumP(Case "gen");NumP(Case "acc")*)] |
268 | - | NCP(Str,ctype,comp) -> [NCP(Case "gen",ctype,comp);PrepNCP("przez",Case "acc",ctype,comp)] (* FIXME: czy przez:acc jest możliwe? *) | |
289 | + | NCP(Str,ctype,comp) -> [NCP(Case "gen",ctype,comp);PrepNCP(Pnosem,"przez",Case "acc",ctype,comp)] (* FIXME: czy przez:acc jest możliwe? *) | |
269 | 290 | | CP(ctype,comp) as morf -> [morf] |
270 | 291 | | InfP _ as morf -> [morf] (* FIXME: czy to jest możliwe? *) |
271 | 292 | | Or as morf -> [morf] |
... | ... | @@ -284,7 +305,7 @@ let transform_ger_subj_pos lemma negation mood = function (* FIXME: ADV(_) *) |
284 | 305 | |
285 | 306 | let transform_ppas_subj_phrase lemma negation mood control = function |
286 | 307 | | NP(Str) -> [PrepNP(Pnosem,"przez",Case "acc")(*;PrepNumP("przez",Case "acc")*)] |
287 | - | NCP(Str,ctype,comp) -> [PrepNCP("przez",Case "acc",ctype,comp)] | |
308 | + | NCP(Str,ctype,comp) -> [PrepNCP(Pnosem,"przez",Case "acc",ctype,comp)] | |
288 | 309 | | CP(ctype,comp) as morf -> [morf] |
289 | 310 | | Pro -> if control then [Pro] else [Null] |
290 | 311 | | morf -> failwith ("transform_ppas_subj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf) |
... | ... | @@ -300,6 +321,7 @@ let transform_pers_phrase lemma negation mood = function |
300 | 321 | | AdjP(Str) -> Xlist.map (transform_str mood negation) (fun case -> AdjP case) (* FIXME: pomijam uzgadnianie liczby i rodzaju - wykonalne za pomocą kontroli *) |
301 | 322 | | AdjP CaseAgr as morf -> if mood = "gerundial" then [AdjP AllAgr] else (failwith ("transform_pers_phrase2: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf)) |
302 | 323 | | AdjP(Case _) as morf -> [morf] (* FIXME: pomijam uzgadnianie liczby i rodzaju - wykonalne za pomocą kontroli *) |
324 | + | AdjP(NomAgr) as morf -> if mood = "no-subj" then [AdjP(Case "nom")] else [morf] | |
303 | 325 | | CP(ctype,comp) as morf -> [morf] |
304 | 326 | | PrepNP _ as morf -> [morf] |
305 | 327 | | PrepAdjP _ as morf -> [morf] (* FIXME: pomijam uzgadnianie liczby i rodzaju - wykonalne za pomocą kontroli *) |
... | ... | @@ -348,7 +370,7 @@ let transform_pers_pos lemma negation mood = function |
348 | 370 | let rec transform_comps negation mood = function |
349 | 371 | | CP(ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> CP(ctype,comp)) |
350 | 372 | | NCP(case,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> NCP(case,ctype,comp)) |
351 | - | PrepNCP(prep,case,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> PrepNCP(prep,case,ctype,comp)) | |
373 | + | PrepNCP(psem,prep,case,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> PrepNCP(psem,prep,case,ctype,comp)) | |
352 | 374 | | E phrase -> Xlist.map (transform_comps negation mood phrase) (fun phrase -> E phrase) |
353 | 375 | | morf -> [morf] |
354 | 376 | |
... | ... | @@ -363,31 +385,31 @@ let transform_preps morf = |
363 | 385 | | SimpleLexArg(lex,PREP c) -> if is_compar lex then SimpleLexArg(lex,COMPAR c) else SimpleLexArg(lex,PREP c) |
364 | 386 | | PrepNP(psem,prep,c) -> if is_compar prep then ComparP(prep,c) else PrepNP(psem,prep,c) |
365 | 387 | | PrepAdjP(prep,c) -> if is_compar prep then ComparP(prep,c) else PrepAdjP(prep,c) |
366 | - | PrepNCP(prep,case,ctype,comp) as morf -> if is_compar prep then failwith "transform_preps" else morf | |
388 | + | PrepNCP(psem,prep,case,ctype,comp) as morf -> if is_compar prep then failwith "transform_preps 1" else morf | |
367 | 389 | | morf -> morf in |
368 | 390 | match morf with |
369 | 391 | | ComparP(prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> ComparP(prep,Case case)) |
370 | - | ComparP _ -> failwith "transform_preps" | |
392 | + | ComparP _ -> failwith "transform_preps 2" | |
371 | 393 | | LexArg(id,lex,COMPAR Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> LexArg(id,lex,COMPAR (Case case))) |
372 | 394 | | SimpleLexArg(lex,COMPAR Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> SimpleLexArg(lex,COMPAR (Case case))) |
373 | 395 | | LexArg(id,lex,COMPAR (Case _)) as morf -> [morf] |
374 | 396 | | SimpleLexArg(lex,COMPAR (Case _)) as morf -> [morf] |
375 | - | LexArg(id,lex,COMPAR _) -> failwith "transform_preps" | |
376 | - | SimpleLexArg(lex,COMPAR _) -> failwith "transform_preps" | |
397 | + | LexArg(id,lex,COMPAR _) -> failwith "transform_preps 3" | |
398 | + | SimpleLexArg(lex,COMPAR _) -> failwith "transform_preps 4" | |
377 | 399 | | PrepNP(sem,"per",Str) -> [PrepNP(sem,"per",Case "nom");PrepNP(sem,"per",Case "voc")] (* FIXME: voc do poprawienie w leksykonie *) |
378 | 400 | | PrepNP(_,_,Case _) as morf -> [morf] |
379 | 401 | | PrepAdjP(_,Case _) as morf -> [morf] |
380 | - | PrepNCP(_,Case _,_,_) as morf -> [morf] | |
381 | - | PrepNP(_,"_",CaseUndef) as morf -> [morf] | |
382 | - | PrepNP _ -> failwith "transform_preps" | |
383 | - | PrepAdjP _ -> failwith "transform_preps" | |
384 | - | PrepNCP _ -> failwith "transform_preps" | |
402 | + | PrepNCP(_,_,Case _,_,_) as morf -> [morf] | |
403 | + | PrepNP(_,_,CaseUndef) as morf -> [morf] | |
404 | + | PrepNP _ as morf -> failwith ("transform_preps 5: " ^ ENIAMwalStringOf.phrase morf) | |
405 | + | PrepAdjP _ -> failwith "transform_preps 6" | |
406 | + | PrepNCP _ -> failwith "transform_preps 7" | |
385 | 407 | | LexArg(id,"w",PREP Str) -> [LexArg(id,"w",PREP (Case "acc"));LexArg(id,"w",PREP (Case "loc"));] |
386 | 408 | | SimpleLexArg("w",PREP Str) -> [SimpleLexArg("w",PREP (Case "acc"));SimpleLexArg("w",PREP (Case "loc"))] |
387 | 409 | | LexArg(id,lex,PREP (Case _)) as morf -> [morf] |
388 | 410 | | SimpleLexArg(lex,PREP (Case _)) as morf -> [morf] |
389 | - | LexArg(id,lex,PREP _) -> failwith "transform_preps" | |
390 | - | SimpleLexArg(lex,PREP _) -> failwith "transform_preps" | |
411 | + | LexArg(id,lex,PREP _) -> failwith "transform_preps 8" | |
412 | + | SimpleLexArg(lex,PREP _) -> failwith "transform_preps 9" | |
391 | 413 | | morf -> [morf] |
392 | 414 | |
393 | 415 | let transform_pers_schema lemma negation mood schema = |
... | ... | @@ -445,7 +467,8 @@ let transform_ger_schema lemma negation schema = (* FIXME: zakładam, że ger ze |
445 | 467 | | phrase -> transform_pers_phrase lemma negation "gerundial" phrase))}) |
446 | 468 | |
447 | 469 | let transform_ppas_schema lemma negation mood schema = |
448 | - if not (Xlist.fold schema false (fun b p -> if p.gf = OBJ then true else b)) then raise Not_found else | |
470 | + if not (Xlist.fold schema false (fun b p -> if p.gf = OBJ then true else b)) then | |
471 | + (*failwith ("transform_ppas_schema: attempt to make ppas schema for lemma " ^ lemma ^ "without OBJ arg")*)raise Not_found else | |
449 | 472 | Xlist.map schema (fun s -> |
450 | 473 | let morfs = List.flatten (Xlist.map s.morfs (transform_comps negation mood)) in |
451 | 474 | let morfs = List.flatten (Xlist.map morfs transform_preps) in |
... | ... | @@ -488,6 +511,9 @@ let transform_schema pos lemma schema = |
488 | 511 | | "comp" -> transform_comp_phrase,transform_comp_pos |
489 | 512 | | "qub" -> transform_qub_phrase,transform_qub_pos |
490 | 513 | | "siebie" -> transform_siebie_phrase,transform_siebie_pos |
514 | + | "interj" -> transform_interj_phrase,transform_interj_pos | |
515 | + | "sinterj" -> transform_sinterj_phrase,transform_interj_pos | |
516 | + | "aglt" -> transform_aglt_phrase,transform_interj_pos | |
491 | 517 | | _ -> failwith "transform_schema" |
492 | 518 | in |
493 | 519 | Xlist.map schema (fun s -> |
... | ... | @@ -524,21 +550,26 @@ let aspect_sel = function |
524 | 550 | open ENIAM_LCGlexiconTypes |
525 | 551 | |
526 | 552 | let transform_entry pos lemma negation pred aspect schema = |
527 | - if pos = "subst" || pos = "depr" then ( | |
553 | + match pos with | |
554 | + "subst" |"depr" -> | |
528 | 555 | if negation <> NegationUndef || pred <> PredFalse || aspect <> AspectUndef then failwith ("transform_entry 1"); |
529 | - [[],transform_schema "subst" lemma schema]) else | |
530 | - if pos = "adj" || pos = "adjc" || pos = "adjp" then ( | |
556 | + [[],transform_schema "subst" lemma schema] | |
557 | + | "adj" |"adjc" |"adjp" -> | |
531 | 558 | if negation <> NegationUndef || aspect <> AspectUndef then failwith ("transform_entry 2"); |
532 | 559 | let sel = match pred with PredTrue -> [Case,Eq,["pred"]] | _ -> [] in |
533 | - [sel,transform_schema "adj" lemma schema]) else | |
534 | - if pos = "adv" || pos = "prep" || pos = "comprep" || pos = "comp" || pos = "compar" || pos = "qub" || pos = "siebie" then ( | |
560 | + [sel,transform_schema "adj" lemma schema] | |
561 | + | "adv" | "prep" | "comprep" | "comp" | "compar" | "qub" | "siebie" -> | |
535 | 562 | if negation <> NegationUndef || (*pred <> PredFalse ||*) aspect <> AspectUndef then failwith ("transform_entry 3"); (* FIXME: typy przysłówków *) |
536 | - [[],transform_schema pos lemma schema]) else | |
563 | + [[],transform_schema pos lemma schema] | |
564 | + | _ -> | |
537 | 565 | if pred <> PredFalse then failwith ("transform_entry 4") else |
538 | 566 | if pos = "num" || pos = "intnum" then ( |
539 | 567 | if negation <> NegationUndef || aspect <> AspectUndef then failwith ("transform_entry 5"); |
540 | 568 | Xlist.map ["congr";"rec"] (fun acm -> |
541 | 569 | [Acm,Eq,[acm]],transform_num_schema acm schema)) else |
570 | + if pos = "interj" then ( | |
571 | + if negation <> NegationUndef || pred <> PredFalse || aspect <> AspectUndef then failwith ("transform_entry 6"); | |
572 | + [[],transform_schema "interj" lemma schema]) else | |
542 | 573 | List.flatten (Xlist.map (expand_negation negation) (fun negation -> |
543 | 574 | let sel = [Negation,Eq,[ENIAMwalStringOf.negation negation]] @ aspect_sel aspect in |
544 | 575 | if pos = "fin" || pos = "bedzie" then |
... | ... | @@ -555,7 +586,7 @@ let transform_entry pos lemma negation pred aspect schema = |
555 | 586 | [sel @ [Mood,Eq,["indicative"]],transform_pers_schema lemma negation "indicative" schema] else |
556 | 587 | if pos = "pcon" || pos = "pant" || pos = "inf" || pos = "pact" then |
557 | 588 | (* let role,role_attr = try get_role SUBJ schema with Not_found -> "Initiator","" in *) |
558 | - [sel, transform_nosubj_schema lemma negation "indicative" schema] else | |
589 | + [sel, transform_nosubj_schema lemma negation "no-subj" schema] else | |
559 | 590 | if pos = "ppas" then |
560 | 591 | try |
561 | 592 | (* let role,role_attr = try get_role OBJ schema with Not_found -> "Theme","" in *) |
... | ... | @@ -563,6 +594,7 @@ let transform_entry pos lemma negation pred aspect schema = |
563 | 594 | with Not_found -> [] else |
564 | 595 | if pos = "ger" then |
565 | 596 | [sel,transform_ger_schema lemma negation schema] else |
597 | + if schema = [] then [[],[]] else | |
566 | 598 | failwith ("transform_entry: " ^ pos))) |
567 | 599 | |
568 | 600 | let transform_lex_entry pos lemma = function |
... | ... |
lexSemantics/ENIAMwalParser.ml
... | ... | @@ -256,7 +256,7 @@ let rec parse_phrase = function |
256 | 256 | | "comparp",[[Text prep]] -> ComparP(prep,Str) |
257 | 257 | | "cp",[ctype;comp] -> CP(parse_ctype ctype,parse_comp comp) |
258 | 258 | | "ncp",[case;ctype;comp] -> NCP(parse_case case,parse_ctype ctype,parse_comp comp) |
259 | - | "prepncp",[[Text prep];case;ctype;comp] -> PrepNCP(prep,parse_case case,parse_ctype ctype,parse_comp comp) | |
259 | + | "prepncp",[[Text prep];case;ctype;comp] -> PrepNCP(Psem,prep,parse_case case,parse_ctype ctype,parse_comp comp) | |
260 | 260 | | "infp",[aspect] -> InfP(parse_aspect aspect) |
261 | 261 | | "fixed",[[Text lemma]] -> FixedP lemma |
262 | 262 | | "fixed",[[Text lemma1];[Text lemma2]] -> FixedP (lemma1 ^ "," ^ lemma2) |
... | ... | @@ -423,43 +423,43 @@ let load_schemata filename = |
423 | 423 | |
424 | 424 | let load_connected filename = |
425 | 425 | let l = File.load_tab filename (function |
426 | - [pos; lemma; sopinion; fopinion; meanings; neg; pred; aspect; schema] -> | |
427 | - pos, lemma, sopinion, fopinion, meanings, neg, pred, aspect, schema | |
426 | + [pos; lemma; sopinion; fopinion; senses; neg; pred; aspect; schema] -> | |
427 | + pos, lemma, sopinion, fopinion, senses, neg, pred, aspect, schema | |
428 | 428 | | _ -> failwith "load_schemata") in |
429 | - Xlist.fold l Entries.empty (fun entries (pos,lemma,sopinion,fopinion,meanings,neg,pred,aspect,schema) -> | |
429 | + Xlist.fold l Entries.empty (fun entries (pos,lemma,sopinion,fopinion,senses,neg,pred,aspect,schema) -> | |
430 | 430 | let sopinion = parse_opinion sopinion in |
431 | 431 | let fopinion = parse_opinion fopinion in |
432 | - let meanings = Xlist.map (Xstring.split "," meanings) int_of_string in | |
432 | + let senses = Xlist.map (Xstring.split "," senses) int_of_string in | |
433 | 433 | let neg = parse_negation [Text neg] in |
434 | 434 | let pred = parse_pred pred in |
435 | 435 | let aspect = parse_aspect [Text aspect] in |
436 | 436 | let schema = parse_connected_schema (split_text schema) in |
437 | - let entry = sopinion,fopinion,meanings,neg,pred,aspect,schema in | |
437 | + let entry = sopinion,fopinion,senses,neg,pred,aspect,schema in | |
438 | 438 | Entries.add_inc entries pos lemma entry) |
439 | 439 | |
440 | -let load_meanings filename = | |
440 | +let load_senses filename = | |
441 | 441 | let l = File.load_tab filename (function |
442 | 442 | [id; name; variant; plwnluid; gloss] -> {mng_id=int_of_string id; |
443 | 443 | name=name; |
444 | 444 | variant=variant; |
445 | 445 | plwnluid=int_of_string plwnluid; |
446 | 446 | gloss=gloss} |
447 | - | _ -> failwith "load_meaning") in | |
448 | - Xlist.fold l IntMap.empty (fun meanings m -> | |
449 | - IntMap.add meanings m.mng_id m) | |
447 | + | _ -> failwith "load_sense") in | |
448 | + Xlist.fold l IntMap.empty (fun senses m -> | |
449 | + IntMap.add senses m.mng_id m) | |
450 | 450 | |
451 | 451 | let phrases = ref IntMap.empty |
452 | 452 | let entries = ref StringMap.empty |
453 | 453 | let schemata = ref StringMap.empty |
454 | 454 | let connected = ref StringMap.empty |
455 | -let meanings = ref IntMap.empty | |
455 | +let senses = ref IntMap.empty | |
456 | 456 | |
457 | 457 | let initialize () = |
458 | 458 | phrases := load_phrases phrases_filename; |
459 | 459 | entries := load_entries entries_filename; |
460 | 460 | schemata := load_schemata schemata_filename; |
461 | 461 | connected := load_connected connected_filename; |
462 | - meanings := load_meanings meanings_filename; | |
462 | + senses := load_senses senses_filename; | |
463 | 463 | () |
464 | 464 | |
465 | 465 | |
... | ... |
lexSemantics/ENIAMwalReduce.ml
... | ... | @@ -23,7 +23,7 @@ open Xstd |
23 | 23 | let create_phrase_reqs s (reqs,noreqs) = function |
24 | 24 | | PrepNP(_,prep,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs |
25 | 25 | | PrepAdjP(prep,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs |
26 | - | PrepNCP(prep,_,_,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | |
26 | + | PrepNCP(_,prep,_,_,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | |
27 | 27 | | ComparP(prep,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs |
28 | 28 | | FixedP(prep) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs |
29 | 29 | | SimpleLexArg(lex,_) -> StringMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs |
... | ... | @@ -34,7 +34,7 @@ let create_phrase_reqs s (reqs,noreqs) = function |
34 | 34 | let create_phrase_reqs2 s (reqs,noreqs) = function |
35 | 35 | | PrepNP(_,prep,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs |
36 | 36 | | PrepAdjP(prep,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs |
37 | - | PrepNCP(prep,_,_,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | |
37 | + | PrepNCP(_,prep,_,_,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | |
38 | 38 | | ComparP(prep,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs |
39 | 39 | | FixedP(prep) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs |
40 | 40 | | SimpleLexArg(lex,_) -> IntMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs |
... | ... | @@ -90,6 +90,7 @@ let select_comprep_adjuncts lexemes = |
90 | 90 | not (StringSet.is_empty (StringSet.intersection reqs lexemes)) then s :: l else l) |
91 | 91 | with Not_found -> l) |
92 | 92 | |
93 | +(* FIXME: trzeba zanalizować interację tej procedury z Pro w schemacie w wersji z walentym i z semantyką dziedzinową *) | |
93 | 94 | let set_necessary pos schema = |
94 | 95 | Xlist.map schema (fun p -> |
95 | 96 | let nec = |
... | ... | @@ -101,6 +102,8 @@ let set_necessary pos schema = |
101 | 102 | | _ -> b) then Req else |
102 | 103 | if p.gf <> SUBJ && p.cr = [] (*&& p.ce = []*) then Opt else |
103 | 104 | if p.gf = SUBJ && pos = "impt" then ProNG else |
105 | + if p.gf = SUBJ && pos = "pact" then Opt else | |
106 | + if p.gf = OBJ && pos = "ppas" then Opt else | |
104 | 107 | if Xlist.fold p.morfs false (fun b -> function |
105 | 108 | NP NomAgr -> true |
106 | 109 | | NCP(NomAgr,_,_) -> true |
... | ... | @@ -126,7 +129,7 @@ let reduce_phrase (test_comprep_reqs,test_comprep_reqs2,test_lexarg_reqs,test_le |
126 | 129 | | ComparP(prep,case) as phrase -> if test_lexemes prep then phrase else raise Not_found |
127 | 130 | | CP(ctype,comp) -> CP(ctype,reduce_comp test_lexemes comp) |
128 | 131 | | NCP(case,ctype,comp) -> if test_lexemes "to" then NCP(case,ctype,reduce_comp test_lexemes comp) else raise Not_found |
129 | - | PrepNCP(prep,case,ctype,comp) -> if test_lexemes prep && test_lexemes "to" then PrepNCP(prep,case,ctype,reduce_comp test_lexemes comp) else raise Not_found | |
132 | + | PrepNCP(psem,prep,case,ctype,comp) -> if test_lexemes prep && test_lexemes "to" then PrepNCP(psem,prep,case,ctype,reduce_comp test_lexemes comp) else raise Not_found | |
130 | 133 | | SimpleLexArg(lemma,_) as phrase -> if test_lexemes lemma then phrase else raise Not_found |
131 | 134 | | LexArg(id,lemma,_) as phrase -> if test_lexemes lemma && test_lexarg_reqs id then phrase else raise Not_found |
132 | 135 | | FixedP lemma as phrase -> if test_lexemes lemma then phrase else raise Not_found |
... | ... | @@ -168,11 +171,11 @@ let merge_entries phrases entries = |
168 | 171 | Entries.map entries (fun _ _ (opinion,neg,pred,aspect,schema) -> |
169 | 172 | opinion,neg,pred,aspect,merge_schema phrases schema) |
170 | 173 | |
171 | -let merge_entries_conn phrases meanings entries = | |
172 | - Entries.map entries (fun _ _ (sopinion,fopinion,meaning_ids,neg,pred,aspect,schema) -> | |
173 | - let meanings = Xlist.map meaning_ids (fun id -> | |
174 | - try IntMap.find meanings id with Not_found -> failwith "merge_entries_conn") in | |
175 | - sopinion,fopinion,meanings,neg,pred,aspect,merge_schema phrases schema) | |
174 | +let merge_entries_conn phrases senses entries = | |
175 | + Entries.map entries (fun _ _ (sopinion,fopinion,sense_ids,neg,pred,aspect,schema) -> | |
176 | + let senses = Xlist.map sense_ids (fun id -> | |
177 | + try IntMap.find senses id with Not_found -> failwith "merge_entries_conn") in | |
178 | + sopinion,fopinion,senses,neg,pred,aspect,merge_schema phrases schema) | |
176 | 179 | |
177 | 180 | let create_tests comprep_reqs comprep_reqs2 lexarg_reqs lexemes = |
178 | 181 | let lexemes = StringSet.add (StringSet.add lexemes "_") "" in |
... | ... | @@ -191,7 +194,7 @@ let create_tests comprep_reqs comprep_reqs2 lexarg_reqs lexemes = |
191 | 194 | StringSet.mem lexemes |
192 | 195 | |
193 | 196 | |
194 | -let select_entries_full phrases entries schemata connected meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes = | |
197 | +let select_entries_full phrases entries schemata connected senses comprep_reqs comprep_reqs2 lexarg_reqs lexemes = | |
195 | 198 | let tests = create_tests comprep_reqs comprep_reqs2 lexarg_reqs lexemes in |
196 | 199 | let entries = reduce_entries lexemes entries in |
197 | 200 | let schemata = reduce_entries lexemes schemata in |
... | ... | @@ -205,25 +208,25 @@ let select_entries_full phrases entries schemata connected meanings comprep_reqs |
205 | 208 | with ImpossibleSchema -> []) in |
206 | 209 | let schemata = Entries.map schemata (fun _ _ (opinion,neg,pred,aspect,schema) -> |
207 | 210 | opinion,neg,pred,aspect,reduce_schema2 tests schema) in |
208 | - let connected = merge_entries_conn phrases meanings connected in | |
209 | - let connected = Entries.map connected (fun _ _ (sopinion,fopinion,meaning_ids,neg,pred,aspect,schema) -> | |
210 | - sopinion,fopinion,meaning_ids,neg,pred,aspect,reduce_schema2 tests schema) in | |
211 | + let connected = merge_entries_conn phrases senses connected in | |
212 | + let connected = Entries.map connected (fun _ _ (sopinion,fopinion,sense_ids,neg,pred,aspect,schema) -> | |
213 | + sopinion,fopinion,sense_ids,neg,pred,aspect,reduce_schema2 tests schema) in | |
211 | 214 | entries,schemata,connected |
212 | 215 | |
213 | -let select_all_entries phrases entries schemata connected meanings = | |
216 | +let select_all_entries phrases entries schemata connected senses = | |
214 | 217 | let schemata = merge_entries phrases schemata in |
215 | - let connected = merge_entries_conn phrases meanings connected in | |
218 | + let connected = merge_entries_conn phrases senses connected in | |
216 | 219 | entries,schemata,connected |
217 | 220 | |
218 | 221 | let select_entries lexemes = |
219 | 222 | select_entries_full !ENIAMwalParser.phrases !ENIAMwalParser.entries !ENIAMwalParser.schemata |
220 | - !ENIAMwalParser.connected !ENIAMwalParser.meanings !comprep_reqs !comprep_reqs2 !lexarg_reqs lexemes | |
223 | + !ENIAMwalParser.connected !ENIAMwalParser.senses !comprep_reqs !comprep_reqs2 !lexarg_reqs lexemes | |
221 | 224 | |
222 | 225 | (* let entries,schemata,connected = |
223 | 226 | (* let lexemes = StringSet.of_list ["Ala"; "ma"; "kot"] in *) |
224 | 227 | let lexemes = StringSet.of_list ["dorastać"; "dorobić"; "po"; "bok"; "na"] in |
225 | 228 | select_entries ENIAMwalParser.phrases ENIAMwalParser.entries ENIAMwalParser.schemata |
226 | - ENIAMwalParser.connected ENIAMwalParser.meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes *) | |
229 | + ENIAMwalParser.connected ENIAMwalParser.senses comprep_reqs comprep_reqs2 lexarg_reqs lexemes *) | |
227 | 230 | |
228 | 231 | (* let _ = |
229 | 232 | StringMap.iter comprep_reqs (fun s set -> |
... | ... |
lexSemantics/ENIAMwalRenderer.ml
... | ... | @@ -53,7 +53,7 @@ let render_pos_entry = function |
53 | 53 | | "ppas" -> [Atom "ppas"; AVar "number"; AVar "case"; AVar "gender"; AVar "negation"] |
54 | 54 | | "inf" -> [Atom "inf"; AVar "aspect"; AVar "negation"] |
55 | 55 | | "qub" -> [Atom "qub"] |
56 | - | "compar" -> [Atom "compar"; AVar "case"] | |
56 | + | "compar" -> [Atom "comparp"; AVar "case"] | |
57 | 57 | | "comp" -> [Atom "comp"; AVar "ctype"] |
58 | 58 | | "fin" -> [Atom "pers"; AVar "negation"] |
59 | 59 | | "praet" -> [Atom "pers"; AVar "negation"] |
... | ... | @@ -117,7 +117,7 @@ let render_phrase = function |
117 | 117 | | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Atom case] |
118 | 118 | | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case] |
119 | 119 | | AdjP(Case case) -> Tensor[Atom "adjp"; Top; Atom case; Top] |
120 | -(* | AdjP NomAgr -> Tensor[Atom "adjp"; AVar "number"; Atom "nom"; AVar "gender"]*) | |
120 | + | AdjP NomAgr -> Tensor[Atom "adjp"; AVar "number"; Atom "nom"; AVar "gender"] | |
121 | 121 | | AdjP AllAgr -> Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"] |
122 | 122 | (* | AdjP CaseAgr -> Tensor[Atom "adjp"; Top; AVar "case"; Top] |
123 | 123 | | PrepAdjP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]*) |
... | ... | @@ -130,7 +130,7 @@ let render_phrase = function |
130 | 130 | | PrepNumP(_,prep,Case case) -> Tensor[Atom "prepnump"; Atom prep; Atom case] *) |
131 | 131 | (* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top]*) |
132 | 132 | | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom prep] |
133 | - | ComparP(prep,Case case) -> Tensor[Atom "compar"; Atom prep; Atom case] | |
133 | + | ComparP(prep,Case case) -> Tensor[Atom "comparp"; Atom prep; Atom case] | |
134 | 134 | (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep] *) |
135 | 135 | (* | IP -> Tensor[Atom "ip";Top;Top;Top] *) |
136 | 136 | | CP (ctype,Comp comp) -> Tensor[Atom "cp"; arg_of_ctype ctype; Atom comp] |
... | ... | @@ -139,8 +139,10 @@ let render_phrase = function |
139 | 139 | | NCP(Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top] |
140 | 140 | | NCP(NomAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp] |
141 | 141 | | NCP(NomAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top] |
142 | - | PrepNCP(prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp] | |
143 | - | PrepNCP(prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom prep; Atom case; Top; Top] | |
142 | + | PrepNCP(Psem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp] | |
143 | + | PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top] | |
144 | + | PrepNCP(Pnosem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp] | |
145 | + | PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top] | |
144 | 146 | | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom aspect] |
145 | 147 | | InfP AspectUndef -> Tensor[Atom "infp"; Top] |
146 | 148 | (* | PadvP -> Tensor[Atom "padvp"] *) |
... | ... | @@ -171,54 +173,64 @@ let render_phrase = function |
171 | 173 | | E (PrepNP(Pnosem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case] |
172 | 174 | | E (NP(Case case)) -> Tensor[Atom "np"; Top; Atom case; Top; Top] |
173 | 175 | | E (NCP(Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top] |
174 | - | E (PrepNCP(prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom prep; Atom case; Top; Top] | |
176 | + | E (PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top] | |
177 | + | E (PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top] | |
175 | 178 | | phrase -> failwith ("render_phrase: " ^ ENIAMwalStringOf.phrase phrase) |
176 | 179 | |
177 | -let render_phrase_cat cat = function | |
178 | - NP(Case case) -> Tensor[Atom "np"; Atom cat; Top; Atom case; Top; Top] | |
179 | - | NP NomAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] | |
180 | -(* | NP GenAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"] | |
181 | - | NP AllAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]*) | |
182 | - | NP CaseAgr -> Tensor[Atom "np"; Atom cat; Top; AVar "case"; Top; Top] | |
183 | - | NP CaseUndef -> Tensor[Atom "np"; Atom cat; Top; Top; Top; Top] | |
184 | - | PrepNP(Psem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top] | |
185 | - | PrepNP(Psem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top] | |
186 | - | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case] | |
187 | - | PrepNP(Pnosem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top] | |
188 | - | PrepNP(Pnosem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top] | |
189 | - | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case] | |
190 | - | AdjP(Case case) -> Tensor[Atom "adjp"; Atom cat; Top; Atom case; Top] | |
191 | -(* | AdjP NomAgr -> Tensor[Atom "adjp"; AVar "number"; Atom "nom"; AVar "gender"]*) | |
192 | - | AdjP AllAgr -> Tensor[Atom "adjp"; Atom cat; AVar "number"; AVar "case"; AVar "gender"] | |
193 | -(* | AdjP CaseAgr -> Tensor[Atom "adjp"; Top; AVar "case"; Top] | |
194 | - | PrepAdjP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]*) | |
195 | - | PrepAdjP(prep,Case case) -> Tensor[Atom "prepadjp"; Atom cat; Atom prep; Atom case] | |
196 | - (* | NumP(Case case) -> Tensor[Atom "nump"; Top; Atom case; Top; Top] | |
180 | +let render_phrase_cat cat role node = function | |
181 | + NP(Case case) -> Tensor[Atom "np"; Top; Atom case; Top; Top; Atom cat; Atom role; Atom node] | |
182 | + | NP NomAgr -> Tensor[Atom "np"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Atom cat; Atom role; Atom node] | |
183 | + | NP VocAgr -> Tensor[Atom "np"; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; Atom cat; Atom role; Atom node] | |
184 | +(* | NP GenAgr -> Tensor[Atom "np"; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"; Atom cat; Atom role; Atom node] | |
185 | + | NP AllAgr -> Tensor[Atom "np"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"; Atom cat; Atom role; Atom node]*) | |
186 | + | NP CaseAgr -> Tensor[Atom "np"; Top; AVar "case"; Top; Top; Atom cat; Atom role; Atom node] | |
187 | + | NP CaseUndef -> Tensor[Atom "np"; Top; Top; Top; Top; Atom cat; Atom role; Atom node] | |
188 | + | PrepNP(Psem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom "sem"; Top; Top; Atom cat; Atom role; Atom node] | |
189 | + | PrepNP(Psem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom "sem"; Top; Top; Atom cat; Atom role; Atom node] | |
190 | + | PrepNP(Psem,"_",Case case) -> Tensor[Atom "prepnp"; Atom "sem"; Top; Atom case; Atom cat; Atom role; Atom node] | |
191 | + | PrepNP(Psem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Top; Atom cat; Atom role; Atom node] | |
192 | + | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Atom case; Atom cat; Atom role; Atom node] | |
193 | + | PrepNP(Pnosem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom "nosem"; Top; Top; Atom cat; Atom role; Atom node] | |
194 | + | PrepNP(Pnosem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom "nosem"; Top; Top; Atom cat; Atom role; Atom node] | |
195 | + | PrepNP(Pnosem,"_",Case case) -> Tensor[Atom "prepnp"; Atom "nosem"; Top; Atom case; Atom cat; Atom role; Atom node] | |
196 | + | PrepNP(Pnosem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Top; Atom cat; Atom role; Atom node] | |
197 | + | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case; Atom cat; Atom role; Atom node] | |
198 | + | AdjP(Case case) -> Tensor[Atom "adjp"; Top; Atom case; Top; Top; Atom cat; Atom role; Atom node] | |
199 | + | AdjP NomAgr -> Tensor[Atom "adjp"; AVar "number"; Atom "nom"; AVar "gender"; Top; Atom cat; Atom role; Atom node] | |
200 | + | AdjP AllAgr -> Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"; Top; Atom cat; Atom role; Atom node] | |
201 | +(* | AdjP CaseAgr -> Tensor[Atom "adjp"; Top; AVar "case"; Top; Top; Atom cat; Atom role; Atom node] | |
202 | + | PrepAdjP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top; Atom cat; Atom role; Atom node]*) | |
203 | + | PrepAdjP(prep,Case case) -> Tensor[Atom "prepadjp"; Atom prep; Atom case; Atom cat; Atom role; Atom node] | |
204 | + (* | NumP(Case case) -> Tensor[Atom "nump"; Top; Atom case; Top; Atom node] | |
197 | 205 | | NumP NomAgr -> Tensor[Atom "nump"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] |
198 | 206 | | NumP CaseAgr -> Tensor[Atom "nump"; Top; AVar "case"; Top; Top] |
199 | 207 | | NumP CaseUndef -> Tensor[Atom "nump"; Top; Top; Top; Top] |
200 | 208 | | PrepNumP(_,"",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top] |
201 | 209 | | PrepNumP(_,prep,Case case) -> Tensor[Atom "prepnump"; Atom prep; Atom case] *) |
202 | -(* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top]*) | |
203 | - | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom cat; Atom prep] | |
204 | - | ComparP(prep,Case case) -> Tensor[Atom "compar"; Atom cat; Atom prep; Atom case] | |
205 | - (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep] *) | |
206 | - (* | IP -> Tensor[Atom "ip";Top;Top;Top] *) | |
207 | - | CP (ctype,Comp comp) -> Tensor[Atom "cp"; Atom cat; arg_of_ctype ctype; Atom comp] | |
208 | - (* | CP (ctype,CompUndef) -> Tensor[Atom "cp"; arg_of_ctype ctype; Top]*) | |
209 | - | NCP(Case case,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; arg_of_ctype ctype; Atom comp] | |
210 | - | NCP(Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; Top; Top] | |
211 | - | NCP(NomAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp] | |
212 | - | NCP(NomAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top] | |
213 | - | PrepNCP(prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom cat; Atom prep; Atom case; arg_of_ctype ctype; Atom comp] | |
214 | - | PrepNCP(prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom cat; Atom prep; Atom case; Top; Top] | |
215 | - | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom cat; Atom aspect] | |
216 | - | InfP AspectUndef -> Tensor[Atom "infp"; Atom cat; Top] | |
217 | - (* | PadvP -> Tensor[Atom "padvp"] *) | |
218 | - | AdvP "misc" -> Tensor[Atom "advp"; Atom cat; Top] (* FIXME: a może Atom "mod" zamiast Top *) | |
219 | - | AdvP "" -> Tensor[Atom "advp"; Atom cat; Top] (* FIXME: a może Atom "mod" zamiast Top *) | |
220 | - | AdvP mode -> Tensor[Atom "advp"; Atom cat; Atom mode] | |
221 | - | ColonP -> Tensor[Atom "colonp"; Atom cat] | |
210 | +(* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top; Atom cat; Atom role; Atom node]*) | |
211 | + | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom prep; Atom cat; Atom role; Atom node] | |
212 | + | ComparP(prep,Case case) -> Tensor[Atom "comparp"; Atom prep; Atom case; Atom cat; Atom role; Atom node] | |
213 | + (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep; Atom cat; Atom role; Atom node] *) | |
214 | + (* | IP -> Tensor[Atom "ip";Top;Top;Top; Atom cat; Atom role; Atom node] *) | |
215 | + | CP (ctype,Comp comp) -> Tensor[Atom "cp"; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Atom node] | |
216 | + (* | CP (ctype,CompUndef) -> Tensor[Atom "cp"; arg_of_ctype ctype; Top; Atom cat; Atom role; Atom node]*) | |
217 | + | NCP(Case case,ctype,Comp comp) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Atom node] | |
218 | + | NCP(Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top; Atom cat; Atom role; Atom node] | |
219 | + | NCP(NomAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Atom node] | |
220 | + | NCP(NomAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top; Atom cat; Atom role; Atom node] | |
221 | + | NCP(VocAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Atom node] | |
222 | + | NCP(VocAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; Top; Top; Atom cat; Atom role; Atom node] | |
223 | + | PrepNCP(Psem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Atom node] | |
224 | + | PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top; Atom cat; Atom role; Atom node] | |
225 | + | PrepNCP(Pnosem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Atom node] | |
226 | + | PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top; Atom cat; Atom role; Atom node] | |
227 | + | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom aspect; Atom cat; Atom role; Atom node] | |
228 | + | InfP AspectUndef -> Tensor[Atom "infp"; Top; Atom cat; Atom role; Atom node] | |
229 | + (* | PadvP -> Tensor[Atom "padvp"; Atom cat; Atom role; Atom node] *) | |
230 | + | AdvP "misc" -> Tensor[Atom "advp"; Top; Atom cat; Atom role; Atom node] (* FIXME: a może Atom "mod" zamiast Top *) | |
231 | + | AdvP "" -> Tensor[Atom "advp"; Top; Atom cat; Atom role; Atom node] (* FIXME: a może Atom "mod" zamiast Top *) | |
232 | + | AdvP mode -> Tensor[Atom "advp"; Top; Atom cat; Atom role; Atom node] | |
233 | + | ColonP -> Tensor[Atom "colonp"; Atom cat; Atom cat; Atom role; Atom node] | |
222 | 234 | (* | PrepP -> Tensor[Atom "prepp";Top] |
223 | 235 | | Prep("",CaseAgr) -> Tensor[Atom "prep"; Top; AVar "case"] |
224 | 236 | | Prep("",CaseUAgr) -> Tensor[Atom "prep"; Top; AVar "ucase"] |
... | ... | @@ -233,14 +245,15 @@ let render_phrase_cat cat = function |
233 | 245 | | AuxImp -> Tensor[Atom "aux-imp"] |
234 | 246 | | Pro -> One |
235 | 247 | | ProNG -> One *) |
236 | - | E (CP(CompTypeUndef,CompUndef)) -> Tensor[Atom "cp"; Atom cat; Top; Top] | |
237 | - | E (NCP(NomAgr,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top] | |
238 | - | E (NP(NomAgr)) -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] | |
239 | - | E (PrepNP(Psem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case] | |
240 | - | E (PrepNP(Pnosem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case] | |
241 | - | E (NP(Case case)) -> Tensor[Atom "np"; Atom cat; Top; Atom case; Top; Top] | |
242 | - | E (NCP(Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; Top; Top] | |
243 | - | E (PrepNCP(prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom cat; Atom prep; Atom case; Top; Top] | |
248 | + | E (CP(CompTypeUndef,CompUndef)) -> Tensor[Atom "cp"; Top; Top; Atom cat; Atom role; Atom node] | |
249 | + | E (NCP(NomAgr,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top; Atom cat; Atom role; Atom node] | |
250 | + | E (NP(NomAgr)) -> Tensor[Atom "np"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Atom cat; Atom role; Atom node] | |
251 | + | E (PrepNP(Psem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Atom case; Atom cat; Atom role; Atom node] | |
252 | + | E (PrepNP(Pnosem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case; Atom cat; Atom role; Atom node] | |
253 | + | E (NP(Case case)) -> Tensor[Atom "np"; Top; Atom case; Top; Top; Atom cat; Atom role; Atom node] | |
254 | + | E (NCP(Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top; Atom cat; Atom role; Atom node] | |
255 | + | E (PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top; Atom cat; Atom role; Atom node] | |
256 | + | E (PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top; Atom cat; Atom role; Atom node] | |
244 | 257 | | phrase -> failwith ("render_phrase_cat: " ^ ENIAMwalStringOf.phrase phrase) |
245 | 258 | |
246 | 259 | let render_morf = function |
... | ... | @@ -251,7 +264,7 @@ let render_morf = function |
251 | 264 | | SimpleLexArg(lex,pos) -> Tensor([Atom "lex";Atom lex] @ render_pos pos) |
252 | 265 | | phrase -> render_phrase phrase |
253 | 266 | |
254 | -let render_morf_cat cats = function | |
267 | +let render_morf_cat cats role node = function | |
255 | 268 | | Null -> [One] |
256 | 269 | | Pro -> [One] |
257 | 270 | | ProNG -> [One] |
... | ... | @@ -261,13 +274,13 @@ let render_morf_cat cats = function |
261 | 274 | (* | X -> Tensor[Atom "X"] |
262 | 275 | | Lex lex -> Tensor[Atom lex] *) |
263 | 276 | | LexArg(id,lex,pos) -> [Tensor([Atom "lex";Atom (string_of_int id);Atom lex] @ render_pos pos)] |
264 | - | SimpleLexArg(lex,pos) -> [Tensor([Atom "lex";Atom lex] @ render_pos pos)] | |
265 | - | phrase -> Xlist.map cats (fun cat -> render_phrase_cat cat phrase) | |
277 | + | SimpleLexArg(lex,pos) -> [Tensor([Atom "lex";Atom lex] @ render_pos pos @ [Atom role; Atom node])] | |
278 | + | phrase -> Xlist.map cats (fun cat -> render_phrase_cat cat role node phrase) | |
266 | 279 | |
267 | -let extract_sel_prefs sel_prefs = | |
280 | +(* let extract_sel_prefs sel_prefs = | |
268 | 281 | Xlist.map sel_prefs (function |
269 | 282 | SynsetName s -> s |
270 | - | _ -> failwith "extract_sel_prefs") | |
283 | + | _ -> failwith "extract_sel_prefs") *) | |
271 | 284 | |
272 | 285 | let render_schema schema = |
273 | 286 | Xlist.map schema (fun p -> |
... | ... | @@ -283,7 +296,7 @@ let translate_dir = function |
283 | 296 | |
284 | 297 | let render_schema_cat schema = |
285 | 298 | Xlist.map schema (fun p -> |
286 | - match List.flatten (Xlist.map p.morfs (render_morf_cat (extract_sel_prefs p.sel_prefs))) with | |
299 | + match List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs p.role p.node)) with | |
287 | 300 | [] -> failwith "render_schema" |
288 | 301 | | [s] -> translate_dir p.dir,s |
289 | 302 | | l -> translate_dir p.dir,Plus l) |
... | ... | @@ -298,7 +311,8 @@ let render_connected_schema schema = |
298 | 311 | |
299 | 312 | let render_connected_schema_cat schema = |
300 | 313 | Xlist.map schema (fun p -> |
301 | - {p with morfs=Xlist.map (List.flatten (Xlist.map p.morfs (render_morf_cat (extract_sel_prefs p.sel_prefs)))) (fun morf -> LCG morf)}) | |
314 | + {p with | |
315 | + morfs=Xlist.map (List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs p.role p.node))) (fun morf -> LCG morf)}) | |
302 | 316 | |
303 | 317 | (* FIXME: tu trzeba by dodać zwykłe reguły dla czasowników dotyczące ich negacji, aglutynatu itp. *) |
304 | 318 | let render_lex_entry = function |
... | ... | @@ -353,9 +367,9 @@ let render_connected_prepadjp prep cases = |
353 | 367 | adjunct (postp @ (Xlist.map cases (fun case -> |
354 | 368 | Tensor[Atom "prepadjp"; Atom prep; Atom case]))) |
355 | 369 | |
356 | -let render_compar prep = Both,Plus[One;Tensor[Atom "compar"; Atom prep; Top]] | |
370 | +let render_compar prep = Both,Plus[One;Tensor[Atom "comparp"; Atom prep; Top]] | |
357 | 371 | |
358 | -let render_connected_compar prep = adjunct [Tensor[Atom "compar"; Atom prep; Top]] | |
372 | +let render_connected_compar prep = adjunct [Tensor[Atom "comparp"; Atom prep; Top]] | |
359 | 373 | |
360 | 374 | let verb_adjuncts_simp = [ |
361 | 375 | Both, Plus[One;Tensor[Atom "advp"; Atom "pron"]]; |
... | ... |
lexSemantics/ENIAMwalRenderer_old.ml
0 → 100644
1 | +(* | |
2 | + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. | |
3 | + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | + * | |
6 | + * This library is free software: you can redistribute it and/or modify | |
7 | + * it under the terms of the GNU Lesser General Public License as published by | |
8 | + * the Free Software Foundation, either version 3 of the License, or | |
9 | + * (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + * GNU Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public License | |
17 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | + *) | |
19 | + | |
20 | +open ENIAM_LCGtypes | |
21 | +open ENIAMwalTypes | |
22 | + | |
23 | +let arg_of_ctype = function | |
24 | + Int -> Atom "int" | |
25 | + | Rel -> Atom "rel" | |
26 | + (* | Sub -> LCGtypes.Atom "sub" | |
27 | + | Coord -> LCGtypes.Atom "coord" *) | |
28 | + | CompTypeUndef -> Top | |
29 | + (* | CompTypeAgr -> LCGtypes.AVar "ctype" *) | |
30 | + | |
31 | +let render_number = function | |
32 | + Number n -> Atom n | |
33 | + | NumberUndef -> Top | |
34 | + | NumberAgr -> Top | |
35 | + | |
36 | +let render_negation = function | |
37 | + Negation -> Atom "neg" | |
38 | + | Aff -> Atom "aff" | |
39 | + | NegationUndef -> Top | |
40 | + | |
(* Render the category/feature list for a lexical entry's part of speech,
   using agreement variables (AVar) for every feature slot.
   "intnum" is folded into "num"; all finite-verb tags ("fin", "praet",
   "pred", "winien", "bedzie") collapse to "pers". Raises Failure on an
   unknown tag. *)
let render_pos_entry pos =
  (* Shared shape for noun-like categories: number/case/gender/person. *)
  let nominal p = [Atom p; AVar "number"; AVar "case"; AVar "gender"; AVar "person"] in
  match pos with
  | ("subst" | "ppron12" | "ppron3" | "num") as p -> nominal p
  | "intnum" -> nominal "num"
  | "siebie" -> [Atom "siebie"; AVar "case"]
  | "prep" -> [Atom "prep"; AVar "case"]
  | "compar" -> [Atom "compar"; AVar "case"]
  | "adj" -> [Atom "adj"; AVar "number"; AVar "case"; AVar "gender"; AVar "grad"]
  | "adv" -> [Atom "adv"; AVar "grad"]
  | "ger" -> [Atom "ger"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"; AVar "negation"]
  | "pact" -> [Atom "pact"; AVar "number"; AVar "case"; AVar "gender"; AVar "negation"]
  | "ppas" -> [Atom "ppas"; AVar "number"; AVar "case"; AVar "gender"; AVar "negation"]
  | "inf" -> [Atom "inf"; AVar "aspect"; AVar "negation"]
  | "qub" -> [Atom "qub"]
  | "comp" -> [Atom "comp"; AVar "ctype"]
  | "fin" | "praet" | "pred" | "winien" | "bedzie" -> [Atom "pers"; AVar "negation"]
  | s -> failwith ("render_pos_entry: " ^ s)
64 | + | |
(* Render a part-of-speech argument specification (from the valence lexicon)
   as its feature-term list. Agreement markers (AVar) rather than concrete
   feature values are introduced here, because feature values are supplied by
   the lexeme while agreements bind them to the governor's values.
   Raises Failure on an unhandled pos variant. *)
let render_pos = function
  | SUBST(number,Case case) -> [Atom "subst"; render_number number; Atom case; Top; Top]
  | SUBST(_,NomAgr) -> [Atom "subst"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
  | SUBST(_,GenAgr) -> [Atom "subst"; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"]
  | SUBST(_,AllAgr) -> [Atom "subst"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]
  | SUBST(number,CaseAgr) -> [Atom "subst"; render_number number; AVar "case"; Top; Top]
  | SUBST(_,CaseUndef) -> [Atom "subst"; Top; Top; Top; Top]
  | PPRON12(number,Case case) -> [Atom "ppron12"; render_number number; Atom case; Top; Top]
  | PPRON3(number,Case case) -> [Atom "ppron3"; render_number number; Atom case; Top; Top]
  | SIEBIE(Case case) -> [Atom "siebie"; Atom case]
  | NUM(Case case,_) -> [Atom "num"; Top; Atom case; Top; Top]
  | NUM(NomAgr,_) -> [Atom "num"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
(* | NUM(CaseAgr,_) -> [Atom "num"; Top; AVar "case"; Top; Top]
  | NUM(CaseUndef,_) -> [Atom "num"; Top; Top; Top; Top]*)
  | PREP(Case case) -> [Atom "prep"; Atom case]
  | ADJ(_,Case case,_,Grad grad) -> [Atom "adj"; Top; Atom case; Top; Atom grad]
(* | ADJ(_,NomAgr,_,_) -> [Atom "adj"; AVar "number"; Atom "nom"; AVar "gender"]
  | ADJ(_,CaseAgr,_,_) -> [Atom "adj"; Top; AVar "case"; Top]*)
  | ADJ(_,CaseUndef,_,Grad grad) -> [Atom "adj"; Top; Top; Top; Atom grad]
  | ADJ(_,AllAgr,_,Grad grad) -> [Atom "adj"; AVar "number"; AVar "case"; AVar "gender"; Atom grad]
  | ADJ(_,AllAgr,_,GradUndef) -> [Atom "adj"; AVar "number"; AVar "case"; AVar "gender"; Top]
  | ADV (Grad grad) -> [Atom "adv"; Atom grad]
  | ADV GradUndef -> [Atom "adv"; Top]
  | GER(_,Case case,_,_,neg) -> [Atom "ger"; Top; Atom case; Top; Top; render_negation neg]
(* | GER(_,NomAgr,_,_,_) -> [Atom "ger"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
  | GER(_,CaseAgr,_,_,_) -> [Atom "ger"; Top; AVar "case"; Top; Top]
  | GER(_,CaseUndef,_,_,_) -> [Atom "ger"; Top; Top; Top; Top]
  | PACT(_,Case case,_,_,_) -> [Atom "pact"; Top; Atom case; Top]
  | PACT(_,NomAgr,_,_,_) -> [Atom "pact"; AVar "number"; Atom "nom"; AVar "gender"]*)
  | PACT(_,AllAgr,_,_,neg) -> [Atom "pact"; AVar "number"; AVar "case"; AVar "gender"; render_negation neg]
(* | PACT(_,CaseAgr,_,_,_) -> [Atom "pact"; Top; AVar "case"; Top]*)
  | PPAS(_,Case case,_,_,neg) -> [Atom "ppas"; Top; Atom case; Top; render_negation neg]
  | PPAS(_,CaseUndef,_,_,neg) -> [Atom "ppas"; Top; Top; Top; render_negation neg]
  (* | PPAS(_,NomAgr,_,_,_) -> [Atom "ppas"; AVar "number"; Atom "nom"; AVar "gender"]*)
  | PPAS(_,AllAgr,_,_,neg) -> [Atom "ppas"; AVar "number"; AVar "case"; AVar "gender"; render_negation neg]
(* | PPAS(_,CaseAgr,_,_,_) -> [Atom "ppas"; Top; AVar "case"; Top]*)
  | INF(Aspect aspect,neg) -> [Atom "inf"; Atom aspect; render_negation neg]
  | INF(AspectUndef,neg) -> [Atom "inf"; Top; render_negation neg]
  | QUB -> [Atom "qub"]
  | COMPAR (Case case) -> [Atom "compar"; Atom case]
  | COMP ctype -> [Atom "comp"; arg_of_ctype ctype]
  (* All finite-verb forms are rendered uniformly as "pers". *)
  | PERS neg -> [Atom "pers"; render_negation neg]
  | pos -> failwith ("render_pos: " ^ ENIAMwalStringOf.pos pos)
108 | + | |
(* Render a phrase-level argument specification as an LCG grammar type
   (Tensor of category atoms/agreement variables). The E(...) cases render an
   elided phrase the same way as the overt one. Raises Failure on an
   unhandled phrase variant. *)
let render_phrase = function
    NP(Case case) -> Tensor[Atom "np"; Top; Atom case; Top; Top]
  | NP NomAgr -> Tensor[Atom "np"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
(* | NP GenAgr -> Tensor[Atom "np"; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"]
  | NP AllAgr -> Tensor[Atom "np"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]*)
  | NP CaseAgr -> Tensor[Atom "np"; Top; AVar "case"; Top; Top]
(* | NP CaseUndef -> Tensor[Atom "np"; Top; Top; Top; Top]
  | PrepNP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]*)
  (* The second slot distinguishes semantic ("sem") from non-semantic
     ("nosem") prepositions. *)
  | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Atom case]
  | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case]
  | AdjP(Case case) -> Tensor[Atom "adjp"; Top; Atom case; Top]
  | AdjP NomAgr -> Tensor[Atom "adjp"; AVar "number"; Atom "nom"; AVar "gender"]
  | AdjP AllAgr -> Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]
(* | AdjP CaseAgr -> Tensor[Atom "adjp"; Top; AVar "case"; Top]
  | PrepAdjP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]*)
  | PrepAdjP(prep,Case case) -> Tensor[Atom "prepadjp"; Atom prep; Atom case]
  (* | NumP(Case case) -> Tensor[Atom "nump"; Top; Atom case; Top; Top]
  | NumP NomAgr -> Tensor[Atom "nump"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
  | NumP CaseAgr -> Tensor[Atom "nump"; Top; AVar "case"; Top; Top]
  | NumP CaseUndef -> Tensor[Atom "nump"; Top; Top; Top; Top]
  | PrepNumP(_,"",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]
  | PrepNumP(_,prep,Case case) -> Tensor[Atom "prepnump"; Atom prep; Atom case] *)
(* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top]*)
  | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom prep]
  | ComparP(prep,Case case) -> Tensor[Atom "compar"; Atom prep; Atom case]
  (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep] *)
  (* | IP -> Tensor[Atom "ip";Top;Top;Top] *)
  | CP (ctype,Comp comp) -> Tensor[Atom "cp"; arg_of_ctype ctype; Atom comp]
  (* | CP (ctype,CompUndef) -> Tensor[Atom "cp"; arg_of_ctype ctype; Top]*)
  | NCP(Case case,ctype,Comp comp) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; arg_of_ctype ctype; Atom comp]
  | NCP(Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top]
  | NCP(NomAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp]
  | NCP(NomAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top]
  | PrepNCP(Psem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp]
  | PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top]
  | PrepNCP(Pnosem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp]
  | PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top]
  | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom aspect]
  | InfP AspectUndef -> Tensor[Atom "infp"; Top]
  (* | PadvP -> Tensor[Atom "padvp"] *)
  | AdvP "misc" -> Tensor[Atom "advp"; Top] (* FIXME: perhaps Atom "mod" instead of Top *)
  | AdvP mode -> Tensor[Atom "advp"; Atom mode]
  | ColonP -> Tensor[Atom "colonp"]
  | FixedP lex -> Tensor[Atom "fixed"; Atom lex]
  (* | PrepP -> Tensor[Atom "prepp";Top]
  | Prep("",CaseAgr) -> Tensor[Atom "prep"; Top; AVar "case"]
  | Prep("",CaseUAgr) -> Tensor[Atom "prep"; Top; AVar "ucase"]
  | Num(AllAgr,Acm acm) -> Tensor[Atom "num"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"; Atom acm]
  | Measure(AllUAgr) -> Tensor[Atom "measure"; AVar "unumber"; AVar "ucase"; AVar "ugender"; AVar "uperson"] *)
  | Or -> Tensor[Atom "or"]
  (* | Qub -> Tensor[Atom "qub"]*)
  (* | Inclusion -> Tensor[Atom "inclusion"]
  | Adja -> Tensor[Atom "adja"]
  | Aglt -> Tensor[Atom "aglt"; AVar "number"; AVar "person"]
  | AuxPast -> Tensor[Atom "aux-past"; AVar "number"; AVar "gender"; AVar "person"]
  | AuxFut -> Tensor[Atom "aux-fut"; AVar "number"; AVar "gender"; AVar "person"]
  | AuxImp -> Tensor[Atom "aux-imp"]
  | Pro -> One
  | ProNG -> One *)
  (* Elided-argument variants: rendered identically to the overt phrase. *)
  | E Or -> Tensor[Atom "or"]
  | E (CP(CompTypeUndef,CompUndef)) -> Tensor[Atom "cp"; Top; Top]
  | E (NCP(NomAgr,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top]
  | E (NP(NomAgr)) -> Tensor[Atom "np"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
  | E (PrepNP(Psem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Atom case]
  | E (PrepNP(Pnosem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case]
  | E (NP(Case case)) -> Tensor[Atom "np"; Top; Atom case; Top; Top]
  | E (NCP(Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top]
  | E (PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top]
  | E (PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top]
  | phrase -> failwith ("render_phrase: " ^ ENIAMwalStringOf.phrase phrase)
179 | + | |
(* Like render_phrase, but additionally threads the semantic category [cat]
   as the second tensor slot of every rendered phrase. Also covers VocAgr
   and the underspecified-preposition patterns ("" and "_") that the plain
   renderer does not. Raises Failure on an unhandled phrase variant. *)
let render_phrase_cat cat = function
    NP(Case case) -> Tensor[Atom "np"; Atom cat; Top; Atom case; Top; Top]
  | NP NomAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
  | NP VocAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"]
(* | NP GenAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"]
  | NP AllAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]*)
  | NP CaseAgr -> Tensor[Atom "np"; Atom cat; Top; AVar "case"; Top; Top]
  | NP CaseUndef -> Tensor[Atom "np"; Atom cat; Top; Top; Top; Top]
  (* "" and "_" stand for an unspecified preposition lemma. *)
  | PrepNP(Psem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top]
  | PrepNP(Psem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top]
  | PrepNP(Psem,"_",Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Atom case]
  | PrepNP(Psem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Top]
  | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case]
  | PrepNP(Pnosem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top]
  | PrepNP(Pnosem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top]
  | PrepNP(Pnosem,"_",Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Atom case]
  | PrepNP(Pnosem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Top]
  | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case]
  | AdjP(Case case) -> Tensor[Atom "adjp"; Atom cat; Top; Atom case; Top]
  | AdjP NomAgr -> Tensor[Atom "adjp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"]
  | AdjP AllAgr -> Tensor[Atom "adjp"; Atom cat; AVar "number"; AVar "case"; AVar "gender"]
(* | AdjP CaseAgr -> Tensor[Atom "adjp"; Top; AVar "case"; Top]
  | PrepAdjP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]*)
  | PrepAdjP(prep,Case case) -> Tensor[Atom "prepadjp"; Atom cat; Atom prep; Atom case]
  (* | NumP(Case case) -> Tensor[Atom "nump"; Top; Atom case; Top; Top]
  | NumP NomAgr -> Tensor[Atom "nump"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
  | NumP CaseAgr -> Tensor[Atom "nump"; Top; AVar "case"; Top; Top]
  | NumP CaseUndef -> Tensor[Atom "nump"; Top; Top; Top; Top]
  | PrepNumP(_,"",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]
  | PrepNumP(_,prep,Case case) -> Tensor[Atom "prepnump"; Atom prep; Atom case] *)
(* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top]*)
  | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom cat; Atom prep]
  | ComparP(prep,Case case) -> Tensor[Atom "compar"; Atom cat; Atom prep; Atom case]
  (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep] *)
  (* | IP -> Tensor[Atom "ip";Top;Top;Top] *)
  | CP (ctype,Comp comp) -> Tensor[Atom "cp"; Atom cat; arg_of_ctype ctype; Atom comp]
  (* | CP (ctype,CompUndef) -> Tensor[Atom "cp"; arg_of_ctype ctype; Top]*)
  | NCP(Case case,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; arg_of_ctype ctype; Atom comp]
  | NCP(Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; Top; Top]
  | NCP(NomAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp]
  | NCP(NomAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top]
  | NCP(VocAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp]
  | NCP(VocAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; Top; Top]
  | PrepNCP(Psem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom cat; Atom "sem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp]
  | PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom cat; Atom "sem"; Atom prep; Atom case; Top; Top]
  | PrepNCP(Pnosem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom cat; Atom "nosem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp]
  | PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom cat; Atom "nosem"; Atom prep; Atom case; Top; Top]
  | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom cat; Atom aspect]
  | InfP AspectUndef -> Tensor[Atom "infp"; Atom cat; Top]
  (* | PadvP -> Tensor[Atom "padvp"] *)
  | AdvP "misc" -> Tensor[Atom "advp"; Atom cat; Top] (* FIXME: perhaps Atom "mod" instead of Top *)
  | AdvP "" -> Tensor[Atom "advp"; Atom cat; Top] (* FIXME: perhaps Atom "mod" instead of Top *)
  | AdvP mode -> Tensor[Atom "advp"; Atom cat; Atom mode]
  | ColonP -> Tensor[Atom "colonp"; Atom cat]
  (* | PrepP -> Tensor[Atom "prepp";Top]
  | Prep("",CaseAgr) -> Tensor[Atom "prep"; Top; AVar "case"]
  | Prep("",CaseUAgr) -> Tensor[Atom "prep"; Top; AVar "ucase"]
  | Num(AllAgr,Acm acm) -> Tensor[Atom "num"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"; Atom acm]
  | Measure(AllUAgr) -> Tensor[Atom "measure"; AVar "unumber"; AVar "ucase"; AVar "ugender"; AVar "uperson"] *)
  (* | Qub -> Tensor[Atom "qub"]*)
  (* | Inclusion -> Tensor[Atom "inclusion"]
  | Adja -> Tensor[Atom "adja"]
  | Aglt -> Tensor[Atom "aglt"; AVar "number"; AVar "person"]
  | AuxPast -> Tensor[Atom "aux-past"; AVar "number"; AVar "gender"; AVar "person"]
  | AuxFut -> Tensor[Atom "aux-fut"; AVar "number"; AVar "gender"; AVar "person"]
  | AuxImp -> Tensor[Atom "aux-imp"]
  | Pro -> One
  | ProNG -> One *)
  (* Elided-argument variants: rendered identically to the overt phrase.
     NOTE(review): E Or is not handled here (it is in render_morf_cat) —
     presumably intentional; confirm. *)
  | E (CP(CompTypeUndef,CompUndef)) -> Tensor[Atom "cp"; Atom cat; Top; Top]
  | E (NCP(NomAgr,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top]
  | E (NP(NomAgr)) -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
  | E (PrepNP(Psem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case]
  | E (PrepNP(Pnosem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case]
  | E (NP(Case case)) -> Tensor[Atom "np"; Atom cat; Top; Atom case; Top; Top]
  | E (NCP(Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; Top; Top]
  | E (PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom cat; Atom "sem"; Atom prep; Atom case; Top; Top]
  | E (PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom cat; Atom "nosem"; Atom prep; Atom case; Top; Top]
  | phrase -> failwith ("render_phrase_cat: " ^ ENIAMwalStringOf.phrase phrase)
258 | + | |
(* Render one morphosyntactic argument description as an LCG grammar type:
   Null becomes the unit One, lexical arguments become "lex" tensors carrying
   the lemma (and entry id for LexArg) plus the pos features, and every other
   variant is delegated to render_phrase. *)
let render_morf morf =
  match morf with
  | Null -> One
  | LexArg(id,lex,pos) ->
      Tensor (Atom "lex" :: Atom (string_of_int id) :: Atom lex :: render_pos pos)
  | SimpleLexArg(lex,pos) ->
      Tensor (Atom "lex" :: Atom lex :: render_pos pos)
  | phrase -> render_phrase phrase
266 | + | |
(* Render one argument description as a LIST of LCG grammar types, one per
   semantic category in [cats] (phrase variants only; lexical and special
   variants ignore the category list). Null/Pro/ProNG all render as [One]. *)
let render_morf_cat cats morf =
  match morf with
  | Null | Pro | ProNG -> [One]
  | FixedP lex -> [Tensor[Atom "fixed"; Atom lex]]
  | Or | E Or -> [Tensor[Atom "or"]]
  | LexArg(id,lex,pos) ->
      [Tensor (Atom "lex" :: Atom (string_of_int id) :: Atom lex :: render_pos pos)]
  | SimpleLexArg(lex,pos) ->
      [Tensor (Atom "lex" :: Atom lex :: render_pos pos)]
  | phrase -> Xlist.map cats (fun cat -> render_phrase_cat cat phrase)
279 | + | |
280 | +(* let extract_sel_prefs sel_prefs = | |
281 | + Xlist.map sel_prefs (function | |
282 | + SynsetName s -> s | |
283 | + | _ -> failwith "extract_sel_prefs") *) | |
284 | + | |
(* Render a valence schema: each position's alternative morfs are rendered
   and combined with Plus (single alternative stays bare); direction is
   always Both. Raises Failure on a position with no morfs. *)
let render_schema schema =
  Xlist.map schema (fun p ->
    match Xlist.map p.morfs render_morf with
    | [] -> failwith "render_schema"
    | [m] -> Both, m
    | ms -> Both, Plus ms)
291 | + | |
(* Translate a valence-dictionary direction into its LCG counterpart. *)
let translate_dir d =
  match d with
  | Forward_ -> Forward
  | Backward_ -> Backward
  | Both_ -> Both
296 | + | |
(* Category-aware variant of render_schema: each position's morfs are
   expanded per its cat_prefs, and the position's own direction is kept
   (via translate_dir) instead of being forced to Both.
   Raises Failure on a position whose expansion is empty. *)
let render_schema_cat schema =
  Xlist.map schema (fun p ->
    match List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs)) with
      (* Fixed: error tag previously said "render_schema" (copy-paste from
         the sibling function), which misattributed failures. *)
      [] -> failwith "render_schema_cat"
    | [s] -> translate_dir p.dir,s
    | l -> translate_dir p.dir,Plus l)
303 | + | |
(* Render a "simple" schema (a list of morf alternatives per position):
   every position is optional (One is prepended) and bidirectional. *)
let render_simple_schema schema =
  Xlist.map schema (fun morfs ->
    let rendered = Xlist.map morfs render_morf in
    Both, Plus (One :: rendered))
307 | + | |
(* Render a schema for the connected representation: each position keeps its
   metadata, with every morf wrapped as an LCG grammar term. *)
let render_connected_schema schema =
  Xlist.map schema (fun p ->
    let rendered = Xlist.map p.morfs (fun m -> LCG (render_morf m)) in
    {p with morfs = rendered})
311 | + | |
(* Category-aware variant of render_connected_schema: morfs are expanded per
   the position's cat_prefs before being wrapped as LCG terms. *)
let render_connected_schema_cat schema =
  Xlist.map schema (fun p ->
    let expanded = List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs)) in
    {p with morfs = Xlist.map expanded (fun morf -> LCG morf)})
316 | + | |
(* FIXME: ordinary verb rules (negation, agglutinate, etc.) still need to be
   added here. *)
(* Translate a lexicon entry into its LCG grammar type. Entries with a
   schema become ImpSet types; a LexEntry with an empty schema gets a
   schema variable to be substituted later. Raises Failure on an
   unhandled entry shape. *)
let render_lex_entry = function
    SimpleLexEntry(lemma,pos) -> Tensor([Atom "lex";Atom lemma] @ render_pos_entry pos)
  | LexEntry(id,lemma,pos,NoRestr,schema) ->
    ImpSet(Tensor([Atom "lex";Atom (string_of_int id);Atom lemma] @ render_pos_entry pos),render_schema schema)
    (*Xlist.map (transform_entry pos lemma NegationUndef PredFalse AspectUndef schema) (fun (sel,schema) ->
      sel,LexEntry(id,lemma,pos,NoRestr,schema))*)
  | ComprepNPEntry(prep,NoRestr,schema) -> ImpSet(Tensor[Atom "comprepnp"; Atom prep],render_schema schema)
    (*Xlist.map (transform_entry "comprep" s NegationUndef PredFalse AspectUndef schema) (fun (sel,schema) ->
      sel,ComprepNPEntry(s,NoRestr,schema))*)
  | LexEntry(id,lemma,pos,_,[]) (*as entry*) ->
    ImpSet(Tensor([Atom "lex";Atom (string_of_int id);Atom lemma] @ render_pos_entry pos),[Both,Tensor[AVar "schema"]])
  (* Fixed: error prefix previously said "render_entry:" — wrong function
     name and missing space, which made the diagnostic hard to trace. *)
  | entry -> failwith ("render_lex_entry: " ^ ENIAMwalStringOf.lex_entry entry)
330 | + | |
331 | +(* let schemata,entries = ENIAMvalence.prepare_all_valence ENIAMwalParser.phrases ENIAMwalParser.schemata ENIAMwalParser.entries *) | |
332 | + | |
333 | +(* let _ = | |
334 | + (* Entries.map schemata (fun pos lemma (selectors,schema) -> | |
335 | + (* Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema); *) | |
336 | + render_schema schema) *) | |
337 | + Entries.map entries (fun pos lemma (selectors,entry) -> | |
338 | + (* Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema); *) | |
339 | + selectors,render_lex_entry entry) *) | |
340 | + | |
(* Builders for adjunct positions. All produce an ADJUNCT position whose
   morfs are wrapped as LCG grammar terms; the variants differ in necessity
   (Opt vs Multi), attachment direction, and communicative-element marking. *)
let adjunct morfs =
  {empty_position with gf=ADJUNCT; is_necessary=Opt;
   morfs=Xlist.map morfs (fun m -> LCG m)}
let adjunct_multi dir morfs =
  {empty_position with gf=ADJUNCT; is_necessary=Multi; dir=dir;
   morfs=Xlist.map morfs (fun m -> LCG m)}
let adjunct_dir dir morfs =
  {empty_position with gf=ADJUNCT; is_necessary=Opt; dir=dir;
   morfs=Xlist.map morfs (fun m -> LCG m)}
let adjunct_ce ce morfs =
  {empty_position with gf=ADJUNCT; ce=[ce]; is_necessary=Opt;
   morfs=Xlist.map morfs (fun m -> LCG m)}
345 | + | |
(* Optional comprepnp argument for the plain (render_comprep) and connected
   (render_connected_comprep) representations. *)
let render_comprep prep =
  let morf = Tensor[Atom "comprepnp"; Atom prep] in
  Both, Plus [One; morf]

let render_connected_comprep prep =
  adjunct [Tensor[Atom "comprepnp"; Atom prep]]
349 | + | |
(* Optional prepositional argument: for each admissible case, both a plain
   prepnp and a clausal prepncp realization are allowed. *)
let render_prepnp prep cases =
  let morfs =
    List.flatten (Xlist.map cases (fun case ->
      [Tensor[Atom "prepnp"; Atom prep; Atom case];
       Tensor[Atom "prepncp"; Atom prep; Atom case; Top; Top]])) in
  Both, Plus (One :: morfs)

(* Connected-representation counterpart of render_prepnp. *)
let render_connected_prepnp prep cases =
  let morfs =
    List.flatten (Xlist.map cases (fun case ->
      [Tensor[Atom "prepnp"; Atom prep; Atom case];
       Tensor[Atom "prepncp"; Atom prep; Atom case; Top; Top]])) in
  adjunct morfs
359 | + | |
(* Optional prepositional-adjective argument. The prepositions "z", "po" and
   "na" additionally admit the post-prepositional ("postp") form. *)
let render_prepadjp prep cases =
  let postp =
    if List.mem prep ["z"; "po"; "na"]
    then [Tensor[Atom "prepadjp"; Atom prep; Atom "postp"]]
    else [] in
  let morfs = Xlist.map cases (fun case -> Tensor[Atom "prepadjp"; Atom prep; Atom case]) in
  Both, Plus (One :: (postp @ morfs))

(* Connected-representation counterpart of render_prepadjp. *)
let render_connected_prepadjp prep cases =
  let postp =
    if List.mem prep ["z"; "po"; "na"]
    then [Tensor[Atom "prepadjp"; Atom prep; Atom "postp"]]
    else [] in
  let morfs = Xlist.map cases (fun case -> Tensor[Atom "prepadjp"; Atom prep; Atom case]) in
  adjunct (postp @ morfs)
369 | + | |
(* Optional comparative argument with any case (Top) for the plain and
   connected representations. *)
let render_compar prep =
  let morf = Tensor[Atom "compar"; Atom prep; Top] in
  Both, Plus [One; morf]

let render_connected_compar prep =
  adjunct [Tensor[Atom "compar"; Atom prep; Top]]
373 | + | |
(* Standard optional adjunct slots attached to every verb in the simple
   representation: adverbials of various semantic classes, dative and
   instrumental NPs/NCPs, date expressions, a forward clausal complement,
   "or", the reflexive "się", and a padvp. *)
let verb_adjuncts_simp = [
  Both, Plus[One;Tensor[Atom "advp"; Atom "pron"]];
  Both, Plus[One;Tensor[Atom "advp"; Atom "locat"]];
  Both, Plus[One;Tensor[Atom "advp"; Atom "abl"]];
  Both, Plus[One;Tensor[Atom "advp"; Atom "adl"]];
  Both, Plus[One;Tensor[Atom "advp"; Atom "perl"]];
  Both, Plus[One;Tensor[Atom "advp"; Atom "temp"]];
  Both, Plus[One;Tensor[Atom "advp"; Atom "dur"]];
  Both, Plus[One;Tensor[Atom "advp"; Atom "mod"]];
  Both, Plus[One;Tensor[Atom "np";Top;Atom "dat"; Top; Top];Tensor[Atom "ncp"; Top; Atom "dat"; Top; Top; Top; Top]];
  Both, Plus[One;Tensor[Atom "np";Top;Atom "inst"; Top; Top];Tensor[Atom "ncp"; Top; Atom "inst"; Top; Top; Top; Top]];
  Both, Plus[One;Tensor[Atom "date"];Tensor[Atom "day-lex"];Tensor[Atom "day-month"];Tensor[Atom "day"]];
  Forward, Plus[One;Tensor[Atom "cp";Top; Top]]; (* FIXME: this should be the last argument list *)
  Both, Plus[One;Tensor[Atom "or"]];
  Both, Plus[One;Tensor[Atom "lex";Atom "się";Atom "qub"]];
  Both, Plus[One;Tensor[Atom "padvp"]];
]

(* Connected-representation counterpart of verb_adjuncts_simp: the same
   slots expressed as adjunct positions; the padvp carries communicative
   element "3". *)
let verb_connected_adjuncts_simp = [
  adjunct [Tensor[Atom "advp"; Atom "pron"]];
  adjunct [Tensor[Atom "advp"; Atom "locat"]];
  adjunct [Tensor[Atom "advp"; Atom "abl"]];
  adjunct [Tensor[Atom "advp"; Atom "adl"]];
  adjunct [Tensor[Atom "advp"; Atom "perl"]];
  adjunct [Tensor[Atom "advp"; Atom "temp"]];
  adjunct [Tensor[Atom "advp"; Atom "dur"]];
  adjunct [Tensor[Atom "advp"; Atom "mod"]];
  adjunct [Tensor[Atom "np";Top;Atom "dat"; Top; Top];Tensor[Atom "ncp"; Top; Atom "dat"; Top; Top; Top; Top]];
  adjunct [Tensor[Atom "np";Top;Atom "inst"; Top; Top];Tensor[Atom "ncp"; Top; Atom "inst"; Top; Top; Top; Top]];
  adjunct [Tensor[Atom "date"];Tensor[Atom "day-lex"];Tensor[Atom "day-month"];Tensor[Atom "day"]];
  adjunct_dir Forward_ [Tensor[Atom "cp";Top; Top]];
  adjunct [Tensor[Atom "or"]];
  adjunct [Tensor[Atom "lex";Atom "się";Atom "qub"]];
  adjunct_ce "3" [Tensor[Atom "padvp"]];
]
409 | + | |
(* Adjunct slots for proper nouns: genitive NP/NCP complement, a forward
   nominal apposition, and pre-/post-modifying agreeing adjectives.
   NOTE(review): currently identical to common_noun_adjuncts_simp —
   presumably kept separate so they can diverge; confirm. *)
let proper_noun_adjuncts_simp = [
  Both, Plus[One;Tensor[Atom "np";Top;Atom "gen"; Top; Top];Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top]];
  Forward, Plus[One;Tensor[Atom "np";Top;Atom "nom"; Top; Top];Tensor[Atom "np";Top;AVar "case"; Top; Top]];
  Backward, Maybe(Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]);
  Forward, Plus[One;Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
]

(* Connected-representation counterpart of proper_noun_adjuncts_simp;
   the backward adjective slot is Multi (repeatable). *)
let proper_noun_connected_adjuncts_simp = [
  adjunct [Tensor[Atom "np";Top;Atom "gen"; Top; Top];Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top]];
  adjunct_dir Forward_ [Tensor[Atom "np";Top;Atom "nom"; Top; Top];Tensor[Atom "np";Top;AVar "case"; Top; Top]];
  adjunct_multi Backward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
  adjunct_dir Forward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
]

(* Adjunct slots for common nouns (same shape as the proper-noun tables). *)
let common_noun_adjuncts_simp = [
  Both, Plus[One;Tensor[Atom "np";Top;Atom "gen"; Top; Top];Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top]];
  Forward, Plus[One;Tensor[Atom "np";Top;Atom "nom"; Top; Top];Tensor[Atom "np";Top;AVar "case"; Top; Top]];
  Backward, Maybe(Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]);
  Forward, Plus[One;Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
]

(* Connected-representation counterpart of common_noun_adjuncts_simp. *)
let common_noun_connected_adjuncts_simp = [
  adjunct [Tensor[Atom "np";Top;Atom "gen"; Top; Top];Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top]];
  adjunct_dir Forward_ [Tensor[Atom "np";Top;Atom "nom"; Top; Top];Tensor[Atom "np";Top;AVar "case"; Top; Top]];
  adjunct_multi Backward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
  adjunct_dir Forward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
]

(* Adjunct slots for measure nouns: agreeing adjectives only. *)
let measure_noun_adjuncts_simp = [
  Backward, Maybe(Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]);
  Forward, Plus[One;Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
]

(* Connected-representation counterpart of measure_noun_adjuncts_simp. *)
let measure_noun_connected_adjuncts_simp = [
  adjunct_multi Backward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
  adjunct_dir Forward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
]
447 | + | |
(* Adjunct slots for adjectives: a single optional adverbial of any class. *)
let adj_adjuncts_simp = [
  Both, Plus[One;Tensor[Atom "advp"; Top]];
]

(* Connected-representation counterpart of adj_adjuncts_simp. *)
let adj_connected_adjuncts_simp = [
  adjunct [Tensor[Atom "advp"; Top]];
]

(* Adjunct slots for adverbs: a single optional adverbial of any class. *)
let adv_adjuncts_simp = [
  Both, Plus[One;Tensor[Atom "advp"; Top]];
]

(* Connected-representation counterpart of adv_adjuncts_simp. *)
let adv_connected_adjuncts_simp = [
  adjunct [Tensor[Atom "advp"; Top]];
]
463 | + | |
(* Morfs admitted as the complement of a specific (preposition, case) pair;
   the "postp" forms of "po"/"z"/"na" take special adjective/adverb
   complements, everything else takes an NP or adjective in the given case.
   NOTE(review): identifier misspells "assign" — rename repo-wide (with all
   call sites) when safe; kept as-is here to preserve the interface. *)
let assing_prep_morfs = function
    "po","postp" -> [
      LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "dat"; Atom "m1"]);
      LCG(Tensor[Atom "adjp"; Top; Atom "postp"; Top])]
  | "z","postp" -> [LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "nom"; Atom "f"])]
  | "na","postp" -> [LCG(Tensor[Atom "advp"; Top])]
  | _,case -> [
      LCG(Tensor[Atom "np"; Top; Atom case; Top; Top]);
      LCG(Tensor[Atom "adjp"; Top; Atom case; Top])]

(* Generic complement morfs for prepositions: nominal/adjectival phrases and
   the date/time expression categories.
   NOTE(review): the literal Atom "case" here (unlike the variable case
   above, or AVar "case" used elsewhere) looks suspicious — confirm it is
   intended and not a typo for AVar "case". *)
let prep_morfs = [
  LCG(Tensor[Atom "np"; Top; Atom "case"; Top; Top]);
  LCG(Tensor[Atom "adjp"; Top; Atom "case"; Top]);
  LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "dat"; Atom "m1"]);
  LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "nom"; Atom "f"]);
  LCG(Tensor[Atom "advp"; Top]);
  LCG(Tensor[Atom "year"]);
  LCG(Tensor[Atom "hour-minute"]);
  LCG(Tensor[Atom "day-month"]);
  LCG(Tensor[Atom "hour"]);
  LCG(Tensor[Atom "day"]);
  LCG(Tensor[Atom "date"]);
  ]

(* Complement morfs for comparative constructions: nominal/adjectival or
   prepositional phrases. See the Atom "case" note on prep_morfs above. *)
let compar_morfs = [
  LCG(Tensor[Atom "np"; Top; Atom "case"; Top; Top]);
  LCG(Tensor[Atom "adjp"; Top; Atom "case"; Top]);
  LCG(Tensor[Atom "prepnp"; Top; Top]);
  LCG(Tensor[Atom "prepadjp"; Top; Top]);
  ]
... | ... |
lexSemantics/ENIAMwalStringOf.ml
... | ... | @@ -59,6 +59,7 @@ let case = function |
59 | 59 | | CaseUndef -> "_" |
60 | 60 | | AllAgr -> "allagr" |
61 | 61 | | NomAgr -> "nomagr" |
62 | + | VocAgr -> "vocagr" | |
62 | 63 | | GenAgr -> "genagr" |
63 | 64 | |
64 | 65 | let rec comp = function |
... | ... | @@ -140,7 +141,7 @@ let rec phrase = function |
140 | 141 | | ComparP(prep,c) -> "comparp(" ^ prep ^ "," ^ case c ^ ")" |
141 | 142 | | CP(ct,co) -> "cp(" ^ comp_type ct ^ "," ^ comp co ^ ")" |
142 | 143 | | NCP(c,ct,co) -> "ncp(" ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")" |
143 | - | PrepNCP(prep,c,ct,co) -> "prepncp(" ^ prep ^ "," ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")" | |
144 | + | PrepNCP(p,prep,c,ct,co) -> "prepncp(" ^ psem p ^ "," ^ prep ^ "," ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")" | |
144 | 145 | | InfP(a) -> "infp(" ^ aspect a (*^ req r*) ^ ")" |
145 | 146 | | AdvP(m) -> "advp(" ^ m ^ ")" |
146 | 147 | | ColonP -> "colonp" |
... | ... | @@ -250,7 +251,7 @@ let rec connected_schema schema = |
250 | 251 | "{" ^ String.concat ";" (Xlist.map s.morfs simple_morf) ^ "}:" ^ sem_frame s)) |
251 | 252 | *) |
252 | 253 | |
253 | -let meaning m = | |
254 | +let sense m = | |
254 | 255 | m.name ^ "-" ^ m.variant |
255 | 256 | |
256 | 257 | let lex_entry = function |
... | ... |
lexSemantics/ENIAMwalTypes.ml
... | ... | @@ -24,7 +24,7 @@ type opinion = Pewny | Potoczny | Watpliwy | Archaiczny | Zly | Wulgarny | Nieok |
24 | 24 | type negation = Negation | Aff | NegationUndef (*| NegationNA*) |
25 | 25 | type pred = PredTrue | PredFalse | PredUndef (*| PredNA*) |
26 | 26 | type aspect = Aspect of string | AspectUndef (*| AspectNA*) |
27 | -type case = Case of string | Str | Part | CaseAgr | CaseUndef (*| AllUAgr | CaseUAgr*) | GenAgr | NomAgr | AllAgr | |
27 | +type case = Case of string | Str | Part | CaseAgr | CaseUndef (*| AllUAgr | CaseUAgr*) | GenAgr | NomAgr | VocAgr | AllAgr | |
28 | 28 | type comp = Comp of string | Zeby | Gdy | CompUndef |
29 | 29 | type comp_type = Int | Rel | CompTypeUndef (*| CompTypeAgr*) |
30 | 30 | type number = Number of string | NumberUndef | NumberAgr |
... | ... | @@ -72,7 +72,7 @@ type phrase = |
72 | 72 | | ComparP of string * case |
73 | 73 | | CP of comp_type * comp |
74 | 74 | | NCP of case * comp_type * comp |
75 | - | PrepNCP of string * case * comp_type * comp | |
75 | + | PrepNCP of psem * string * case * comp_type * comp | |
76 | 76 | | InfP of aspect |
77 | 77 | | AdvP of string |
78 | 78 | | ColonP |
... | ... | @@ -108,34 +108,34 @@ type necessary = Req | Opt | Pro | ProNG | Multi |
108 | 108 | |
109 | 109 | type direction = Both_ | Forward_ | Backward_ |
110 | 110 | |
111 | -type position = {psn_id: int; gf: gf; role: string; role_attr: string; sel_prefs: sel_prefs list; | |
111 | +type position = {psn_id: int; gf: gf; role: string; role_attr: string; node: string; sel_prefs: sel_prefs list; cat_prefs: string list; | |
112 | 112 | mode: string list; cr: string list; ce: string list; morfs: phrase list; |
113 | 113 | dir: direction; is_necessary: necessary} |
114 | 114 | |
115 | 115 | let empty_position = |
116 | - {psn_id=(-1); gf=ARG; role=""; role_attr=""; mode=[]; sel_prefs=[]; cr=[]; ce=[]; dir=Both_; morfs=[]; is_necessary=Opt} | |
116 | + {psn_id=(-1); gf=ARG; role=""; role_attr=""; mode=[]; node="concept"; sel_prefs=[]; cat_prefs=["X"]; cr=[]; ce=[]; dir=Both_; morfs=[]; is_necessary=Opt} | |
117 | 117 | |
118 | -type meaning = {mng_id: int; | |
118 | +type sense = {mng_id: int; | |
119 | 119 | name: string; |
120 | 120 | variant: string; |
121 | 121 | plwnluid: int; |
122 | 122 | gloss: string} |
123 | 123 | |
124 | -let empty_meaning = {mng_id = (-1); | |
124 | +let empty_sense = {mng_id = (-1); | |
125 | 125 | name = ""; |
126 | 126 | variant = ""; |
127 | 127 | plwnluid = (-1); |
128 | 128 | gloss = ""} |
129 | 129 | |
130 | 130 | (* type frame_atrs = |
131 | - EmptyAtrs of meaning list | |
132 | - | DefaultAtrs of meaning list * refl * opinion * negation * pred * aspect | |
131 | + EmptyAtrs of sense list | |
132 | + | DefaultAtrs of sense list * refl * opinion * negation * pred * aspect | |
133 | 133 | | ComprepAtrs of string |
134 | - | NounAtrs of meaning list * string * nsem (** string list*) | |
135 | - | AdjAtrs of meaning list * case * string (** string * string list*) | |
136 | - | PersAtrs of meaning list * string * negation * mood * tense * aux * aspect | |
137 | - | GerAtrs of meaning list * string * negation * aspect | |
138 | - | NonPersAtrs of meaning list * string * string * string * negation * aspect *) | |
134 | + | NounAtrs of sense list * string * nsem (** string list*) | |
135 | + | AdjAtrs of sense list * case * string (** string * string list*) | |
136 | + | PersAtrs of sense list * string * negation * mood * tense * aux * aspect | |
137 | + | GerAtrs of sense list * string * negation * aspect | |
138 | + | NonPersAtrs of sense list * string * string * string * negation * aspect *) | |
139 | 139 | |
140 | 140 | (* type schema = {sch_id: int; opinion: opinion; reflexiveMark: refl; aspect: aspect; |
141 | 141 | negativity: negation; predicativity: pred; positions: position list; text_rep: string} *) |
... | ... | @@ -175,6 +175,6 @@ let phrases_filename = resource_path ^ "/Walenty/phrases.tab" |
175 | 175 | let entries_filename = resource_path ^ "/Walenty/entries.tab" |
176 | 176 | let schemata_filename = resource_path ^ "/Walenty/schemata.tab" |
177 | 177 | let connected_filename = resource_path ^ "/Walenty/connected.tab" |
178 | -let meanings_filename = resource_path ^ "/Walenty/meanings.tab" | |
178 | +let senses_filename = resource_path ^ "/Walenty/meanings.tab" | |
179 | 179 | |
180 | 180 | let user_valence_filename = data_path ^ "/valence.dic" |
... | ... |
morphology/resources/alt_supplement.tab
... | ... | @@ -4,4 +4,12 @@ sobie siebie siebie:dat.loc |
4 | 4 | sobą siebie siebie:inst |
5 | 5 | to to pred |
6 | 6 | yay yay interj |
7 | +świetnie świetnie interj | |
8 | +doskonale doskonale interj | |
9 | +idealnie idealnie interj | |
10 | +zdecydowanie zdecydowanie interj | |
11 | +ok ok interj | |
12 | +super super interj | |
13 | +dobrze dobrze interj | |
14 | +dzięki dzięki interj | |
7 | 15 | |
... | ... |
semantics/ENIAMsemGraph.ml
... | ... | @@ -25,7 +25,7 @@ let empty_concept = |
25 | 25 | {c_sense=Dot;c_name=Dot;(* c_variable: string; c_visible_var: bool;*) c_quant=Dot; c_local_quant=true; (*c_modalities: (string * type_term) list; |
26 | 26 | c_left_input_pos: int; c_right_input_pos: int;*) c_relations=Dot; c_variable="",""; c_pos=(-1); c_cat=Dot; c_label=""; c_def_label=""} |
27 | 27 | |
28 | -let empty_context = {cx_sense=Dot; cx_contents=Dot; cx_relations=Dot; cx_variable="",""; cx_pos=(-1); cx_cat=Dot} | |
28 | +let empty_context = {cx_sense=Dot; cx_contents=Dot; cx_relations=Dot; cx_variable="",""; cx_pos=(-1); cx_cat=Dot; cx_label=""; cx_def_label=""} | |
29 | 29 | |
30 | 30 | let rec make_args_list = function |
31 | 31 | Tuple l -> List.flatten (Xlist.map l make_args_list) |
... | ... | @@ -34,7 +34,7 @@ let rec make_args_list = function |
34 | 34 | let symbols = StringSet.of_list [ |
35 | 35 | "symbol"; "date"; "date-interval"; "hour-minute"; "hour"; "hour-minute-interval"; "hour-interval"; |
36 | 36 | "year"; "year-interval"; "day"; "day-interval"; "day-month"; "day-month-interval"; "month-interval"; "roman"; "roman-interval"; |
37 | - "match-result"; "url"; "email"; "obj-id"; "building-number"; | |
37 | + "match-result"; "url"; "email"; "phone-number"; "obj-id"; "building-number"; | |
38 | 38 | "month-lex"; "day-lex"] |
39 | 39 | |
40 | 40 | let rec get_person = function |
... | ... | @@ -60,27 +60,27 @@ let make_relation t c = |
60 | 60 | | "adjunct" -> MakeTripleRelation(t.arole,t.arole_attr,c) |
61 | 61 | | s -> failwith ("make_make_triple_relation: " ^ s)*) |
62 | 62 | |
63 | -(* let add_proj proj c = | |
64 | - if proj = Dot then Concept c else | |
65 | - Concept{empty_concept with c_cat=proj; c_relations=Tuple[Relation("Has","",Concept{c with c_relations=Dot});c.c_relations]} *) | |
66 | -let add_proj proj c = | |
67 | - if proj = Dot then Concept c else | |
68 | - Concept{empty_concept with c_cat=proj; c_relations=Relation("Has","",Concept c)} | |
69 | -(* let add_proj proj c = | |
70 | - if proj = Dot then Concept c else | |
71 | - let proj_rels,c_rels = split_relations c.c_relations in | |
72 | - Concept{empty_concept with c_cat=proj; c_relations=Tuple[Relation("Has","",Concept{c with c_relations=c_rels});proj_rels]} *) | |
73 | -let add_proj2 proj c = | |
74 | - if proj = Dot then c else | |
75 | - Concept{empty_concept with c_cat=proj; c_relations=Relation("Has","",c)} | |
76 | - | |
77 | - | |
78 | -let create_normal_concept tokens lex_sems t cat proj = | |
63 | +(* let add_coerced coerced c = | |
64 | + if coerced = Dot then Concept c else | |
65 | + Concept{empty_concept with c_cat=coerced; c_relations=Tuple[Relation("Has","",Concept{c with c_relations=Dot});c.c_relations]} *) | |
66 | +let add_coerced coerced c = | |
67 | + if coerced = Dot then Concept c else | |
68 | + Concept{empty_concept with c_cat=coerced; c_relations=Relation("Has","",Concept c)} (* FIXME: trzeba dodać concept do tokenów *) | |
69 | +(* let add_coerced coerced c = | |
70 | + if coerced = Dot then Concept c else | |
71 | + let coerced_rels,c_rels = split_relations c.c_relations in | |
72 | + Concept{empty_concept with c_cat=coerced; c_relations=Tuple[Relation("Has","",Concept{c with c_relations=c_rels});coerced_rels]} *) | |
73 | +let add_coerced2 coerced c = | |
74 | + if coerced = Dot then c else | |
75 | + Concept{empty_concept with c_cat=coerced; c_relations=Relation("Has","",c)} (* FIXME: trzeba dodać concept do tokenów *) | |
76 | + | |
77 | + | |
78 | +let create_normal_concept tokens lex_sems t cat coerced = | |
79 | 79 | (*if t.agf = ENIAMwalTypes.NOSEM then t.args else*) |
80 | - let cat,proj = if !user_ontology_flag then cat,proj else Dot,Dot in | |
81 | - let proj = if proj = cat then Dot else proj in | |
80 | + let cat,coerced = if !user_ontology_flag then cat,coerced else Dot,Dot in | |
81 | + let coerced = if coerced = cat then Dot else coerced in | |
82 | 82 | let c = {empty_concept with |
83 | - c_sense = if !user_ontology_flag then Val t.lemma else (*if t.lemma = "<root>" then Dot else*) t.meaning; | |
83 | + c_sense = (*if !user_ontology_flag then Val t.lemma else*) (*if t.lemma = "<root>" then Dot else*) t.sense; | |
84 | 84 | c_relations=t.args; |
85 | 85 | c_quant=if t.label = "" then t.sem_args else Dot; (* FIXME: zakładam że t.label <> "" występuje tylko dla pro *) |
86 | 86 | c_variable=string_of_int t.id,""; |
... | ... | @@ -93,7 +93,7 @@ let create_normal_concept tokens lex_sems t cat proj = |
93 | 93 | let c = {c with c_local_quant=false} in |
94 | 94 | let c,measure,cx_flag = Xlist.fold t.attrs (c,false,false) (fun (c,measure,cx_flag) -> function |
95 | 95 | "NSYN",Val "common" -> c,measure,cx_flag |
96 | - | "NSYN",Val "proper" -> {c with c_name=Val t.lemma; c_sense=Dot(*t.meaning*)(*c_sense=if Val t.pred=c.c_sense then Dot else c.c_sense*)},measure,cx_flag; (* FIXME: zaślepka na potrzeby gramatyk semantycznych *) (* Rozpoznawanie propoer names nieznanego typu - ryzykowne ale proste *) | |
96 | + | "NSYN",Val "proper" -> {c with c_name=Val t.lemma; c_sense=Dot(*t.sense*)(*c_sense=if Val t.pred=c.c_sense then Dot else c.c_sense*)},measure,cx_flag; (* FIXME: zaślepka na potrzeby gramatyk semantycznych *) (* Rozpoznawanie propoer names nieznanego typu - ryzykowne ale proste *) | |
97 | 97 | | "NSYN",Val "pronoun" -> c(*{c with c_quant=Tuple[c.c_quant;Val "indexical"]}*),measure,cx_flag |
98 | 98 | | "NSEM",Val "count" -> c(*{c with c_quant=Tuple[c.c_quant;Val "count"]}*),measure,cx_flag |
99 | 99 | | "NSEM",Val "mass" -> {c with c_quant=Tuple[c.c_quant;Val "mass"]},measure,cx_flag |
... | ... | @@ -120,13 +120,13 @@ let create_normal_concept tokens lex_sems t cat proj = |
120 | 120 | if cx_flag then |
121 | 121 | let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in |
122 | 122 | let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in |
123 | - make_relation t (Context{empty_context with cx_contents=add_proj proj c; cx_variable=string_of_int id,""; cx_pos=c.c_pos}) | |
123 | + make_relation t (Context{empty_context with cx_contents=add_coerced coerced c; cx_variable=string_of_int id,""; cx_pos=c.c_pos}) | |
124 | 124 | else |
125 | - make_relation t (add_proj proj c) else | |
125 | + make_relation t (add_coerced coerced c) else | |
126 | 126 | if t.pos = "fin" || t.pos = "bedzie" || t.pos = "praet" || t.pos = "winien" || t.pos = "impt" || t.pos = "imps" || t.pos = "pred" || t.lemma = "pro-komunikować" then |
127 | 127 | let c = {c with c_local_quant=false} in |
128 | 128 | let c = Xlist.fold t.attrs c (fun c -> function |
129 | -(* "MEANING",t -> {c with c_sense=Tuple[c.c_sense;t]} *) | |
129 | +(* "SENSE",t -> {c with c_sense=Tuple[c.c_sense;t]} *) | |
130 | 130 | | "NUM",t -> c |
131 | 131 | | "GEND",_ -> c |
132 | 132 | | "PERS",_ -> c |
... | ... | @@ -142,7 +142,7 @@ let create_normal_concept tokens lex_sems t cat proj = |
142 | 142 | let c = if t.lemma = "pro-komunikować" then {c with c_relations=Relation("Theme","",c.c_relations)} else c in (* FIXME: to by trzeba przesunąć na wcześniej *) |
143 | 143 | let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in |
144 | 144 | let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in |
145 | - let cx = {empty_context with cx_contents=add_proj proj c; cx_variable=string_of_int id,""; cx_pos=c.c_pos; cx_cat=Val "Situation"} in | |
145 | + let cx = {empty_context with cx_contents=add_coerced coerced c; cx_variable=string_of_int id,""; cx_pos=c.c_pos; cx_cat=Val "Situation"} in | |
146 | 146 | (* if t.role <> "" || t.role_attr <> "" then failwith "create_normal_concept: verb" else *) |
147 | 147 | make_relation t (Context cx) else |
148 | 148 | if t.pos = "inf" then |
... | ... | @@ -154,13 +154,13 @@ let create_normal_concept tokens lex_sems t cat proj = |
154 | 154 | | "NEGATION",Val "neg" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} |
155 | 155 | | e,t -> failwith ("create_normal_concept verb: " ^ e)) in |
156 | 156 | let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in |
157 | - let _ = ExtArray.add lex_sems in | |
158 | - let cx = {empty_context with cx_contents=add_proj proj c; cx_variable=string_of_int id,""; cx_pos=c.c_pos; cx_cat=Val "Situation"} in | |
157 | + let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in | |
158 | + let cx = {empty_context with cx_contents=add_coerced coerced c; cx_variable=string_of_int id,""; cx_pos=c.c_pos; cx_cat=Val "Situation"} in | |
159 | 159 | make_relation t (Context cx) else |
160 | 160 | if t.pos = "adj" || t.pos = "adjc" || t.pos = "adjp" || t.pos = "adja" || t.pos = "pact" || t.pos = "ppas" || t.pos = "apron" || t.pos = "ordnum" || t.pos = "roman-adj" then |
161 | 161 | let c = if t.pos = "pact" || t.pos = "ppas" then {c with c_local_quant=false} else c in |
162 | 162 | let c = Xlist.fold t.attrs c (fun c -> function |
163 | -(* "MEANING",t -> {c with c_sense=Tuple[c.c_sense;t]} *) | |
163 | +(* "SENSE",t -> {c with c_sense=Tuple[c.c_sense;t]} *) | |
164 | 164 | | "SYN",Val "common" -> c |
165 | 165 | | "SYN",Val "pronoun" -> c(*{c with c_quant=Tuple[c.c_quant;Val "indexical"]}*) |
166 | 166 | | "SYN",Val "proper" -> if t.pos = "roman-adj" then c else failwith "create_normal_concept adj: SYN=proper" |
... | ... | @@ -172,6 +172,7 @@ let create_normal_concept tokens lex_sems t cat proj = |
172 | 172 | | "GRAD",Val "com" -> {c with c_relations=Tuple[c.c_relations;SingleRelation (Val "com")]} |
173 | 173 | | "GRAD",Val "sup" -> {c with c_relations=Tuple[c.c_relations;SingleRelation (Val "sup")]} |
174 | 174 | | "ASPECT",_ -> c |
175 | + | "CTYPE",_ -> c (* FIXME1: trzeba zaznaczyć pytajność w grafie, CTYPE pojawia się w dwu węzłach *) | |
175 | 176 | (* | "TYPE",Val "int" -> {c with c_quant=Tuple[c.c_quant;Val "interrogative"]} *) |
176 | 177 | | "TYPE",_ -> c (* FIXME *) |
177 | 178 | | "PERS",_ -> c |
... | ... | @@ -183,7 +184,7 @@ let create_normal_concept tokens lex_sems t cat proj = |
183 | 184 | if t.pos = "adv" || t.pos = "pcon" || t.pos = "pant" then |
184 | 185 | let c = if t.pos = "pcon" || t.pos = "pant" then {c with c_local_quant=false} else c in |
185 | 186 | let c = Xlist.fold t.attrs c (fun c -> function |
186 | -(* "MEANING",t -> {c with c_sense=Tuple[c.c_sense;t]} *) | |
187 | +(* "SENSE",t -> {c with c_sense=Tuple[c.c_sense;t]} *) | |
187 | 188 | | "GRAD",Val "pos" -> c |
188 | 189 | | "GRAD",Val "com" -> {c with c_relations=Tuple[c.c_relations;SingleRelation (Val "com")]} |
189 | 190 | | "GRAD",Val "sup" -> {c with c_relations=Tuple[c.c_relations;SingleRelation (Val "sup")]} |
... | ... | @@ -194,7 +195,7 @@ let create_normal_concept tokens lex_sems t cat proj = |
194 | 195 | | "NEGATION",Val "aff" -> c |
195 | 196 | | "NEGATION",Val "neg" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} |
196 | 197 | | e,t -> failwith ("create_normal_concept adv: " ^ e)) in |
197 | - make_relation t (add_proj proj c) else | |
198 | + make_relation t (add_coerced coerced c) else | |
198 | 199 | if t.pos = "prep" then |
199 | 200 | (* if t.arole = "NOSEM" then make_relation t (t.args) else *) |
200 | 201 | let c,is_sem = Xlist.fold t.attrs (c,false) (fun (c,is_sem) -> function |
... | ... | @@ -203,9 +204,9 @@ let create_normal_concept tokens lex_sems t cat proj = |
203 | 204 | | "PSEM",Val "nosem" -> c,false |
204 | 205 | | e,t -> failwith ("create_normal_concept prep: " ^ e)) in |
205 | 206 | (* make_make_triple_relation t (Concept c) else *) |
206 | - if is_sem then make_relation t (add_proj2 proj (CreateContext({empty_context with cx_sense=c.c_sense; cx_variable=c.c_variable; cx_pos=c.c_pos; cx_cat=c.c_cat},c.c_relations))) | |
207 | + if is_sem then make_relation t (add_coerced2 coerced (CreateContext({empty_context with cx_sense=c.c_sense; cx_variable=c.c_variable; cx_pos=c.c_pos; cx_cat=c.c_cat},c.c_relations))) | |
207 | 208 | else make_relation t (RemoveRelation("CORE","",c.c_relations)) else |
208 | - if proj <> Dot then failwith ("create_normal_concept proj: " ^ t.lemma) else | |
209 | + if coerced <> Dot then failwith ("create_normal_concept coerced: " ^ t.lemma) else | |
209 | 210 | if t.pos = "pro" || t.pos = "ppron12" || t.pos = "ppron3" || t.pos = "siebie" then (* FIXME: indexicalność *) |
210 | 211 | let c = {c with c_local_quant=false} in |
211 | 212 | let c = Xlist.fold t.attrs c (fun c -> function |
... | ... | @@ -225,7 +226,7 @@ let create_normal_concept tokens lex_sems t cat proj = |
225 | 226 | make_relation t (Concept c) else |
226 | 227 | if t.pos = "num" || t.pos = "intnum" || t.pos = "realnum" || t.pos = "intnum-interval" || t.pos = "realnum-interval" then |
227 | 228 | let c = Xlist.fold t.attrs c (fun c -> function |
228 | -(* "MEANING",t -> {c with c_sense=Tuple[c.c_sense;t]} *) | |
229 | +(* "SENSE",t -> {c with c_sense=Tuple[c.c_sense;t]} *) | |
229 | 230 | | "ACM",_ -> c |
230 | 231 | | "NUM",_ -> c |
231 | 232 | | "CASE",_ -> c |
... | ... | @@ -249,12 +250,15 @@ let create_normal_concept tokens lex_sems t cat proj = |
249 | 250 | if t.pos = "comp" then |
250 | 251 | make_relation t (SetContextName(c.c_sense,RemoveRelation("CORE","",c.c_relations))) else |
251 | 252 | if t.pos = "conj" then |
252 | - let c = {empty_context with cx_sense=t.meaning; cx_contents=t.args; cx_variable=c.c_variable; cx_pos=c.c_pos} in | |
253 | + let c = {empty_context with cx_sense=t.sense; cx_contents=t.args; cx_variable=c.c_variable; cx_pos=c.c_pos; cx_cat=c.c_cat; cx_def_label=c.c_def_label; cx_label=c.c_label} in | |
253 | 254 | let c = Xlist.fold t.attrs c (fun c -> function |
254 | 255 | | "NUM",_ -> c |
255 | 256 | | "CASE",_ -> c |
256 | 257 | | "GEND",_ -> c |
257 | 258 | | "PERS",_ -> c |
259 | + | "ASPECT",_ -> c | |
260 | + | "controller",_ -> c | |
261 | + | "controllee",_ -> c | |
258 | 262 | | e,t -> failwith ("create_normal_concept conj: " ^ e)) in |
259 | 263 | ManageCoordination({t with attrs=[]; args=Dot},Context c) else |
260 | 264 | (* if t.pos = "interj" then |
... | ... | @@ -264,16 +268,25 @@ let create_normal_concept tokens lex_sems t cat proj = |
264 | 268 | if t.pos = "sinterj" || t.pos = "interj" then |
265 | 269 | let c = Xlist.fold t.attrs c (fun c -> function |
266 | 270 | | e,t -> failwith ("create_normal_concept sinterj: " ^ e)) in |
267 | - make_relation t (Concept c) else | |
271 | + let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in | |
272 | + let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in | |
273 | + let cx = {empty_context with cx_contents=add_coerced coerced c; cx_variable=string_of_int id,""; cx_pos=c.c_pos; cx_cat=Val "Situation"} in | |
274 | + make_relation t (Context cx) else | |
268 | 275 | if t.lemma = "<root>" then t.args else |
269 | 276 | if t.lemma = "<merge>" then RemoveRelation("null","",t.args) else |
270 | - if t.pos = "interp" && t.lemma = "?" && t.args = Dot then SingleRelation(Val "int") else | |
277 | + (* if t.pos = "interp" && t.lemma = "?" && t.args = Dot then SingleRelation(Val "int") else *) | |
278 | + if t.pos = "interp" && t.lemma = "?" then | |
279 | + make_relation t (AddSingleRelation(Val "int",RemoveRelation("CORE","",t.args))) else (* FIXME1: to powinno tworzyć kontekst i zaznaczać ze jest interrogative *) | |
271 | 280 | if t.pos = "interp" && t.lemma = ":" then |
272 | 281 | make_relation t (RemoveRelation("CORE","",t.args)) else |
273 | 282 | if t.pos = "interp" && t.lemma = "</sentence>" then |
274 | 283 | let l = (*List.rev*) (make_args_list t.args) in |
275 | 284 | Xlist.fold (List.tl l) (RemoveRelation("null","",List.hd l)) (fun t s -> AddRelation(t,"Next","Clause",RemoveRelation("null","",s))) else |
276 | 285 | if t.pos = "interp" && t.lemma = "<sentence>" then t.args else |
286 | + if t.pos = "interp" && t.lemma = "</query>" then | |
287 | + let l = (*List.rev*) (make_args_list t.args) in | |
288 | + Xlist.fold (List.tl l) (List.hd l) (fun t s -> AddRelation(t,"Next","Sentence",s)) else | |
289 | + if t.pos = "interp" && t.lemma = "<query>" then t.args else | |
277 | 290 | (* if t.pos = "interp" && t.lemma = "”s" then |
278 | 291 | let l = List.rev (make_args_list t.args) in |
279 | 292 | let x = Xlist.fold (List.tl l) (List.hd l) (fun t s -> AddRelation(RemoveRelation t,"Next","Sentence",RemoveRelation s)) in |
... | ... | @@ -310,54 +323,55 @@ let rec translate_node tokens lex_sems t = |
310 | 323 | orth=t.ENIAM_LCGtypes.orth; lemma=t.ENIAM_LCGtypes.lemma; pos=t.ENIAM_LCGtypes.pos; weight=t.ENIAM_LCGtypes.weight; |
311 | 324 | id=t.ENIAM_LCGtypes.id; symbol=create_concepts tokens lex_sems t.ENIAM_LCGtypes.symbol; arg_symbol=create_concepts tokens lex_sems t.ENIAM_LCGtypes.arg_symbol; |
312 | 325 | arg_dir=t.ENIAM_LCGtypes.arg_dir; |
313 | - attrs=[]; label=""; def_label=""; | |
326 | + attrs=[]; label=""; def_label=""; snode=""; | |
314 | 327 | args=create_concepts tokens lex_sems t.ENIAM_LCGtypes.args; |
315 | - gf=""; role=""; role_attr=""; selprefs=Dot; meaning=Dot; arole=""; arole_attr=""; arev=false; sem_args=Dot} in | |
316 | - let t,attrs,cat,proj = Xlist.fold attrs (t,[],Dot,Dot) (fun (t,attrs,cat,proj) -> function | |
317 | - "gf",Val s -> {t with gf=s},attrs,cat,proj | |
318 | - | "role",Val s -> {t with role=s},attrs,cat,proj | |
319 | - | "role-attr",Val s -> {t with role_attr=s},attrs,cat,proj | |
320 | - | "selprefs",s -> {t with selprefs=s},attrs,cat,proj | |
321 | - | "meaning",s -> {t with meaning=s},attrs,cat,proj | |
322 | - | "hipero",_ -> t,attrs,cat,proj | |
323 | - | "arole",Val s -> {t with arole=s},attrs,cat,proj | |
324 | - | "arole-attr",Val s -> {t with arole_attr=s},attrs,cat,proj | |
325 | - | "arev",Val "-" -> {t with arev=false},attrs,cat,proj | |
326 | - | "arev",Val "+" -> {t with arev=true},attrs,cat,proj | |
327 | - | "agf",Val s -> t,attrs,cat,proj | |
328 | - | "sem-args",s -> {t with sem_args=s},attrs,cat,proj | |
329 | - | "rev-hipero",_ -> t,attrs,cat,proj | |
330 | - | "fopinion",_ -> t,attrs,cat,proj | |
331 | - | "sopinion",_ -> t,attrs,cat,proj | |
332 | - | "ACM",s -> t,("ACM",s) :: attrs,cat,proj | |
333 | - | "ASPECT",s -> t,("ASPECT",s) :: attrs,cat,proj | |
334 | - | "NEGATION",s -> t,("NEGATION",s) :: attrs,cat,proj | |
335 | - | "MOOD",s -> t,("MOOD",s) :: attrs,cat,proj | |
336 | - | "TENSE",s -> t,("TENSE",s) :: attrs,cat,proj | |
337 | - | "controller",s -> t,("controller",s) :: attrs,cat,proj | |
338 | - | "controllee",s -> t,("controllee",s) :: attrs,cat,proj | |
339 | - | "coref",s -> t,attrs,cat,proj | |
340 | - | "label",Val s -> {t with label=s},attrs,cat,proj | |
341 | - | "def-label",Val s -> {t with def_label=s},attrs,cat,proj | |
342 | - | "CAT",s -> t,attrs,s,proj | |
343 | - | "PROJ",s -> t,attrs,cat,s | |
344 | - | "NUM",s -> t,("NUM",s) :: attrs,cat,proj | |
345 | - | "CASE",s -> t,("CASE",s) :: attrs,cat,proj | |
346 | - | "GEND",s -> t,("GEND",s) :: attrs,cat,proj | |
347 | - | "PERS",s -> t,("PERS",s) :: attrs,cat,proj | |
348 | - | "NSYN",s -> t,("NSYN",s) :: attrs,cat,proj | |
349 | - | "NSEM",s -> t,("NSEM",s) :: attrs,cat,proj | |
350 | - | "MODE",s -> t,("MODE",s) :: attrs,cat,proj | |
351 | - | "GRAD",s -> t,("GRAD",s) :: attrs,cat,proj | |
352 | - | "PSEM",s -> t,("PSEM",s) :: attrs,cat,proj | |
353 | - (* | k,v -> printf "translate_node: %s %s\n%!" k (ENIAMsemStringOf.linear_term 0 v); t, (k,v) :: attrs,cat,proj) in *) | |
328 | + gf=""; role=""; role_attr=""; selprefs=Dot; sense=Dot; arole=""; arole_attr=""; arev=false; sem_args=Dot} in | |
329 | + let t,attrs,cat,coerced = Xlist.fold attrs (t,[],Dot,Dot) (fun (t,attrs,cat,coerced) -> function | |
330 | + "gf",Val s -> {t with gf=s},attrs,cat,coerced | |
331 | + | "role",Val s -> {t with role=s},attrs,cat,coerced | |
332 | + | "role-attr",Val s -> {t with role_attr=s},attrs,cat,coerced | |
333 | + | "selprefs",s -> {t with selprefs=s},attrs,cat,coerced | |
334 | + | "sense",s -> {t with sense=s},attrs,cat,coerced | |
335 | + | "hipero",_ -> t,attrs,cat,coerced | |
336 | + | "arole",Val s -> {t with arole=s},attrs,cat,coerced | |
337 | + | "arole-attr",Val s -> {t with arole_attr=s},attrs,cat,coerced | |
338 | + | "arev",Val "-" -> {t with arev=false},attrs,cat,coerced | |
339 | + | "arev",Val "+" -> {t with arev=true},attrs,cat,coerced | |
340 | + | "agf",Val s -> t,attrs,cat,coerced | |
341 | + | "sem-args",s -> {t with sem_args=s},attrs,cat,coerced | |
342 | + | "rev-hipero",_ -> t,attrs,cat,coerced | |
343 | + | "fopinion",_ -> t,attrs,cat,coerced | |
344 | + | "sopinion",_ -> t,attrs,cat,coerced | |
345 | + | "ACM",s -> t,("ACM",s) :: attrs,cat,coerced | |
346 | + | "ASPECT",s -> t,("ASPECT",s) :: attrs,cat,coerced | |
347 | + | "NEGATION",s -> t,("NEGATION",s) :: attrs,cat,coerced | |
348 | + | "MOOD",s -> t,("MOOD",s) :: attrs,cat,coerced | |
349 | + | "TENSE",s -> t,("TENSE",s) :: attrs,cat,coerced | |
350 | + | "CTYPE",s -> t,("CTYPE",s) :: attrs,cat,coerced | |
351 | + | "controller",s -> t,("controller",s) :: attrs,cat,coerced | |
352 | + | "controllee",s -> t,("controllee",s) :: attrs,cat,coerced | |
353 | + | "coref",s -> t,attrs,cat,coerced | |
354 | + | "label",Val s -> {t with label=s},attrs,cat,coerced | |
355 | + | "def-label",Val s -> {t with def_label=s},attrs,cat,coerced | |
356 | + | "CAT",s -> t,attrs,s,coerced | |
357 | + | "COERCED",s -> t,attrs,cat,s | |
358 | + | "NUM",s -> t,("NUM",s) :: attrs,cat,coerced | |
359 | + | "CASE",s -> t,("CASE",s) :: attrs,cat,coerced | |
360 | + | "GEND",s -> t,("GEND",s) :: attrs,cat,coerced | |
361 | + | "PERS",s -> t,("PERS",s) :: attrs,cat,coerced | |
362 | + | "NSYN",s -> t,("NSYN",s) :: attrs,cat,coerced | |
363 | + | "NSEM",s -> t,("NSEM",s) :: attrs,cat,coerced | |
364 | + | "MODE",s -> t,("MODE",s) :: attrs,cat,coerced | |
365 | + | "GRAD",s -> t,("GRAD",s) :: attrs,cat,coerced | |
366 | + | "PSEM",s -> t,("PSEM",s) :: attrs,cat,coerced | |
367 | + (* | k,v -> printf "translate_node: %s %s\n%!" k (ENIAMsemStringOf.linear_term 0 v); t, (k,v) :: attrs,cat,coerced) in *) | |
354 | 368 | | k,v -> failwith (sprintf "translate_node: %s %s\n%!" k (ENIAMsemStringOf.linear_term 0 v))) in |
355 | - {t with attrs=attrs},cat,proj | |
369 | + {t with attrs=attrs},cat,coerced | |
356 | 370 | |
357 | 371 | and create_concepts tokens lex_sems = function |
358 | 372 | ENIAM_LCGtypes.Node t -> |
359 | - let t,cat,proj = translate_node tokens lex_sems t in | |
360 | - create_normal_concept tokens lex_sems t cat proj | |
373 | + let t,cat,coerced = translate_node tokens lex_sems t in | |
374 | + create_normal_concept tokens lex_sems t cat coerced | |
361 | 375 | | ENIAM_LCGtypes.Tuple l -> Tuple(Xlist.map l (create_concepts tokens lex_sems)) |
362 | 376 | | ENIAM_LCGtypes.Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, create_concepts tokens lex_sems t)) |
363 | 377 | | ENIAM_LCGtypes.Dot -> Dot |
... | ... | @@ -382,6 +396,7 @@ let rec make_tree_rec references = function |
382 | 396 | | SingleRelation r -> SingleRelation r |
383 | 397 | (* | TripleRelation(r,a,s,t) -> TripleRelation(r,a,make_tree_rec references s,make_tree_rec references t) *) |
384 | 398 | | AddRelation(t,r,a,s) -> AddRelation(make_tree_rec references t,r,a,make_tree_rec references s) |
399 | + | AddSingleRelation(r,s) -> AddSingleRelation(r,make_tree_rec references s) | |
385 | 400 | | RemoveRelation(r,a,t) -> RemoveRelation(r,a,make_tree_rec references t) |
386 | 401 | | SetContextName(s,t) -> SetContextName(s,make_tree_rec references t) |
387 | 402 | | CreateContext(s,t) -> CreateContext(s,make_tree_rec references t) |
... | ... | @@ -408,13 +423,16 @@ let rec validate_translation r = function |
408 | 423 | | SingleRelation _ -> () |
409 | 424 | (* | TripleRelation(_,_,s,t) -> validate_translation r s; validate_translation r t *) |
410 | 425 | | AddRelation(t,_,_,s) -> validate_translation r t; validate_translation r s |
426 | + | AddSingleRelation(_,s) -> validate_translation r s | |
411 | 427 | | RemoveRelation(_,_,t) -> validate_translation r t |
412 | 428 | | SetContextName(s,t) -> validate_translation r t |
413 | 429 | | CreateContext(s,t) -> validate_translation r t |
414 | 430 | (* | MakeTripleRelation(_,_,t) -> validate_translation r t *) |
415 | 431 | | ManageCoordination(_,t) -> validate_translation r t |
416 | 432 | | Tuple l -> Xlist.iter l (validate_translation r) |
417 | - | Variant(e,l) -> Xlist.iter l (fun (i,t) -> validate_translation r t) | |
433 | + | Variant(e,l) -> | |
434 | + if e = "" then r := "validate_translation: empty variant label" :: !r; | |
435 | + Xlist.iter l (fun (i,t) -> validate_translation r t) | |
418 | 436 | | Dot -> () |
419 | 437 | | t -> failwith ("validate_translation: " ^ ENIAMsemStringOf.linear_term 0 t) |
420 | 438 | |
... | ... | @@ -500,6 +518,14 @@ let rec reduce_tree = function |
500 | 518 | (* | TripleRelation(r,a,s,t) -> TripleRelation(r,a,reduce_tree s,reduce_tree t) *) |
501 | 519 | (* | AddRelation(Concept c,r,a,s) -> reduce_tree (Concept{c with c_relations=Tuple[Relation(Val r,Val a,s);c.c_relations]}) |
502 | 520 | | AddRelation(Context c,r,a,s) -> reduce_tree (Context{c with cx_relations=Tuple[Relation(Val r,Val a,s);c.cx_relations]})*) |
521 | + | AddSingleRelation(r,t) -> | |
522 | + (match reduce_tree t with | |
523 | + Concept t -> Concept{t with c_relations=Tuple[t.c_relations;SingleRelation r]} | |
524 | + | Context({cx_sense=Val "czy"} as t) -> Context t | |
525 | + | Context({cx_sense=Val "jaki"} as t) -> Context t | |
526 | + | Context({cx_sense=Dot} as t) -> Context{t with cx_sense=Val "czy"} | |
527 | + | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, reduce_tree (AddSingleRelation(r,t)))) | |
528 | + | t -> AddSingleRelation(r,t)) | |
503 | 529 | | AddRelation(t,r,a,s) -> simplify_tree_add_relation r a (reduce_tree s) (reduce_tree t) |
504 | 530 | (* let t = reduce_tree t in |
505 | 531 | let s = reduce_tree s in |
... | ... | @@ -511,7 +537,7 @@ let rec reduce_tree = function |
511 | 537 | (match reduce_tree t with |
512 | 538 | Relation(r,a,t) -> |
513 | 539 | if (r = r0 && a = a0) || r0 = "" then t else |
514 | - Context{empty_context with cx_contents= | |
540 | + Context{empty_context with cx_cat=Val "Situation"; cx_contents= | |
515 | 541 | Concept{empty_concept with c_relations=Relation(r,a,t)}; (*cx_variable=string_of_int id,""; cx_pos=c.c_pos*)} |
516 | 542 | (* | TripleRelation(r,a,s,t) -> |
517 | 543 | Context{empty_context with cx_contents= |
... | ... | @@ -544,7 +570,8 @@ let rec reduce_tree = function |
544 | 570 | (match reduce_tree c with |
545 | 571 | Context c -> |
546 | 572 | let t,args = extract_aroles {t with arole=""} c.cx_contents in |
547 | - make_relation t (Context {c with cx_contents=args}) | |
573 | + (*make_relation t (Context {c with cx_contents=args})*) (* FIXME: to trzeba poprawić tak by działało w obu wersjach parserów *) | |
574 | + Relation(t.role,"",Context {c with cx_contents=args}) | |
548 | 575 | | Variant(e,l) -> reduce_tree (Variant(e,Xlist.map l (fun (i,c) -> i,ManageCoordination(t,c)))) |
549 | 576 | | c -> ManageCoordination(t,c)) |
550 | 577 | | Tuple l -> Tuple(List.rev (Xlist.rev_map l reduce_tree)) |
... | ... | @@ -561,7 +588,9 @@ let rec validate_reduction r = function |
561 | 588 | | SingleRelation _ -> () |
562 | 589 | (* | TripleRelation(_,_,s,t) -> validate_reduction r s; validate_reduction r t *) |
563 | 590 | | Tuple l -> Xlist.iter l (validate_reduction r) |
564 | - | Variant(e,l) -> Xlist.iter l (fun (i,t) -> validate_reduction r t) | |
591 | + | Variant(e,l) -> | |
592 | + if e = "" then r := "validate_reduction: empty variant label" :: !r; | |
593 | + Xlist.iter l (fun (i,t) -> validate_reduction r t) | |
565 | 594 | | Dot -> () |
566 | 595 | | t -> r := ("validate_reduction: " ^ ENIAMsemStringOf.linear_term 0 t) :: !r |
567 | 596 | |
... | ... | @@ -605,6 +634,38 @@ let rec remove_variant_labels map = function |
605 | 634 | | Val s -> Val s |
606 | 635 | | t -> failwith ("remove_variant_labels: " ^ ENIAMsemStringOf.linear_term 0 t) |
607 | 636 | |
637 | +let rec set_variant_labels map = function | |
638 | + Concept c -> Concept{c with | |
639 | + c_sense=set_variant_labels map c.c_sense; | |
640 | + c_name=set_variant_labels map c.c_name; | |
641 | + c_quant=set_variant_labels map c.c_quant; | |
642 | + c_cat=set_variant_labels map c.c_cat; | |
643 | + c_relations=set_variant_labels map c.c_relations} | |
644 | + | Context c -> Context{c with | |
645 | + cx_sense=set_variant_labels map c.cx_sense; | |
646 | + cx_contents=set_variant_labels map c.cx_contents; | |
647 | + cx_cat=set_variant_labels map c.cx_cat; | |
648 | + cx_relations=set_variant_labels map c.cx_relations} | |
649 | + | Relation(r,a,t) -> Relation(r,a,set_variant_labels map t) | |
650 | + | RevRelation(r,a,t) -> RevRelation(r,a,set_variant_labels map t) | |
651 | + | SingleRelation r -> SingleRelation r | |
652 | + | Tuple l -> Tuple(List.rev (Xlist.rev_map l (set_variant_labels map))) | |
653 | + | Variant(e,l) -> | |
654 | + let e = try StringMap.find map e with Not_found -> ENIAM_LCGreductions.get_variant_label () in | |
655 | + let l = Xlist.rev_map l (fun (i,t) -> i, set_variant_labels map t) in | |
656 | + Variant(e,List.rev l) | |
657 | + | Dot -> Dot | |
658 | + | Val s -> Val s | |
659 | + | t -> failwith ("set_variant_labels: " ^ ENIAMsemStringOf.linear_term 0 t) | |
660 | + | |
661 | +let manage_variant_labels t = | |
662 | + ENIAM_LCGreductions.reset_variant_label (); | |
663 | + let qmap = count_variant_labels StringQMap.empty t in | |
664 | + let map = StringQMap.fold qmap StringMap.empty (fun map k _ -> | |
665 | + if k = "" then map else | |
666 | + StringMap.add map k (ENIAM_LCGreductions.get_variant_label ())) in | |
667 | + set_variant_labels map t | |
668 | + | |
608 | 669 | let rec simplify_tree = function |
609 | 670 | Concept c -> Concept{c with |
610 | 671 | c_sense=simplify_tree c.c_sense; |
... | ... | @@ -625,6 +686,7 @@ let rec simplify_tree = function |
625 | 686 | let l = Xlist.fold l [] (fun l t -> |
626 | 687 | match simplify_tree t with |
627 | 688 | Dot -> l |
689 | + | Tuple l2 -> l2 @ l | |
628 | 690 | | t -> t :: l) in |
629 | 691 | (match l with |
630 | 692 | [] -> Dot |
... | ... | @@ -655,7 +717,8 @@ let rec simplify_tree = function |
655 | 717 | c_cat = simplify_tree (Variant(e,lt3))} |
656 | 718 | | Context c -> |
657 | 719 | let lt1,lt2,lt3 = Xlist.fold l ([],[],[]) (fun (lt1,lt2,lt3) -> function |
658 | - i,Context c2 -> if c.cx_sense = c2.cx_sense then (i,c2.cx_contents) :: lt1, (i,c2.cx_relations) :: lt2, (i,c2.cx_cat) :: lt3 else raise Not_found | |
720 | + i,Context c2 -> if c.cx_sense = c2.cx_sense && c.cx_label = c2.cx_label && | |
721 | + c.cx_def_label = c2.cx_def_label then (i,c2.cx_contents) :: lt1, (i,c2.cx_relations) :: lt2, (i,c2.cx_cat) :: lt3 else raise Not_found | |
659 | 722 | | _ -> raise Not_found) in |
660 | 723 | let e = if e = "" then ENIAM_LCGreductions.get_variant_label () else e in |
661 | 724 | Context{c with |
... | ... |
semantics/ENIAMsemGraphOf.ml
... | ... | @@ -32,7 +32,7 @@ let string_of_node t = |
32 | 32 | let l = [ |
33 | 33 | "ORTH",Val t.orth;"LEMMA",Val t.lemma;"POS",Val t.pos;"ID",Val (string_of_int t.id);"LABEL",Val t.label;"DEF-LABEL",Val t.def_label;"WEIGHT",Val (string_of_float t.weight); |
34 | 34 | "SYMBOL",t.symbol;"ARG_SYMBOL",t.arg_symbol;"ARG_DIR",Val t.arg_dir; |
35 | - "GF",Val t.gf;"ROLE",Val t.role;"ROLE_ATTR",Val t.role_attr;"SELPREFS",t.selprefs;"MEANING",t.meaning; | |
35 | + "GF",Val t.gf;"ROLE",Val t.role;"ROLE_ATTR",Val t.role_attr;"SELPREFS",t.selprefs;"SENSE",t.sense; | |
36 | 36 | "AROLE",Val t.arole;"AROLE_ATTR",Val t.role_attr;"AREV",Val (string_of_bool t.arev);"SEM_ARGS",t.sem_args] @ t.attrs in |
37 | 37 | "{ " ^ String.concat " | " (Xlist.map l (fun (e,t) -> "{ " ^ e ^ " | " ^ escape_string (ENIAMsemStringOf.linear_term 0 t) ^ " }")) ^ " }" |
38 | 38 | |
... | ... | @@ -225,7 +225,9 @@ let rec print_graph2_rec file edge_rev edge_label edge_style edge_head upper = f |
225 | 225 | | Context t -> |
226 | 226 | let id = !id_counter in |
227 | 227 | incr id_counter; |
228 | - fprintf file " subgraph cluster%d {\nlabel=\"%s%s\"\n" id | |
228 | + fprintf file " subgraph cluster%d {\nlabel=\"%s%s%s%s\"\n" id | |
229 | + (if t.cx_label="" then "" else "?" ^ t.cx_label ^ " ") | |
230 | + (if t.cx_def_label="" then "" else "*" ^ t.cx_def_label ^ " ") | |
229 | 231 | (if t.cx_cat=Dot then "" else escape_string (ENIAMsemStringOf.linear_term 0 t.cx_cat ^ " ")) |
230 | 232 | (if t.cx_sense = Dot then "" else escape_string (ENIAMsemStringOf.linear_term 0 t.cx_sense)); |
231 | 233 | let iid = print_graph2_rec file false "" "" "" 0 t.cx_contents in |
... | ... | @@ -268,6 +270,13 @@ let rec print_graph2_rec file edge_rev edge_label edge_style edge_head upper = f |
268 | 270 | let _ = print_graph2_rec file false "" "" "" id t in |
269 | 271 | let _ = print_graph2_rec file false "" "" "" id s in |
270 | 272 | id |
273 | + | AddSingleRelation(role,t) -> | |
274 | + let id = !id_counter in | |
275 | + incr id_counter; | |
276 | + fprintf file " %d [shape=circle,label=\"AddSingleRelation\\n%s\"]\n" id (ENIAMsemStringOf.linear_term 0 role); | |
277 | + print_edge2 file edge_rev edge_label edge_style edge_head "" upper id; | |
278 | + let _ = print_graph2_rec file false "" "" "" id t in | |
279 | + id | |
271 | 280 | | RemoveRelation(role,role_attr,t) -> |
272 | 281 | let id = !id_counter in |
273 | 282 | incr id_counter; |
... | ... |
semantics/ENIAMsemLatexOf.ml
... | ... | @@ -50,7 +50,7 @@ let rec linear_term c = function |
50 | 50 | "WEIGHT",Val (string_of_float t.weight);"SYMBOL",t.symbol; |
51 | 51 | "ARG_SYMBOL",t.arg_symbol;"ARG_DIR",Val t.arg_dir; |
52 | 52 | "GF",Val t.gf; "ROLE", Val t.role; "ROLE-ATTR", Val t.role_attr; |
53 | - "SELPREFS",t.selprefs; "MEANING",t.meaning; | |
53 | + "SELPREFS",t.selprefs; "SENSE",t.sense; | |
54 | 54 | "AROLE", Val t.role; "AROLE-ATTR", Val t.role_attr; "AREV", Val (if t.arev then "+" else "-"); |
55 | 55 | "SEM-ARGS",t.sem_args; "ARGS",t.args] @ t.attrs) (fun (e,t) -> |
56 | 56 | "\\text{" ^ (Xlatex.escape_string e) ^ "} & " ^ (linear_term 0 t)))) ^ "\\end{array}\\right]}" |
... | ... | @@ -66,7 +66,7 @@ let rec linear_term c = function |
66 | 66 | | Context c -> |
67 | 67 | "{\\left[\\begin{array}{ll}" ^ |
68 | 68 | (String.concat "\\\\ " (Xlist.map ([ |
69 | - "SENSE",c.cx_sense;"CAT",c.cx_cat; | |
69 | + "SENSE",c.cx_sense;"CAT",c.cx_cat;"LABEL",Val c.cx_label;"DEF-LABEL",Val c.cx_def_label; | |
70 | 70 | "VARIABLE",Val (fst c.cx_variable ^ "_" ^ snd c.cx_variable);"POS",Val (string_of_int c.cx_pos); |
71 | 71 | "RELATIONS",c.cx_relations;"CONTENTS",c.cx_contents]) (fun (e,t) -> |
72 | 72 | "\\text{" ^ (Xlatex.escape_string e) ^ "} & " ^ (linear_term 0 t)))) ^ "\\end{array}\\right]}" |
... | ... | @@ -75,6 +75,7 @@ let rec linear_term c = function |
75 | 75 | | SingleRelation r -> "{\\bf singlerelation}(" ^ linear_term 0 r ^ ")" |
76 | 76 | (* | TripleRelation(r,a,c,t) -> "{\\bf triplerelation}(" ^ (*linear_term 0*) r ^ "," ^ (*linear_term 0*) a ^ "," ^ linear_term 0 c ^ "," ^ linear_term 0 t ^ ")" *) |
77 | 77 | | AddRelation(t,r,a,s) -> "{\\bf addrelation}(" ^ linear_term 0 t ^ "," ^ r ^ "," ^ a ^ "," ^ linear_term 0 s ^ ")" |
78 | + | AddSingleRelation(r,s) -> "{\\bf addrelation}(" ^ linear_term 0 r ^ "," ^ linear_term 0 s ^ ")" | |
78 | 79 | | RemoveRelation(r,a,t) -> "{\\bf removerelation}(" ^ r ^ "," ^ a ^ "," ^ linear_term 0 t ^ ")" |
79 | 80 | | SetContextName(s,t) -> "{\\bf setcontextname}(" ^ linear_term 0 s ^ "," ^ linear_term 0 t ^ ")" |
80 | 81 | | CreateContext(s,t) -> "{\\bf createcontext}(" ^ linear_term 0 (Context s) ^ "," ^ linear_term 0 t ^ ")" |
... | ... |
semantics/ENIAMsemLexicon.ml
... | ... | @@ -45,7 +45,7 @@ let parse_multi p = function |
45 | 45 | | tokens -> tokens,p |
46 | 46 | |
47 | 47 | let parse_morf p = function |
48 | - [T "1"] -> {p with is_necessary=Opt} | |
48 | + [T "1"] -> if p.is_necessary=Multi then p else {p with is_necessary=Opt} | |
49 | 49 | | tokens -> |
50 | 50 | let l = Xlist.map (try Lexer.split_symbol (T "*") [] tokens with _ -> failwith "parse_morf: split_symbol *") (function |
51 | 51 | [T s] -> Atom s |
... | ... | @@ -75,6 +75,7 @@ let parse_role p = function |
75 | 75 | | "Has" -> {p with role="Has"; sel_prefs=[SynsetName "ALL"]} |
76 | 76 | | "PHas" -> {p with role="PHas"; sel_prefs=[SynsetName "ALL"]} |
77 | 77 | | "PApoz" -> {p with role="PApoz"; sel_prefs=[SynsetName "ALL"]} |
78 | + | "Merge" -> {p with role="Merge"; sel_prefs=[SynsetName "ALL"]} | |
78 | 79 | | s -> failwith ("parse_role: " ^ s) |
79 | 80 | |
80 | 81 | let parse_entry = function |
... | ... |
semantics/ENIAMsemStringOf.ml
... | ... | @@ -43,7 +43,7 @@ let rec linear_term c = function |
43 | 43 | | Context c -> |
44 | 44 | "[" ^ |
45 | 45 | (String.concat "; " (Xlist.map ([ |
46 | - "SENSE",c.cx_sense;"CAT",c.cx_cat; | |
46 | + "SENSE",c.cx_sense;"CAT",c.cx_cat;"LABEL",Val c.cx_label;"DEF-LABEL",Val c.cx_def_label; | |
47 | 47 | "VARIABLE",Val (fst c.cx_variable ^ "_" ^ snd c.cx_variable);"POS",Val (string_of_int c.cx_pos); |
48 | 48 | "RELATIONS",c.cx_relations;"CONTENTS",c.cx_contents]) (fun (e,t) -> |
49 | 49 | e ^ ": " ^ (linear_term 0 t)))) ^ "]" |
... | ... | @@ -52,6 +52,7 @@ let rec linear_term c = function |
52 | 52 | | SingleRelation r -> "singlerelation(" ^ linear_term 0 r ^ ")" |
53 | 53 | (* | TripleRelation(r,a,c,t) -> "triplerelation(" ^ r ^ "," ^ a ^ "," ^ linear_term 0 c ^ "," ^ linear_term 0 t ^ ")" *) |
54 | 54 | | AddRelation(t,r,a,s) -> "addrelation(" ^ linear_term 0 t ^ "," ^ r ^ "," ^ a ^ "," ^ linear_term 0 s ^ ")" |
55 | + | AddSingleRelation(r,s) -> "addsinglerelation(" ^ linear_term 0 r ^ "," ^ linear_term 0 s ^ ")" | |
55 | 56 | | RemoveRelation(r,a,t) -> "removerelation(" ^ r ^ "," ^ a ^ "," ^ linear_term 0 t ^ ")" |
56 | 57 | | SetContextName(s,t) -> "setcontextname(" ^ linear_term 0 s ^ "," ^ linear_term 0 t ^ ")" |
57 | 58 | | CreateContext(s,t) -> "createcontext(" ^ linear_term 0 (Context s) ^ "," ^ linear_term 0 t ^ ")" |
... | ... |
semantics/ENIAMsemTypes.ml
... | ... | @@ -30,9 +30,9 @@ type node = { |
30 | 30 | amorf: ENIAMwalTypes.morf; |
31 | 31 | arole: string; |
32 | 32 | arole_attr: string; |
33 | - meaning: string; | |
33 | + sense: string; | |
34 | 34 | hipero: StringSet.t; |
35 | - meaning_weight: float; | |
35 | + sense_weight: float; | |
36 | 36 | position: ENIAMwalTypes.schema_field;*) |
37 | 37 | attrs: (string * linear_term) list; |
38 | 38 | args: linear_term; |
... | ... | @@ -40,13 +40,14 @@ type node = { |
40 | 40 | role: string; |
41 | 41 | role_attr: string; |
42 | 42 | selprefs: linear_term; |
43 | - meaning: linear_term; | |
43 | + sense: linear_term; | |
44 | 44 | arole: string; |
45 | 45 | arole_attr: string; |
46 | 46 | arev: bool; |
47 | 47 | sem_args: linear_term; |
48 | 48 | label: string; |
49 | 49 | def_label: string; |
50 | + snode: string; | |
50 | 51 | } |
51 | 52 | |
52 | 53 | and concept = |
... | ... | @@ -57,7 +58,7 @@ and concept = |
57 | 58 | c_pos: int; c_cat: linear_term} |
58 | 59 | |
59 | 60 | and context = |
60 | - {cx_sense: linear_term; cx_contents: linear_term; | |
61 | + {cx_sense: linear_term; cx_contents: linear_term; cx_label: string; cx_def_label: string; | |
61 | 62 | cx_relations: linear_term; cx_variable: (string * string); cx_pos: int; cx_cat: linear_term} |
62 | 63 | |
63 | 64 | and linear_term = |
... | ... | @@ -72,9 +73,10 @@ and linear_term = |
72 | 73 | | RevRelation of string * string * linear_term (* role * role_attr * concept *) |
73 | 74 | | SingleRelation of linear_term |
74 | 75 | (* | TripleRelation of string * string * linear_term * linear_term (* role * role_attr * concept *) *) |
75 | - | AddRelation of linear_term * string * string * linear_term (* nadrządnik * role * role_attr * podrzędnik *) | |
76 | + | AddRelation of linear_term * string * string * linear_term (* nadrzędnik * role * role_attr * podrzędnik *) | |
77 | + | AddSingleRelation of linear_term * linear_term (* role * podrzędnik *) | |
76 | 78 | | RemoveRelation of string * string * linear_term |
77 | - | SetContextName of linear_term * linear_term (* meaning * concept *) | |
79 | + | SetContextName of linear_term * linear_term (* sense * concept *) | |
78 | 80 | | CreateContext of context * linear_term (* context * args *) |
79 | 81 | (* | MakeTripleRelation of string * string * linear_term (* role * role_attr * concept *) *) |
80 | 82 | | ManageCoordination of node * linear_term |
... | ... |
semantics/ENIAMsemValence.ml
... | ... | @@ -22,7 +22,7 @@ open ENIAM_LCGlexiconTypes |
22 | 22 | open ENIAMlexSemanticsTypes |
23 | 23 | open Xstd |
24 | 24 | |
25 | -type pos = {role: linear_term; role_attr: linear_term; selprefs: linear_term; gf: ENIAMwalTypes.gf; | |
25 | +type pos = {role: linear_term; role_attr: linear_term; selprefs: linear_term; catprefs: string list; gf: ENIAMwalTypes.gf; | |
26 | 26 | cr: string list; ce: string list; |
27 | 27 | is_necessary: bool; is_pro: bool; is_prong: bool; is_multi: bool; dir: string; morfs: StringSet.t} |
28 | 28 | |
... | ... | @@ -148,6 +148,8 @@ let rec apply_selector v2 chosen_map = function |
148 | 148 | | Nsem,("NSEM",v) :: l -> match_value v2 chosen_map v |
149 | 149 | | Case,("CASE",v) :: l -> match_value v2 chosen_map v |
150 | 150 | | Mode,("MODE",v) :: l -> match_value v2 chosen_map v |
151 | + | Acm,("ACM",v) :: l -> match_value v2 chosen_map v | |
152 | + | Cat,("CAT",v) :: l -> match_value v2 chosen_map v | |
151 | 153 | | sel,(attr,v) :: l -> (*print_endline ("apply_selector: " ^ ENIAMcategoriesPL.string_of_selector sel ^ " " ^ attr);*) apply_selector v2 chosen_map (sel,l) |
152 | 154 | |
153 | 155 | let rec apply_neg_selector vals chosen_map = function |
... | ... | @@ -218,7 +220,22 @@ let string_of_arg arg = |
218 | 220 | String.concat ", " (Xlist.map arg (fun ((arg_symbol,dir),t) -> (string_of_argdir dir) ^ arg_symbol ^ ":" ^ ENIAM_LCGstringOf.linear_term 0 t)) |
219 | 221 | |
220 | 222 | let string_of_position p = |
221 | - (string_of_argdir p.dir) ^ String.concat "+" (StringSet.to_list p.morfs) | |
223 | + (string_of_argdir p.dir) ^ | |
224 | + (if p.is_multi then "?" else "") ^ | |
225 | + String.concat "+" (StringSet.to_list p.morfs) | |
226 | + | |
227 | +let manage_arg p t = | |
228 | + let t = SetAttr("gf",Val (ENIAMwalStringOf.gf p.gf),t) in | |
229 | + let t = | |
230 | + if p.gf = ENIAMwalTypes.SUBJ || p.gf = ENIAMwalTypes.OBJ || p.gf = ENIAMwalTypes.ARG then | |
231 | + SetAttr("role",p.role,SetAttr("role-attr",p.role_attr,SetAttr("selprefs",p.selprefs,t))) | |
232 | + else if p.gf = ENIAMwalTypes.CORE then SetAttr("selprefs",p.selprefs,t) | |
233 | + else if p.gf = ENIAMwalTypes.ADJUNCT || p.gf = ENIAMwalTypes.NOSEM || p.gf = ENIAMwalTypes.CORE then t | |
234 | + else failwith "manage_arg: ni 2" in | |
235 | + let t = Xlist.fold p.cr t (fun t cr -> SetAttr("controller",Val cr,t)) in | |
236 | + let t = Xlist.fold p.ce t (fun t ce -> SetAttr("controllee",Val ce,t)) in | |
237 | + let t = if p.gf = ENIAMwalTypes.NOSEM then Dot else t in | |
238 | + t | |
222 | 239 | |
223 | 240 | let rec match_arg_positions lemma arg rev = function |
224 | 241 | p :: positions -> |
... | ... | @@ -228,16 +245,13 @@ let rec match_arg_positions lemma arg rev = function |
228 | 245 | (match l with |
229 | 246 | [] -> (*print_endline "match_arg_positions: not matched";*) match_arg_positions lemma arg (p :: rev) positions |
230 | 247 | | [t] -> |
231 | - let t = SetAttr("gf",Val (ENIAMwalStringOf.gf p.gf),t) in | |
232 | - let t = | |
233 | - if p.gf = ENIAMwalTypes.SUBJ || p.gf = ENIAMwalTypes.OBJ || p.gf = ENIAMwalTypes.ARG then | |
234 | - SetAttr("role",p.role,SetAttr("role-attr",p.role_attr,SetAttr("selprefs",p.selprefs,t))) | |
235 | - else if p.gf = ENIAMwalTypes.CORE then SetAttr("selprefs",p.selprefs,t) | |
236 | - else if p.gf = ENIAMwalTypes.ADJUNCT || p.gf = ENIAMwalTypes.NOSEM || p.gf = ENIAMwalTypes.CORE then t | |
237 | - else failwith "match_arg_positions: ni 2" in | |
238 | - let t = Xlist.fold p.cr t (fun t cr -> SetAttr("controller",Val cr,t)) in | |
239 | - let t = Xlist.fold p.ce t (fun t ce -> SetAttr("controllee",Val ce,t)) in | |
240 | - let t = if p.gf = ENIAMwalTypes.NOSEM then Dot else t in | |
248 | + let t = manage_arg p t in | |
249 | + if p.is_multi then (t, rev @ (p :: positions)) :: (match_arg_positions lemma arg (p :: rev) positions) | |
250 | + else (t, rev @ positions) :: (match_arg_positions lemma arg (p :: rev) positions) | |
251 | + | [t1;t2] -> (* FIXME: przydałoby się to uogólnić na listę dowolnej długości *) | |
252 | + let t1 = manage_arg p t1 in | |
253 | + let t2 = manage_arg p t2 in | |
254 | + let t = Variant("",["1",t1;"2",t2]) in | |
241 | 255 | if p.is_multi then (t, rev @ (p :: positions)) :: (match_arg_positions lemma arg (p :: rev) positions) |
242 | 256 | else (t, rev @ positions) :: (match_arg_positions lemma arg (p :: rev) positions) |
243 | 257 | | _ -> failwith ("match_arg_positions: lemma=" ^ lemma ^ " arg=" ^ string_of_arg arg ^ " position=" ^ string_of_position p)) |
... | ... | @@ -257,16 +271,17 @@ let rec match_args_positions_rec lemma prong_attrs positions = function |
257 | 271 | [Xlist.fold positions [] (fun found p -> |
258 | 272 | if not p.is_pro then found else |
259 | 273 | let attrs = if p.is_prong then prong_attrs else [] in |
260 | - let cats = p.selprefs(*ENIAM_LCGrules.make_variant (ENIAMwalRenderer.extract_sel_prefs p.sel_prefs)*) in | |
274 | + let cats = p.catprefs(*ENIAM_LCGrules.make_variant (ENIAMwalRenderer.extract_sel_prefs p.sel_prefs)*) in | |
261 | 275 | let lemma = get_pro_lemma attrs in |
262 | - let attrs = ["CAT",cats;"PROJ",cats] @ attrs in | |
263 | 276 | let sem_args = try StringMap.find ENIAMlexSemanticsData.pron_sem_args lemma with Not_found -> failwith "match_args_positions_rec" in |
264 | - let attrs = ["meaning",Val lemma;"hipero",Tuple[Val "ALL"; Val "0"];"role",p.role; | |
277 | + let attrs = ["sense",Val lemma;"hipero",Tuple[Val "ALL"; Val "0"];"role",p.role; | |
265 | 278 | "role-attr",p.role_attr; "selprefs",p.selprefs; "gf",Val (ENIAMwalStringOf.gf p.gf); |
266 | 279 | "agf",Val ""; "sem-args",make_sem_args sem_args; "rev-hipero",Val "+"] @ attrs in |
267 | 280 | let attrs = Xlist.fold p.cr attrs (fun attrs cr -> ("controller",Val cr) :: attrs) in |
268 | 281 | let attrs = Xlist.fold p.ce attrs (fun attrs ce -> ("controllee",Val ce) :: attrs) in |
269 | - Node{ENIAM_LCGrenderer.empty_node with lemma=lemma; pos="pro"; attrs=attrs} :: found)] | |
282 | + Xlist.fold cats found (fun found cat -> | |
283 | + let attrs = ["CAT",Val cat;"COERCED",Val cat] @ attrs in | |
284 | + Node{ENIAM_LCGrenderer.empty_node with lemma=lemma; pos="pro"; attrs=attrs} :: found))] | |
270 | 285 | |
271 | 286 | (* FIXME: opcjonalność podrzędników argumentów zleksykalizowanych *) |
272 | 287 | |
... | ... | @@ -314,6 +329,7 @@ let translate_position id p = |
314 | 329 | [] -> Dot |
315 | 330 | | [s] -> Val s |
316 | 331 | | l -> Tuple(Xlist.rev_map l (fun s -> Val s))); |
332 | + catprefs = p.ENIAMwalTypes.cat_prefs; | |
317 | 333 | gf=p.ENIAMwalTypes.gf; |
318 | 334 | cr=Xlist.map p.ENIAMwalTypes.cr (fun cr -> id ^ "-" ^ cr); |
319 | 335 | ce=Xlist.map p.ENIAMwalTypes.ce (fun ce -> id ^ "-" ^ ce); |
... | ... | @@ -369,17 +385,19 @@ let rec assign_frames_rec tokens lex_sems tree arg_symbols visited = function |
369 | 385 | let frame = ENIAMsemLexicon.extend_frame phsymbol frame in |
370 | 386 | (* print_endline "passed"; *) |
371 | 387 | (attrs,symbol,frame,Xlist.rev_map frame.positions (translate_position (string_of_int t.id))) :: frames |
372 | - with Not_found -> (*print_endline "rejected";*) frames) in | |
373 | - if frames = [] then failwith "assign_frames_rec: no frame" else | |
388 | + with Not_found -> | |
389 | + (* print_endline "rejected"; *) | |
390 | + frames) in | |
391 | + if frames = [] then failwith ("assign_frames_rec: no frame phsymbol='" ^ phsymbol ^ "' node='" ^ t.lemma ^ "'") else | |
374 | 392 | let prong_attrs = get_prong_attrs t.attrs in |
375 | 393 | let e = ENIAM_LCGreductions.get_variant_label () in |
376 | 394 | let l,_ = Xlist.fold frames ([],1) (fun (l,n) (attrs,symbol,frame,positions) -> |
377 | 395 | (* Printf.printf "assign_frames_rec 3: lemma=%s args=[%s] positions=[%s]\n%!" t.lemma (String.concat "; " (Xlist.map args string_of_arg)) (String.concat "; " (Xlist.map positions string_of_position)); *) |
378 | - if frame.meanings = [] then failwith ("assign_frames_rec: no meanings '" ^ t.lemma ^ "'") else | |
396 | + if frame.senses = [] then failwith ("assign_frames_rec: no senses '" ^ t.lemma ^ "'") else | |
379 | 397 | Xlist.fold (match_args_positions t.lemma prong_attrs args positions) (l,n) (fun (l,n) args -> |
380 | - Xlist.fold frame.meanings (l,n) (fun (l,n) (meaning,hipero,weight) -> | |
398 | + Xlist.fold frame.senses (l,n) (fun (l,n) (sense,hipero,weight) -> | |
381 | 399 | (string_of_int n, Node{t with attrs= |
382 | - ("meaning",Val meaning) :: | |
400 | + ("sense",Val sense) :: | |
383 | 401 | ("hipero",ENIAM_LCGrules.make_variant (Xlist.map hipero (fun (h,n) -> Tuple[Val h;Val(string_of_int n)]))) :: |
384 | 402 | ("arole",Val frame.arole) :: |
385 | 403 | ("arole-attr",Val frame.arole_attr) :: |
... | ... | @@ -390,7 +408,9 @@ let rec assign_frames_rec tokens lex_sems tree arg_symbols visited = function |
390 | 408 | ("fopinion",Val (ENIAMwalStringOf.opinion frame.fopinion)) :: |
391 | 409 | ("sopinion",Val (ENIAMwalStringOf.opinion frame.sopinion)) :: attrs; args=args; symbol=symbol}) :: |
392 | 410 | l,n+1))) in |
393 | - if l = [] then ((*print_endline ("assign_frames_rec 4: no frame assingment found for " ^ t.lemma ^ " " ^ ENIAM_LCGstringOf.linear_term 0 t.symbol);*)raise (NoFrame(t.lemma,ENIAM_LCGstringOf.linear_term 0 t.symbol,visited))) else | |
411 | + if l = [] then ( | |
412 | + (* print_endline ("assign_frames_rec 4: no frame assingment found for " ^ t.lemma ^ " " ^ ENIAM_LCGstringOf.linear_term 0 t.symbol); *) | |
413 | + raise (NoFrame(t.lemma,ENIAM_LCGstringOf.linear_term 0 t.symbol,visited))) else | |
394 | 414 | Variant(e,l),visited |
395 | 415 | | Variant(e,l) -> |
396 | 416 | let a = ref "" in |
... | ... |
semantics/ENIAMsemXMLof.ml
... | ... | @@ -36,7 +36,7 @@ let rec linear_term = function |
36 | 36 | Xml.Element("attrs",[],Xlist.map t.attrs (fun (k,v) -> Xml.Element("attr",["name",k],[linear_term v]))); |
37 | 37 | Xml.Element("args",[],[linear_term t.args]); |
38 | 38 | Xml.Element("selprefs",[],[linear_term t.selprefs]); |
39 | - Xml.Element("meaning",[],[linear_term t.meaning]); | |
39 | + Xml.Element("sense",[],[linear_term t.sense]); | |
40 | 40 | Xml.Element("sem_args",[],[linear_term t.sem_args])]) |
41 | 41 | | Ref i -> Xml.Element("Ref",["index",string_of_int i],[]) |
42 | 42 | | Concept c -> |
... | ... | @@ -49,8 +49,8 @@ let rec linear_term = function |
49 | 49 | Xml.Element("relations",[],[linear_term c.c_relations]); |
50 | 50 | Xml.Element("cat",[],[linear_term c.c_cat])]) |
51 | 51 | | Context c -> |
52 | - Xml.Element("Context", | |
53 | - ["variable",fst c.cx_variable ^ "_" ^ snd c.cx_variable;"pos",string_of_int c.cx_pos], | |
52 | + Xml.Element("Context",["label",c.cx_label;"def_label",c.cx_def_label; | |
53 | + "variable",fst c.cx_variable ^ "_" ^ snd c.cx_variable;"pos",string_of_int c.cx_pos], | |
54 | 54 | [Xml.Element("sense",[],[linear_term c.cx_sense]); |
55 | 55 | Xml.Element("contents",[],[linear_term c.cx_contents]); |
56 | 56 | Xml.Element("relations",[],[linear_term c.cx_relations]); |
... | ... | @@ -61,6 +61,9 @@ let rec linear_term = function |
61 | 61 | | AddRelation(t,r,a,s) -> |
62 | 62 | Xml.Element("AddRelation",["role",r;"role_attribute",a], |
63 | 63 | [Xml.Element("",[],[linear_term t]);Xml.Element("",[],[linear_term s])]) |
64 | + | AddSingleRelation(r,s) -> | |
65 | + Xml.Element("AddSingleRelation",[], | |
66 | + [Xml.Element("",[],[linear_term r]);Xml.Element("",[],[linear_term s])]) | |
64 | 67 | | RemoveRelation(r,a,t) -> Xml.Element("RemoveRelation",["role",r;"role_attribute",a],[linear_term t]) |
65 | 68 | | SetContextName(s,t) -> |
66 | 69 | Xml.Element("SetContextName",[],[linear_term s;linear_term t]) |
... | ... |
semsources/dzieła/HT/spójniki_wspol.xlsx
0 → 100644
No preview for this file type
semsources/dzieła/HT/synsety i antonimy (1).xlsx
0 → 100644
No preview for this file type
semsources/dzieła/HT/synsety i antonimy .xlsx
0 → 100644
No preview for this file type
semsources/dzieła/HT/synsety.zip
0 → 100644
No preview for this file type
semsources/dzieła/HT/synsety_opis.pdf
0 → 100644
No preview for this file type
semsources/dzieła/HT/synsety_spojniki_podrz_wszystkie.xlsx
0 → 100644
No preview for this file type
semsources/dzieła/JP/Anotacja jednostek leksykalnych - ENIAM.xlsx
0 → 100644
No preview for this file type
semsources/dzieła/JP/Anotacja semantyczna kublików.xlsx
0 → 100644
No preview for this file type
semsources/dzieła/JP/Opis do dzieła- instrumentalny.docx
0 → 100644
No preview for this file type
semsources/dzieła/JP/raport z anotacji semantycznej kublików.docx
0 → 100644
No preview for this file type