Commit 233ef97530daf01385c95d087d36604b453d2063

Authored by Daniel Oklesiński
2 parents 078b7deb 4afde66a

merge

Showing 79 changed files with 3569 additions and 707 deletions

Too many changes to show.

To preserve performance only 54 of 79 files are displayed.

LCGlexicon/ENIAM_LCGlexicon.ml
... ... @@ -165,7 +165,7 @@ let make_rules x_flag filename =
165 165 dict_of_grammar lexicon
166 166  
167 167 let find_rules rules cats =
168   - let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith ("find_rules: unable to find rules for category " ^ cats.pos) in
  168 + let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith ("find_rules: unable to find rules for category '" ^ cats.pos ^ "' lemma='" ^ cats.lemma ^ "'") in
169 169 (* Printf.printf "find_rules: %s %s |rules|=%d\n" cats.lemma cats.pos (Xlist.size rules); *)
170 170 let rules = try StringMap.find lex_rules cats.lemma @ rules with Not_found -> rules in
171 171 Xlist.fold rules [] (fun rules (selectors,syntax,semantics) ->
... ... @@ -190,9 +190,12 @@ let assign_valence valence rules =
190 190 if ENIAM_LCGrenderer.count_avar "schema" syntax > 0 then
191 191 Xlist.fold valence l (fun l (selectors,schema) ->
192 192 try
  193 + (* Printf.printf "selectors: %s\n" (string_of_selectors selectors); *)
  194 + (* Printf.printf "cats: %s\n%!" (string_of_cats cats); *)
193 195 let cats = apply_selectors cats selectors in
  196 + (* print_endline "passed"; *)
194 197 (cats,(bracket,quant,ENIAM_LCGrenderer.substitute_schema "schema" schema syntax),semantics) :: l
195   - with Not_found -> l)
  198 + with Not_found -> ((*print_endline "rejected";*) l))
196 199 else (cats,(bracket,quant,syntax),semantics) :: l)
197 200  
198 201 type labels = {
... ... @@ -231,10 +234,13 @@ let make_quantification e rules =
231 234 let make_node id orth lemma pos syntax weight cat_list is_raised =
232 235 let attrs = Xlist.fold cat_list [] (fun attrs -> function
233 236 | Lemma -> attrs
  237 + | IncludeLemmata -> attrs
234 238 | Pos -> attrs
235 239 | Pos2 -> attrs
236 240 | Cat -> ("CAT",SubstVar "cat") :: attrs
237   - | Proj -> ("PROJ",SubstVar "proj") :: attrs
  241 + | Coerced -> ("COERCED",SubstVar "coerced") :: attrs
  242 + | Role -> ("ROLE",SubstVar "role") :: attrs
  243 + | SNode -> ("NODE",SubstVar "node") :: attrs
238 244 | Number -> ("NUM",SubstVar "number") :: attrs
239 245 | Case -> ("CASE",SubstVar "case") :: attrs
240 246 | Gender -> ("GEND",SubstVar "gender") :: attrs
... ... @@ -251,16 +257,24 @@ let make_node id orth lemma pos syntax weight cat_list is_raised =
251 257 | Ctype -> ("CTYPE", SubstVar "ctype") :: attrs
252 258 | Mode -> ("MODE", SubstVar "mode") :: attrs
253 259 | Psem -> ("PSEM", SubstVar "psem") :: attrs
  260 + | Icat -> attrs
254 261 | Inumber -> attrs
255 262 | Igender -> attrs
256 263 | Iperson -> attrs
257 264 | Nperson -> attrs
  265 + | Ncat -> attrs
258 266 | Plemma -> attrs
259 267 | Unumber -> attrs
260 268 | Ucase -> attrs
261 269 | Ugender -> attrs
262 270 | Uperson -> attrs
263   - | Amode -> attrs) in
  271 + | Amode -> attrs
  272 + | Irole -> attrs
  273 + | Prole -> attrs
  274 + | Nrole -> attrs
  275 + | Inode -> attrs
  276 + | Pnode -> attrs
  277 + | Nnode -> attrs) in
264 278 (* | s -> (string_of_selector s, Dot) :: attrs) in *)
265 279 (* | "lex" -> ("LEX",Val "+") :: attrs *)
266 280 (* | s -> failwith ("make_node: " ^ (string_of_selector s))) in *)
... ... @@ -312,6 +326,7 @@ let create_entries rules id orth cats valence lex_entries =
312 326 (* variable_name_ref := []; *)
313 327 if cats.pos="interp" && cats.lemma="<clause>" then (BracketSet(Forward),Dot) :: l else
314 328 if cats.pos="interp" && cats.lemma="</clause>" then (BracketSet(Backward),Dot) :: l else
  329 + if (cats.pos2="noun" || cats.pos2="verb" || cats.pos2="adj" || cats.pos2="adv") && cats.cat="X" && not !default_category_flag && cats.pos <> "aglt" then l else
315 330 let e = get_labels () in
316 331 (* print_endline "create_entries 1"; *)
317 332 let rules = find_rules rules cats in
... ...
LCGlexicon/ENIAM_LCGlexiconParser.ml
... ... @@ -95,6 +95,7 @@ let match_relation = function
95 95  
96 96 let rec split_mid i0 rev = function
97 97 [i,s] -> List.rev ((i,s) :: rev)
  98 + | (i1,s) :: (i2,"|") :: (i3,"|") :: l -> raise (ParseError("split_mid", "duplicated delimeter found", i2))
98 99 | (i1,s) :: (i2,"|") :: l -> split_mid i2 ((i1,s) :: rev) l
99 100 | [] -> raise (ParseError("split_mid", "empty", i0))
100 101 | (i,s) :: l -> raise (ParseError("split_mid", "delimiter not found: " ^ String.concat " " (s :: Xlist.map l snd), i))
... ...
LCGlexicon/ENIAM_LCGlexiconTypes.ml
... ... @@ -17,7 +17,7 @@
17 17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 18 *)
19 19  
20   -type categories = {lemma: string; pos: string; pos2: string; cat: string; proj: string list;
  20 +type categories = {lemma: string; pos: string; pos2: string; cat: string; coerced: string list; roles: string list; snode: string list;
21 21 numbers: string list; cases: string list; genders: string list; persons: string list;
22 22 grads: string list; praeps: string list; acms: string list;
23 23 aspects: string list; negations: string list; moods: string list; tenses: string list;
... ... @@ -25,10 +25,12 @@ type categories = {lemma: string; pos: string; pos2: string; cat: string; proj:
25 25 }
26 26  
27 27 type selector =
28   - Lemma | (*NewLemma |*) Pos | Pos2 | Cat | Proj | Number | Case | Gender | Person | Grad | Praep |
  28 + Lemma | IncludeLemmata | (*NewLemma |*) Pos | Pos2 | Cat | Coerced | Role | SNode |
  29 + Number | Case | Gender | Person | Grad | Praep |
29 30 Acm | Aspect | Negation | Mood | Tense | Nsyn | Nsem | Ctype | Mode | Psem |
30   - Inumber | Igender | Iperson | Nperson | Plemma |
31   - Unumber | Ucase | Ugender | Uperson | Amode
  31 + Icat | Inumber | Igender | Iperson | Nperson | Ncat | Plemma |
  32 + Unumber | Ucase | Ugender | Uperson | Amode |
  33 + Irole | Prole | Nrole | Inode | Pnode | Nnode
32 34  
33 35 module OrderedSelector = struct
34 36 type t = selector
... ... @@ -73,12 +75,14 @@ type selector_relation = Eq | Neq (*| StrictEq*)
73 75  
74 76 *)
75 77  
76   -let empty_cats = {lemma=""; pos=""; pos2=""; cat="X"; proj=[];
  78 +let empty_cats = {lemma=""; pos=""; pos2=""; cat="X"; coerced=[]; roles=[]; snode=[];
77 79 numbers=[]; cases=[]; genders=[]; persons=[];
78 80 grads=[]; praeps=[]; acms=[]; aspects=[]; negations=[]; moods=[]; tenses=[];
79 81 nsyn=[]; nsem=[]; modes=[]; psem=[];
80 82 }
81 83  
  84 +let default_category_flag = ref true
  85 +
82 86 let resource_path =
83 87 try Sys.getenv "ENIAM_RESOURCE_PATH"
84 88 with Not_found ->
... ... @@ -94,7 +98,7 @@ let data_path =
94 98 let rules_filename = resource_path ^ "/LCGlexicon/lexicon-pl.dic"
95 99 let user_lexicon_filename = data_path ^ "/lexicon.dic"
96 100 let user_cats_filename = data_path ^ "/senses.tab"
97   -let user_proj_filename = data_path ^ "/projections.tab"
  101 +let user_coerced_filename = data_path ^ "/coercions.tab"
98 102  
99 103 let subst_uncountable_lexemes_filename = resource_path ^ "/LCGlexicon/subst_uncountable.dat"
100 104 let subst_uncountable_lexemes_filename2 = resource_path ^ "/LCGlexicon/subst_uncountable_stare.dat"
... ...
LCGlexicon/ENIAM_LCGlexiconTypes_old.ml 0 → 100644
  1 +(*
  2 + * ENIAM_LCGlexicon is a library that provides LCG lexicon form Polish
  3 + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  5 + *
  6 + * This library is free software: you can redistribute it and/or modify
  7 + * it under the terms of the GNU Lesser General Public License as published by
  8 + * the Free Software Foundation, either version 3 of the License, or
  9 + * (at your option) any later version.
  10 + *
  11 + * This library is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 + * GNU Lesser General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU Lesser General Public License
  17 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18 + *)
  19 +
  20 +type categories = {lemma: string; pos: string; pos2: string; cat: string; coerced: string list;
  21 + numbers: string list; cases: string list; genders: string list; persons: string list;
  22 + grads: string list; praeps: string list; acms: string list;
  23 + aspects: string list; negations: string list; moods: string list; tenses: string list;
  24 + nsyn: string list; nsem: string list; modes: string list; psem: string list;
  25 + }
  26 +
  27 +type selector =
  28 + Lemma | (*NewLemma |*) Pos | Pos2 | Cat | Coerced | Number | Case | Gender | Person | Grad | Praep |
  29 + Acm | Aspect | Negation | Mood | Tense | Nsyn | Nsem | Ctype | Mode | Psem |
  30 + Icat | Inumber | Igender | Iperson | Nperson | Ncat | Plemma |
  31 + Unumber | Ucase | Ugender | Uperson | Amode
  32 +
  33 +module OrderedSelector = struct
  34 + type t = selector
  35 + let compare = compare
  36 +end
  37 +
  38 +module SelectorMap=Xmap.Make(OrderedSelector)
  39 +module SelectorSet=Xset.Make(OrderedSelector)
  40 +
  41 +type rule =
  42 + Bracket
  43 + | Quant of (selector * ENIAM_LCGtypes.internal_grammar_symbol) list
  44 + | Raised of selector list
  45 + | Syntax of ENIAM_LCGtypes.grammar_symbol
  46 + | Sem of string
  47 +
  48 +type rule_sem =
  49 + BasicSem of selector list
  50 + | RaisedSem of selector list * selector list
  51 + | TermSem of selector list * string
  52 + | QuotSem of selector list
  53 + | InclusionSem of selector list
  54 + | ConjSem of selector list
  55 +
  56 +type selector_relation = Eq | Neq (*| StrictEq*)
  57 +
  58 +(* x="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jedną z wartości atrybutu x, reguła zostanie wykonana dla x z usuniętymi pozostałymi wartościami *)
  59 +(* x!="s" oznacza, że żeby reguła została użyta token musi mieć jako jedną z wartości atrybutu x symbol inny od "s", reguła zostanie wykonana dla x z usuniętą wartością "s" *)
  60 +(* x=="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jednyną z wartość atrybutu x *)
  61 +
  62 +(* wzajemne zależności między kategoriami (np między case i person w subst) są rozstrzygane w ENIAMcategories *)
  63 +
  64 +(* Basic oznacza że kwantyfikacja i term są generowane zgodnie ze standardowymi regułami:
  65 + - kwantyfikacja przebiega po wszystkich zdefiniowanych kategoriariach i wartościach wziętych z cats
  66 + - typ jest zadany bezpośrednio
  67 + - term tworzy wierzchołek w strukturze zależnościowej etykietowany wszystkimi zdefiniowanymi kategoriami
  68 +
  69 + Quant oznacza że typ i term są generowane zgodnie ze standardowymi regułami:
  70 + - kwantyfikacja jest zadana bezpośrednio
  71 + - typ jest zadany bezpośrednio
  72 + - term tworzy wierzchołek w strukturze zależnościowej etykietowany wszystkimi zdefiniowanymi kategoriami
  73 +
  74 +*)
  75 +
  76 +let empty_cats = {lemma=""; pos=""; pos2=""; cat="X"; coerced=[];
  77 + numbers=[]; cases=[]; genders=[]; persons=[];
  78 + grads=[]; praeps=[]; acms=[]; aspects=[]; negations=[]; moods=[]; tenses=[];
  79 + nsyn=[]; nsem=[]; modes=[]; psem=[];
  80 + }
  81 +
  82 +let default_category_flag = ref true
  83 +
  84 +let resource_path =
  85 + try Sys.getenv "ENIAM_RESOURCE_PATH"
  86 + with Not_found ->
  87 + if Sys.file_exists "/usr/share/eniam" then "/usr/share/eniam" else
  88 + if Sys.file_exists "/usr/local/share/eniam" then "/usr/local/share/eniam" else
  89 + if Sys.file_exists "resources" then "resources" else
  90 + failwith "resource directory does not exists"
  91 +
  92 +let data_path =
  93 + try Sys.getenv "ENIAM_USER_DATA_PATH"
  94 + with Not_found -> "data"
  95 +
  96 +let rules_filename = resource_path ^ "/LCGlexicon/lexicon-pl.dic"
  97 +let user_lexicon_filename = data_path ^ "/lexicon.dic"
  98 +let user_cats_filename = data_path ^ "/senses.tab"
  99 +let user_coerced_filename = data_path ^ "/coercions.tab"
  100 +
  101 +let subst_uncountable_lexemes_filename = resource_path ^ "/LCGlexicon/subst_uncountable.dat"
  102 +let subst_uncountable_lexemes_filename2 = resource_path ^ "/LCGlexicon/subst_uncountable_stare.dat"
  103 +let subst_container_lexemes_filename = resource_path ^ "/LCGlexicon/subst_container.dat"
  104 +let subst_numeral_lexemes_filename = resource_path ^ "/LCGlexicon/subst_numeral.dat"
  105 +let subst_time_lexemes_filename = resource_path ^ "/LCGlexicon/subst_time.dat"
  106 +
  107 +let adv_modes_filename = resource_path ^ "/Walenty/adv_modes.tab"
  108 +let num_nsems_filename = resource_path ^ "/LCGlexicon/num.tab"
... ...
LCGlexicon/ENIAM_LCGlexicon_old.ml 0 → 100644
  1 +(*
  2 + * ENIAM_LCGlexicon is a library that provides LCG lexicon form Polish
  3 + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  5 + *
  6 + * This library is free software: you can redistribute it and/or modify
  7 + * it under the terms of the GNU Lesser General Public License as published by
  8 + * the Free Software Foundation, either version 3 of the License, or
  9 + * (at your option) any later version.
  10 + *
  11 + * This library is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 + * GNU Lesser General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU Lesser General Public License
  17 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18 + *)
  19 +
  20 +open Xstd
  21 +open ENIAM_LCGtypes
  22 +open ENIAM_LCGlexiconTypes
  23 +open ENIAMcategoriesPL
  24 +
  25 +let rec find_selector s = function
  26 + (t,Eq,x :: _) :: l -> if t = s then x else find_selector s l
  27 + | (t,_,_) :: l -> if t = s then failwith "find_selector 1" else find_selector s l
  28 + | [] -> failwith "find_selector 2"
  29 +
  30 +let rec get_syntax rev = function
  31 + Syntax syntax :: rule -> syntax, (List.rev rev) @ rule
  32 + | t :: rule -> get_syntax (t :: rev) rule
  33 + | [] -> failwith "get_syntax"
  34 +
  35 +let rec get_quant rev = function
  36 + Quant quant :: rule -> quant, (List.rev rev) @ rule
  37 + | t :: rule -> get_quant (t :: rev) rule
  38 + | [] -> [], List.rev rev
  39 +
  40 +let rec get_bracket rev = function
  41 + Bracket :: rule -> true, (List.rev rev) @ rule
  42 + | t :: rule -> get_bracket (t :: rev) rule
  43 + | [] -> false, List.rev rev
  44 +
  45 +let rec get_raised rev = function
  46 + Raised raised :: rule -> raised, (List.rev rev) @ rule
  47 + | t :: rule -> get_raised (t :: rev) rule
  48 + | [] -> raise Not_found
  49 +
  50 +let rec get_sem_term rev = function
  51 + Sem sem_term :: rule -> sem_term, (List.rev rev) @ rule
  52 + | t :: rule -> get_sem_term (t :: rev) rule
  53 + | [] -> raise Not_found
  54 +
  55 +let merge_quant pos_quants quants =
  56 + let map = Xlist.fold quants SelectorMap.empty (fun map (k,v) -> SelectorMap.add map k v) in
  57 + let l,map = Xlist.fold pos_quants ([],map) (fun (l,map) (cat,v) ->
  58 + if SelectorMap.mem map cat then (cat,SelectorMap.find map cat) :: l, SelectorMap.remove map cat
  59 + else (cat,v) :: l, map) in
  60 + List.rev (SelectorMap.fold map l (fun l cat v -> (cat,v) :: l))
  61 +
  62 +let assign_quantifiers (selectors,rule,weight) =
  63 + let pos = find_selector Pos selectors in
  64 + let categories =
  65 + try StringMap.find pos_categories pos
  66 + with Not_found -> failwith ("assign_quantifiers: unknown part of speech " ^ pos) in
  67 + let categories = Xlist.map categories (fun s -> s,Top) in
  68 + let syntax,rule = get_syntax [] rule in
  69 + let quant,rule = get_quant [] rule in
  70 + let bracket,rule = get_bracket [] rule in
  71 + let quant = merge_quant categories quant in
  72 + selectors, (bracket,quant,syntax),(rule,weight)
  73 +
  74 +let rec check_quantifiers_int_rec (selectors,syntax) quants = function
  75 + Atom x -> ()
  76 + | AVar "schema" -> ()
  77 + | AVar x ->
  78 + if not (SelectorSet.mem quants (selector_of_string x))
  79 + then failwith ("Variable '" ^ x ^ "' is not quantified in rule " ^ string_of_selectors selectors ^ ": " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax)
  80 + | With l -> Xlist.iter l (check_quantifiers_int_rec (selectors,syntax) quants)
  81 + | Zero -> ()
  82 + | Top -> ()
  83 +
  84 +let rec check_quantifiers_rec rule quants = function
  85 + Tensor l -> Xlist.iter l (check_quantifiers_int_rec rule quants)
  86 + | Plus l -> Xlist.iter l (check_quantifiers_rec rule quants)
  87 + | Imp(s,d,t) -> check_quantifiers_rec rule quants s; check_quantifiers_rec rule quants t
  88 + | One -> ()
  89 + | ImpSet(s,l) -> check_quantifiers_rec rule quants s; Xlist.iter l (fun (_,t) -> check_quantifiers_rec rule quants t)
  90 + | Star s -> check_quantifiers_rec rule quants s
  91 + | Maybe s -> check_quantifiers_rec rule quants s
  92 + | _ -> failwith "check_quantifiers_rec"
  93 +
  94 +let check_quantifiers (selectors,(bracket,quant,syntax),_) =
  95 + let quants = Xlist.fold quant SelectorSet.empty (fun quants (q,_) -> SelectorSet.add quants q) in
  96 + check_quantifiers_rec (selectors,syntax) quants syntax
  97 +
  98 +let assign_semantics (selectors,(bracket,quant,syntax),(rule,weight)) =
  99 + let semantics = try
  100 + let raised,rule = get_raised [] rule in
  101 + if rule <> [] then failwith "assign_semantics 1" else
  102 + RaisedSem(Xlist.map quant fst, raised)
  103 + with Not_found -> (try
  104 + let term,rule = get_sem_term [] rule in
  105 + if rule <> [] then failwith "assign_semantics 2" else
  106 + TermSem(Xlist.map quant fst,term)
  107 + with Not_found -> BasicSem(Xlist.map quant fst)) in
  108 + selectors,(bracket,quant,syntax),(semantics,weight)
  109 +
  110 +let rec add_x_args_rec = function
  111 + Imp(s,d,t) -> Imp(add_x_args_rec s,d,t)
  112 + | ImpSet(s,l) -> ImpSet(add_x_args_rec s,l)
  113 + | Tensor[Atom "<conll_root>"] -> Tensor[Atom "<conll_root>"]
  114 + | Tensor l -> ImpSet(Tensor l,[Backward,Maybe(Tensor[Atom "X"]);Forward,Maybe(Tensor[Atom "X"])])
  115 + | t -> failwith ("add_x_args_rec: " ^ ENIAM_LCGstringOf.grammar_symbol 0 t)
  116 +
  117 +let is_raised_semantics = function
  118 + RaisedSem _ -> true
  119 + | _ -> false
  120 +
  121 +let rec is_raised_arg = function
  122 + Imp _ -> true
  123 + | Tensor _ -> false
  124 + | Plus l -> Xlist.fold l false (fun b t -> is_raised_arg t || b)
  125 + | Maybe t -> is_raised_arg t
  126 + | One -> false
  127 + | t -> failwith ("is_raised_arg: " ^ ENIAM_LCGstringOf.grammar_symbol 0 t)
  128 +
  129 +let rec is_raised_syntax = function
  130 + Imp(s,d,t) -> is_raised_syntax s || is_raised_arg t
  131 + | ImpSet(s,l) -> is_raised_syntax s || Xlist.fold l false (fun b (_,t) -> is_raised_arg t || b)
  132 + | Tensor _ -> false
  133 + | t -> failwith ("is_raised_syntax: " ^ ENIAM_LCGstringOf.grammar_symbol 0 t)
  134 +
  135 +
  136 +let add_x_args (selectors,(bracket,quant,syntax),(semantics,weight)) =
  137 + if is_raised_syntax syntax then (selectors,(bracket,quant,syntax),(semantics,weight))
  138 + else (selectors,(bracket,quant,add_x_args_rec syntax),(semantics,weight))
  139 +
  140 +let rec extract_category pat rev = function
  141 + (cat,rel,v) :: l -> if cat = pat then rel,v,(List.rev rev @ l) else extract_category pat ((cat,rel,v) :: rev) l
  142 + | [] -> raise Not_found
  143 +
  144 +let dict_of_grammar grammar =
  145 + (* print_endline "dict_of_grammar"; *)
  146 + Xlist.fold grammar StringMap.empty (fun dict (selectors,(bracket,quant,syntax),semantics) ->
  147 + let pos_rel,poss,selectors = try extract_category Pos [] selectors with Not_found -> failwith "dict_of_grammar 1" in
  148 + let lemma_rel,lemmas,selectors = try extract_category Lemma [] selectors with Not_found -> Eq,[],selectors in
  149 + if pos_rel <> Eq || lemma_rel <> Eq then failwith "dict_of_grammar 2" else
  150 + let rule = selectors,(bracket,quant,syntax),semantics in
  151 + Xlist.fold poss dict (fun dict pos ->
  152 + let dict2,l = try StringMap.find dict pos with Not_found -> StringMap.empty,[] in
  153 + let dict2,l =
  154 + if lemmas = [] then dict2,rule :: l else
  155 + Xlist.fold lemmas dict2 (fun dict2 lemma ->
  156 + StringMap.add_inc dict2 lemma [rule] (fun l -> rule :: l)),l in
  157 + StringMap.add dict pos (dict2,l)))
  158 +
  159 +let make_rules x_flag filename =
  160 + let lexicon = ENIAM_LCGlexiconParser.load_lexicon filename in
  161 + let lexicon = List.rev (Xlist.rev_map lexicon assign_quantifiers) in
  162 + Xlist.iter lexicon check_quantifiers;
  163 + let lexicon = List.rev (Xlist.rev_map lexicon assign_semantics) in
  164 + let lexicon = if x_flag then List.rev (Xlist.rev_map lexicon add_x_args) else lexicon in
  165 + dict_of_grammar lexicon
  166 +
  167 +let find_rules rules cats =
  168 + let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith ("find_rules: unable to find rules for category '" ^ cats.pos ^ "' lemma='" ^ cats.lemma ^ "'") in
  169 + (* Printf.printf "find_rules: %s %s |rules|=%d\n" cats.lemma cats.pos (Xlist.size rules); *)
  170 + let rules = try StringMap.find lex_rules cats.lemma @ rules with Not_found -> rules in
  171 + Xlist.fold rules [] (fun rules (selectors,syntax,semantics) ->
  172 + try
  173 + let cats = apply_selectors cats selectors in
  174 + (cats,syntax,semantics) :: rules
  175 + with Not_found -> rules)
  176 +
  177 +let prepare_lex_entries rules lex_entries cats =
  178 + Xlist.fold lex_entries rules (fun rules (selectors,rule) ->
  179 + let selectors = (Pos,Eq,[cats.pos]) :: selectors in
  180 + let selectors,(bracket,quant,syntax),(rule,weight) = assign_quantifiers (selectors,[Syntax rule],0.) in
  181 + let selectors,(bracket,quant,syntax),(semantics,weight) = assign_semantics (selectors,(bracket,quant,syntax),(rule,weight)) in
  182 + try
  183 + let cats = apply_selectors cats selectors in
  184 + (cats,(bracket,quant,syntax),(semantics,weight)) :: rules
  185 + with Not_found -> rules)
  186 +
  187 +let assign_valence valence rules =
  188 + Xlist.fold rules [] (fun l (cats,(bracket,quant,syntax),semantics) ->
  189 + (* Printf.printf "%s %s |valence|=%d\n" cats.lemma cats.pos (Xlist.size valence); *)
  190 + if ENIAM_LCGrenderer.count_avar "schema" syntax > 0 then
  191 + Xlist.fold valence l (fun l (selectors,schema) ->
  192 + try
  193 + let cats = apply_selectors cats selectors in
  194 + (cats,(bracket,quant,ENIAM_LCGrenderer.substitute_schema "schema" schema syntax),semantics) :: l
  195 + with Not_found -> l)
  196 + else (cats,(bracket,quant,syntax),semantics) :: l)
  197 +
  198 +type labels = {
  199 + number: string;
  200 + case: string;
  201 + gender: string;
  202 + person: string;
  203 + aspect: string;
  204 +}
  205 +
  206 +let get_label e = function
  207 + Number -> e.number
  208 + | Case -> e.case
  209 + | Gender -> e.gender
  210 + | Person -> e.person
  211 + | Aspect -> e.aspect
  212 + | _ -> ENIAM_LCGreductions.get_variant_label ()
  213 +
  214 +let get_labels () = {
  215 + number=ENIAM_LCGreductions.get_variant_label ();
  216 + case=ENIAM_LCGreductions.get_variant_label ();
  217 + gender=ENIAM_LCGreductions.get_variant_label ();
  218 + person=ENIAM_LCGreductions.get_variant_label ();
  219 + aspect=ENIAM_LCGreductions.get_variant_label ();
  220 +}
  221 +
  222 +let make_quantification e rules =
  223 + Xlist.map rules (fun (cats,(bracket,quant,syntax),semantics) ->
  224 + let syntax = Xlist.fold (List.rev quant) syntax (fun syntax (cat,t) ->
  225 + let t = if t = Top then ENIAM_LCGrenderer.make_quant_restriction (match_selector cats cat) else t in
  226 + let category = string_of_selector cat in
  227 + WithVar(category,t,get_label e cat,syntax)) in
  228 + let syntax = if bracket then ENIAM_LCGtypes.Bracket(true,true,syntax) else ENIAM_LCGtypes.Bracket(false,false,syntax) in
  229 + cats,syntax,semantics)
  230 +
  231 +let make_node id orth lemma pos syntax weight cat_list is_raised =
  232 + let attrs = Xlist.fold cat_list [] (fun attrs -> function
  233 + | Lemma -> attrs
  234 + | Pos -> attrs
  235 + | Pos2 -> attrs
  236 + | Cat -> ("CAT",SubstVar "cat") :: attrs
  237 + | Coerced -> ("COERCED",SubstVar "coerced") :: attrs
  238 + | Number -> ("NUM",SubstVar "number") :: attrs
  239 + | Case -> ("CASE",SubstVar "case") :: attrs
  240 + | Gender -> ("GEND",SubstVar "gender") :: attrs
  241 + | Person -> ("PERS",SubstVar "person") :: attrs
  242 + | Grad -> ("GRAD",SubstVar "grad") :: attrs
  243 + | Praep -> attrs
  244 + | Acm -> ("ACM",SubstVar "acm") :: attrs
  245 + | Aspect -> ("ASPECT", SubstVar "aspect") :: attrs
  246 + | Negation -> ("NEGATION",SubstVar "negation") :: attrs
  247 + | Mood -> ("MOOD", SubstVar "mood") :: attrs
  248 + | Tense -> ("TENSE", SubstVar "tense") :: attrs
  249 + | Nsyn -> ("NSYN", SubstVar "nsyn") :: attrs
  250 + | Nsem -> ("NSEM", SubstVar "nsem") :: attrs
  251 + | Ctype -> ("CTYPE", SubstVar "ctype") :: attrs
  252 + | Mode -> ("MODE", SubstVar "mode") :: attrs
  253 + | Psem -> ("PSEM", SubstVar "psem") :: attrs
  254 + | Icat -> attrs
  255 + | Inumber -> attrs
  256 + | Igender -> attrs
  257 + | Iperson -> attrs
  258 + | Nperson -> attrs
  259 + | Ncat -> attrs
  260 + | Plemma -> attrs
  261 + | Unumber -> attrs
  262 + | Ucase -> attrs
  263 + | Ugender -> attrs
  264 + | Uperson -> attrs
  265 + | Amode -> attrs) in
  266 + (* | s -> (string_of_selector s, Dot) :: attrs) in *)
  267 + (* | "lex" -> ("LEX",Val "+") :: attrs *)
  268 + (* | s -> failwith ("make_node: " ^ (string_of_selector s))) in *)
  269 + let symbol = if is_raised then
  270 + ENIAM_LCGrenderer.make_raised_symbol syntax
  271 + else ENIAM_LCGrenderer.make_symbol syntax in
  272 + {ENIAM_LCGrenderer.empty_node with
  273 + orth=orth; lemma=lemma; pos=pos; symbol=symbol;
  274 + weight=weight; id=id; attrs=List.rev attrs; args=Dot}
  275 +
  276 +let or_frame node =
  277 + (*Imp(Imp(Imp(Tensor[Atom "<root>"],Forward,
  278 + Tensor[Atom "</speaker>"]),Forward,
  279 + Imp(Tensor[Atom "ip"; Top; Top; Top],Forward,Tensor[Atom "or"])),Forward,
  280 + Tensor[Atom "or2"]),*)
  281 + (* Lambda("x",Lambda("y",Lambda("z",Node{node with gs=make_gs [] ["<root>"]; args=Tuple[
  282 + Cut(SetAttr("AROLE",Val "Clause",SetAttr("GF",Gf CLAUSE,App(Var "y",Var "x"))))]}))) *)
  283 + VariantVar("lemma",Lambda("x",Lambda("y",Lambda("z",Node{node with args=Tuple[
  284 + Cut(SetAttr("ARG_SYMBOL",Tuple[Val "TODO"],App(Var "y",Var "x")))]}))))
  285 +
  286 +let make_term id orth rules =
  287 + Xlist.map rules (fun (cats,syntax,(semantics,weight)) ->
  288 + ENIAM_LCGrenderer.reset_variable_names ();
  289 + ENIAM_LCGrenderer.add_variable_numbers ();
  290 + (* print_endline ("make_term 0: " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax); *)
  291 + match semantics with
  292 + BasicSem cat_list ->
  293 + let node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list false in
  294 + (* print_endline ("make_term 1: " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax); *)
  295 + let semantics = ENIAM_LCGrenderer.make_term node syntax in
  296 + ENIAM_LCGrenderer.simplify (syntax,semantics)
  297 + | RaisedSem(cat_list,outer_cat_list) ->
  298 + (* FIXME: jakie atrybuty powinien mieć outer node (w szczególności jaką wagę?) *)
  299 + let node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list true in
  300 + let outer_node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) outer_cat_list false in
  301 + (* print_endline ("make_term 2: " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax); *)
  302 + let semantics = ENIAM_LCGrenderer.make_raised_term node outer_node syntax in
  303 + ENIAM_LCGrenderer.simplify (syntax,semantics)
  304 + | TermSem(cat_list,"λxλyλz.NODE(yx,z)") ->
  305 + let node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list false in
  306 + (* print_endline ("make_term 3: " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax); *)
  307 + let semantics = or_frame node in
  308 + ENIAM_LCGrenderer.simplify (syntax,semantics)
  309 + | _ -> failwith "make_term: ni")
  310 +
  311 +let create_entries rules id orth cats valence lex_entries =
  312 + Xlist.fold cats [] (fun l cats ->
  313 + (* Printf.printf "create_entries: orth=%s lemma=%s pos=%s\n" orth cats.lemma cats.pos; *)
  314 + (* variable_name_ref := []; *)
  315 + if cats.pos="interp" && cats.lemma="<clause>" then (BracketSet(Forward),Dot) :: l else
  316 + if cats.pos="interp" && cats.lemma="</clause>" then (BracketSet(Backward),Dot) :: l else
  317 + if (cats.pos2="noun" || cats.pos2="verb" || cats.pos2="adj" || cats.pos2="adv") && cats.cat="X" && not !default_category_flag && cats.pos <> "aglt" then l else
  318 + let e = get_labels () in
  319 + (* print_endline "create_entries 1"; *)
  320 + let rules = find_rules rules cats in
  321 + let rules = prepare_lex_entries rules lex_entries cats in
  322 + (* Printf.printf "create_entries 2: %s %s |rules|=%d\n" cats.lemma cats.pos (Xlist.size rules); *)
  323 + let rules = assign_valence valence rules in
  324 + (* print_endline "create_entries 3"; *)
  325 + let rules = make_quantification e rules in
  326 + (* print_endline "create_entries 4"; *)
  327 + let rules = make_term id orth rules in
  328 + (* print_endline "create_entries 5"; *)
  329 + rules @ l)
... ...
LCGlexicon/ENIAMcategoriesPL.ml
... ... @@ -29,18 +29,27 @@ let all_persons = ["pri";"sec";"ter"]
29 29  
30 30 let selector_values = Xlist.fold [
31 31 Lemma, [];
32   - Pos, ["subst";"depr";"ppron12";"ppron3";"siebie";"prep";"num";"intnum";
  32 + IncludeLemmata, [];
  33 + Pos, ["subst";"depr";"ppron12";"ppron3";"siebie";"prep";"fixed";"num";"numcomp";"intnum";
33 34 "realnum";"intnum-interval";"realnum-interval";"symbol";"ordnum";
34 35 "date";"date-interval";"hour-minute";"hour";"hour-minute-interval";
35 36 "hour-interval";"year";"year-interval";"day";"day-interval";"day-month";
36 37 "day-month-interval";"month-interval";"roman";"roman-interval";"roman-ordnum";
37   - "match-result";"url";"email";"obj-id";"building-number";"adj";"adjc";"adjp";"adja";
  38 + "match-result";"url";"email";"phone-number";"postal-code";"obj-id";"building-number";"list-item";"adj";"adjc";"adjp";"adja";
38 39 "adv";"ger";"pact";"ppas";"fin";"bedzie";"praet";"winien";"impt";
39   - "imps";"pred";"aglt";"inf";"pcon";"pant";"qub";"part";"comp";"conj";"interj";
  40 + "imps";"pred";"aglt";"inf";"pcon";"pant";"pacta";"qub";"part";"comp";"conj";"interj";
40 41 "sinterj";"burk";"interp";"xxx";"unk";"html-tag";"apron";"compar"];
41 42 Pos2, [];
42 43 Cat, [];
43   - Proj, [];
  44 + Coerced, [];
  45 + Role, [];
  46 + Irole, [];
  47 + Prole, [];
  48 + Nrole, [];
  49 + SNode, ["concept";"sit";"dot";"relations"];
  50 + Inode, ["concept";"sit";"dot";"relations"];
  51 + Pnode, ["concept";"sit";"dot";"relations"];
  52 + Nnode, ["concept";"sit";"dot";"relations"];
44 53 Number, all_numbers;
45 54 Case, "postp" :: "pred" :: all_cases;
46 55 Gender, all_genders;
... ... @@ -124,7 +133,7 @@ let noun_type proper lemma pos =
124 133 if pos = "ppron12" || pos = "ppron3" || pos = "siebie" then "pronoun" else
125 134 if pos = "symbol" || pos = "date" || pos = "date-interval" || pos = "hour" || pos = "hour-minute" || pos = "hour-interval" || pos = "hour-minute-interval" ||
126 135 pos = "year" || pos = "year-interval" || pos = "day" || pos = "day-interval" || pos = "day-month" || pos = "day-month-interval" ||
127   - pos = "match-result" || pos = "month-interval" || pos = "roman" || pos = "roman-interval" || pos = "url" || pos = "email" || pos = "obj-id" || pos = "building-number" || pos = "date" then "proper" else
  136 + pos = "match-result" || pos = "month-interval" || pos = "roman" || pos = "roman-interval" || pos = "url" || pos = "email" || pos = "phone-number" || pos = "postal-code" || pos = "obj-id" || pos = "building-number" || pos = "date" then "proper" else
128 137 if StringSet.mem subst_pronoun_lexemes lemma then "pronoun" else
129 138 "common" in
130 139 let nsem =
... ... @@ -150,7 +159,9 @@ let num_nsem lemma =
150 159  
151 160 let part_set = StringSet.of_list ["się"; "nie"; "by"; "niech"; "niechaj"; "niechże"; "niechajże"; "czy"; "gdyby"]
152 161  
153   -let clarify_categories proper cat proj = function
  162 +let snode = SelectorMap.find selector_values SNode
  163 +
  164 +let clarify_categories proper cat coerced (*snode*) = function
154 165 lemma,"subst",[numbers;cases;genders] ->
155 166 let numbers = expand_numbers numbers in
156 167 let cases = expand_cases cases in
... ... @@ -158,9 +169,9 @@ let clarify_categories proper cat proj = function
158 169 let cases,voc = split_voc cases in
159 170 let nsyn,nsem = noun_type proper lemma "subst" in
160 171 (if cases = [] then [] else
161   - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; proj=proj; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @
  172 + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @
162 173 (if voc = [] then [] else
163   - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; proj=proj; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}])
  174 + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}])
164 175 | lemma,"subst",[numbers;cases;genders;_] ->
165 176 let numbers = expand_numbers numbers in
166 177 let cases = expand_cases cases in
... ... @@ -168,9 +179,9 @@ let clarify_categories proper cat proj = function
168 179 let cases,voc = split_voc cases in
169 180 let nsyn,nsem = noun_type proper lemma "subst" in
170 181 (if cases = [] then [] else
171   - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; proj=proj; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @
  182 + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @
172 183 (if voc = [] then [] else
173   - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; proj=proj; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}])
  184 + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}])
174 185 | lemma,"depr",[numbers;cases;genders] ->
175 186 let numbers = expand_numbers numbers in
176 187 let cases = expand_cases cases in
... ... @@ -178,29 +189,29 @@ let clarify_categories proper cat proj = function
178 189 let cases,voc = split_voc cases in
179 190 let nsyn,nsem = noun_type proper lemma "depr" in
180 191 (if cases = [] then [] else
181   - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; proj=proj; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @
  192 + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @
182 193 (if voc = [] then [] else
183   - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; proj=proj; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}])
  194 + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}])
184 195 | lemma,"ppron12",[numbers;cases;genders;persons] ->
185 196 let numbers = expand_numbers numbers in
186 197 let cases = expand_cases cases in
187 198 let genders = expand_genders genders in
188   - [{empty_cats with lemma=lemma; pos="ppron12"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons}]
  199 + [{empty_cats with lemma=lemma; pos="ppron12"; pos2="pron"; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=persons}]
189 200 | lemma,"ppron12",[numbers;cases;genders;persons;akcs] ->
190 201 let numbers = expand_numbers numbers in
191 202 let cases = expand_cases cases in
192 203 let genders = expand_genders genders in
193   - [{empty_cats with lemma=lemma; pos="ppron12"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons}]
  204 + [{empty_cats with lemma=lemma; pos="ppron12"; pos2="pron"; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=persons}]
194 205 | lemma,"ppron3",[numbers;cases;genders;persons] ->
195 206 let numbers = expand_numbers numbers in
196 207 let cases = expand_cases cases in
197 208 let genders = expand_genders genders in
198   - [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=["praep-npraep"]}]
  209 + [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=["praep-npraep"]}]
199 210 | lemma,"ppron3",[numbers;cases;genders;persons;akcs] ->
200 211 let numbers = expand_numbers numbers in
201 212 let cases = expand_cases cases in
202 213 let genders = expand_genders genders in
203   - [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=["praep-npraep"]}]
  214 + [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=["praep-npraep"]}]
204 215 | lemma,"ppron3",[numbers;cases;genders;persons;akcs;praep] ->
205 216 let numbers = expand_numbers numbers in
206 217 let cases = expand_cases cases in
... ... @@ -209,207 +220,216 @@ let clarify_categories proper cat proj = function
209 220 ["praep";"npraep"] -> ["praep-npraep"]
210 221 | ["npraep";"praep"] -> ["praep-npraep"]
211 222 | _ -> praep in
212   - [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=praep}]
  223 + [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=praep}]
213 224 | lemma,"siebie",[cases] -> (* FIXME: czy tu określać numbers genders persons? *)
214 225 let cases = expand_cases cases in
215   - [{empty_cats with lemma=lemma; pos="siebie"; pos2="pron"; numbers=all_numbers; cases=cases; genders=all_genders; persons=["ter"]}]
  226 + [{empty_cats with lemma=lemma; pos="siebie"; pos2="pron"; snode=snode; numbers=all_numbers; cases=cases; genders=all_genders; persons=["ter"]}]
216 227 | lemma,"prep",[cases;woks] ->
217 228 if StringSet.mem compar_lexemes lemma then
218 229 [{empty_cats with lemma=lemma; pos="compar"; pos2="prep"}] else
219 230 let cases = expand_cases cases in
220   - [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; cases=cases; psem=["sem";"nosem"]}]
  231 + [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; snode=snode; cases=cases; psem=["sem";"nosem"]}]
221 232 | lemma,"prep",[cases] ->
222 233 if StringSet.mem compar_lexemes lemma then
223 234 [{empty_cats with lemma=lemma; pos="compar"; pos2="prep"}] else
224 235 let cases = expand_cases cases in
225   - [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; cases=cases; psem=["sem";"nosem"]}]
  236 + [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; snode=snode; cases=cases; psem=["sem";"nosem"]}]
226 237 | lemma,"num",[numbers;cases;genders;acms] ->
227 238 let numbers = expand_numbers numbers in
228 239 let cases = expand_cases cases in
229 240 let genders = expand_genders genders in
230 241 let nsem = num_nsem lemma in
231   - [{empty_cats with lemma=lemma; pos="num"; pos2="num"; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms; nsem=nsem}]
  242 + [{empty_cats with lemma=lemma; pos="num"; pos2="num"; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms; nsem=nsem}]
232 243 | lemma,"num",[numbers;cases;genders;acms;_] ->
233 244 let numbers = expand_numbers numbers in
234 245 let cases = expand_cases cases in
235 246 let genders = expand_genders genders in
236 247 let nsem = num_nsem lemma in
237   - [{empty_cats with lemma=lemma; pos="num"; pos2="num"; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms; nsem=nsem}]
238   - | lemma,"numc",[] -> []
  248 + [{empty_cats with lemma=lemma; pos="num"; pos2="num"; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms; nsem=nsem}]
  249 + | lemma,"numcomp",[] -> [{empty_cats with lemma=lemma; pos="numcomp"; pos2="numcomp"; snode=snode}]
239 250 | lemma,"intnum",[] ->
240 251 let numbers,acms =
241 252 if lemma = "1" || lemma = "-1" then ["sg"],["congr"] else
242 253 let s = String.get lemma (String.length lemma - 1) in
243 254 ["pl"],if s = '2' || s = '3' || s = '4' then ["rec";"congr"] else ["rec"] in
244   - [{empty_cats with lemma=lemma; pos="intnum"; pos2="num"; numbers=numbers; cases=all_cases; genders=all_genders; persons=["ter"]; acms=acms; nsem=["count"]}]
  255 + [{empty_cats with lemma=lemma; pos="intnum"; pos2="num"; snode=snode; numbers=numbers; cases=all_cases; genders=all_genders; persons=["ter"]; acms=acms; nsem=["count"]}]
245 256 | lemma,"realnum",[] ->
246   - [{empty_cats with lemma=lemma; pos="realnum"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}]
  257 + [{empty_cats with lemma=lemma; pos="realnum"; pos2="num"; snode=snode; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}]
247 258 | lemma,"intnum-interval",[] ->
248   - [{empty_cats with lemma=lemma; pos="intnum-interval"; pos2="num"; numbers=["pl"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec";"congr"]; nsem=["count"]}]
  259 + [{empty_cats with lemma=lemma; pos="intnum-interval"; pos2="num"; snode=snode; numbers=["pl"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec";"congr"]; nsem=["count"]}]
249 260 | lemma,"realnum-interval",[] ->
250   - [{empty_cats with lemma=lemma; pos="realnum-interval"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}]
  261 + [{empty_cats with lemma=lemma; pos="realnum-interval"; pos2="num"; snode=snode; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}]
251 262 | lemma,"symbol",[] ->
252   - [{empty_cats with lemma=lemma; pos="symbol"; pos2="noun"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]}]
  263 + [{empty_cats with lemma=lemma; pos="symbol"; pos2="noun"; snode=snode; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]}]
253 264 | lemma,"ordnum",[] ->
254   - [{empty_cats with lemma=lemma; pos="ordnum"; pos2="adj"; numbers=all_numbers; cases=all_cases; genders=all_genders; grads=["pos"]}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *)
  265 + [{empty_cats with lemma=lemma; pos="ordnum"; pos2="adj"; snode=snode; numbers=all_numbers; cases=all_cases; genders=all_genders; grads=["pos"]}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *)
255 266 | lemma,"date",[] ->
256 267 let nsyn,nsem = noun_type proper lemma "date" in
257   - [{empty_cats with lemma=lemma; pos="date"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  268 + [{empty_cats with lemma=lemma; pos="date"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
258 269 | lemma,"date-interval",[] ->
259 270 let nsyn,nsem = noun_type proper lemma "date-interval" in
260   - [{empty_cats with lemma=lemma; pos="date-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  271 + [{empty_cats with lemma=lemma; pos="date-interval"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
261 272 | lemma,"hour-minute",[] ->
262 273 let nsyn,nsem = noun_type proper lemma "hour-minute" in
263   - [{empty_cats with lemma=lemma; pos="hour-minute"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  274 + [{empty_cats with lemma=lemma; pos="hour-minute"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
264 275 | lemma,"hour",[] ->
265 276 let nsyn,nsem = noun_type proper lemma "hour" in
266   - [{empty_cats with lemma=lemma; pos="hour"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  277 + [{empty_cats with lemma=lemma; pos="hour"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
267 278 | lemma,"hour-minute-interval",[] ->
268 279 let nsyn,nsem = noun_type proper lemma "hour-minute-interval" in
269   - [{empty_cats with lemma=lemma; pos="hour-minute-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  280 + [{empty_cats with lemma=lemma; pos="hour-minute-interval"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
270 281 | lemma,"hour-interval",[] ->
271 282 let nsyn,nsem = noun_type proper lemma "hour-interval" in
272   - [{empty_cats with lemma=lemma; pos="hour-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  283 + [{empty_cats with lemma=lemma; pos="hour-interval"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
273 284 | lemma,"year",[] ->
274 285 let nsyn,nsem = noun_type proper lemma "year" in
275   - [{empty_cats with lemma=lemma; pos="year"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  286 + [{empty_cats with lemma=lemma; pos="year"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
276 287 | lemma,"year-interval",[] ->
277 288 let nsyn,nsem = noun_type proper lemma "year-interval" in
278   - [{empty_cats with lemma=lemma; pos="year-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  289 + [{empty_cats with lemma=lemma; pos="year-interval"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
279 290 | lemma,"day",[] ->
280 291 let nsyn,nsem = noun_type proper lemma "day" in
281   - [{empty_cats with lemma=lemma; pos="day"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  292 + [{empty_cats with lemma=lemma; pos="day"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
282 293 | lemma,"day-interval",[] ->
283 294 let nsyn,nsem = noun_type proper lemma "day-interval" in
284   - [{empty_cats with lemma=lemma; pos="day-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  295 + [{empty_cats with lemma=lemma; pos="day-interval"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
285 296 | lemma,"day-month",[] ->
286 297 let nsyn,nsem = noun_type proper lemma "day-month" in
287   - [{empty_cats with lemma=lemma; pos="day-month"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  298 + [{empty_cats with lemma=lemma; pos="day-month"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
288 299 | lemma,"day-month-interval",[] ->
289 300 let nsyn,nsem = noun_type proper lemma "day-month-interval" in
290   - [{empty_cats with lemma=lemma; pos="day-month-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  301 + [{empty_cats with lemma=lemma; pos="day-month-interval"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
291 302 | lemma,"month-interval",[] ->
292 303 let nsyn,nsem = noun_type proper lemma "month-interval" in
293   - [{empty_cats with lemma=lemma; pos="month-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  304 + [{empty_cats with lemma=lemma; pos="month-interval"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
294 305 | lemma,"roman",[] ->
295 306 let nsyn,nsem = noun_type proper lemma "roman" in
296   - [{empty_cats with lemma=lemma; pos="roman-ordnum"; pos2="adj"; numbers=all_numbers; cases=all_cases; genders=all_genders; grads=["pos"]};
297   - {empty_cats with lemma=lemma; pos="roman"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  307 + [{empty_cats with lemma=lemma; pos="roman-ordnum"; pos2="adj"; snode=snode; numbers=all_numbers; cases=all_cases; genders=all_genders; grads=["pos"]};
  308 + {empty_cats with lemma=lemma; pos="roman"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
298 309 | lemma,"roman-interval",[] ->
299 310 let nsyn,nsem = noun_type proper lemma "roman-interval" in
300   - [{empty_cats with lemma=lemma; pos="roman-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  311 + [{empty_cats with lemma=lemma; pos="roman-interval"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
301 312 | lemma,"match-result",[] ->
302 313 let nsyn,nsem = noun_type proper lemma "match-result" in
303   - [{empty_cats with lemma=lemma; pos="match-result"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  314 + [{empty_cats with lemma=lemma; pos="match-result"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
304 315 | lemma,"url",[] ->
305 316 let nsyn,nsem = noun_type proper lemma "url" in
306   - [{empty_cats with lemma=lemma; pos="url"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  317 + [{empty_cats with lemma=lemma; pos="url"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
307 318 | lemma,"email",[] ->
308 319 let nsyn,nsem = noun_type proper lemma "email" in
309   - [{empty_cats with lemma=lemma; pos="email"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  320 + [{empty_cats with lemma=lemma; pos="email"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
  321 + | lemma,"phone-number",[] ->
  322 + let nsyn,nsem = noun_type proper lemma "phone-number" in
  323 + [{empty_cats with lemma=lemma; pos="phone-number"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
  324 + | lemma,"postal-code",[] ->
  325 + let nsyn,nsem = noun_type proper lemma "postal-code" in
  326 + [{empty_cats with lemma=lemma; pos="postal-code"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
310 327 | lemma,"obj-id",[] ->
311 328 let nsyn,nsem = noun_type proper lemma "obj-id" in
312   - [{empty_cats with lemma=lemma; pos="obj-id"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  329 + [{empty_cats with lemma=lemma; pos="obj-id"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
313 330 | lemma,"building-number",[] ->
314 331 let nsyn,nsem = noun_type proper lemma "building-number" in
315   - [{empty_cats with lemma=lemma; pos="building-number"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  332 + [{empty_cats with lemma=lemma; pos="building-number"; pos2="symbol"; snode=snode; nsyn=nsyn; nsem=nsem}]
  333 + | lemma,"fixed",[] -> [{empty_cats with lemma=lemma; pos="fixed"; pos2="fixed"; snode=snode}]
316 334 | lemma,"adj",[numbers;cases;genders;grads] -> (* FIXME: adjsyn *)
317 335 let numbers = expand_numbers numbers in
318 336 let cases = expand_cases cases in
319 337 let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in
320 338 let genders = expand_genders genders in
321 339 let pos,pos2 = if StringSet.mem adj_pronoun_lexemes lemma then "apron","pron" else "adj","adj" in
322   - [{empty_cats with lemma=lemma; pos=pos; pos2=pos2; cat=cat; proj=proj; numbers=numbers; cases=cases; genders=genders; grads=grads}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *)
  340 + [{empty_cats with lemma=lemma; pos=pos; pos2=pos2; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; grads=grads}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *)
323 341 | lemma,"adjc",[] ->
324   - [{empty_cats with lemma=lemma; pos="adjc"; pos2="adj"; cat=cat; proj=proj; numbers=["sg"]; cases=["pred"]; genders=["m1";"m2";"m3"]; grads=["pos"]}]
  342 + [{empty_cats with lemma=lemma; pos="adjc"; pos2="adj"; cat=cat; coerced=coerced; snode=snode; numbers=["sg"]; cases=["pred"]; genders=["m1";"m2";"m3"]; grads=["pos"]}]
325 343 | lemma,"adjp",[] ->
326   - [{empty_cats with lemma=lemma; pos="adjp"; pos2="adj"; cat=cat; proj=proj; numbers=all_numbers; cases=["postp"]; genders=all_genders; grads=["pos"]}]
327   - | lemma,"adja",[] -> [{empty_cats with lemma=lemma; cat=cat; proj=proj; pos="adja"; pos2="adja"}]
328   - | lemma,"adv",[grads] -> [{empty_cats with lemma=lemma; cat=cat; proj=proj; pos="adv"; pos2="adv"; grads=grads; modes=adv_mode lemma}]
329   - | lemma,"adv",[] -> [{empty_cats with lemma=lemma; cat=cat; proj=proj; pos="adv"; pos2="adv"; grads=["pos"]; modes=adv_mode lemma}]
  344 + [{empty_cats with lemma=lemma; pos="adjp"; pos2="adj"; cat=cat; coerced=coerced; snode=snode; numbers=all_numbers; cases=["postp"]; genders=all_genders; grads=["pos"]}]
  345 + | lemma,"adja",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; snode=snode; pos="adja"; pos2="adja"}]
  346 + | lemma,"adv",[grads] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; snode=snode; pos="adv"; pos2="adv"; grads=grads; modes=adv_mode lemma}]
  347 + | lemma,"adv",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; snode=snode; pos="adv"; pos2="adv"; grads=["pos"]; modes=adv_mode lemma}]
330 348 | lemma,"ger",[numbers;cases;genders;aspects;negations] ->
331 349 let numbers = expand_numbers numbers in
332 350 let cases = expand_cases cases in
333 351 let genders = expand_genders genders in
334   - [{empty_cats with lemma=lemma; pos="ger"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; aspects=aspects; negations=negations}] (* FIXME: kwestia osoby przy voc *)
  352 + [{empty_cats with lemma=lemma; pos="ger"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; aspects=aspects; negations=negations}] (* FIXME: kwestia osoby przy voc *)
335 353 | lemma,"pact",[numbers;cases;genders;aspects;negations] ->
336 354 let numbers = expand_numbers numbers in
337 355 let cases = expand_cases cases in
338 356 let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in
339 357 let genders = expand_genders genders in
340   - [{empty_cats with lemma=lemma; pos="pact"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}]
  358 + [{empty_cats with lemma=lemma; pos="pact"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}]
341 359 | lemma,"ppas",[numbers;cases;genders;aspects;negations] ->
342 360 let numbers = expand_numbers numbers in
343 361 let cases = expand_cases cases in
344 362 let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in
345 363 let genders = expand_genders genders in
346   - [{empty_cats with lemma=lemma; pos="ppas"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}]
  364 + [{empty_cats with lemma=lemma; pos="ppas"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}]
347 365 | lemma,"fin",[numbers;persons;aspects] -> (* FIXME: genders bez przymnogich *)
348 366 let numbers = expand_numbers numbers in
349 367 let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in
350   - let cats = {empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=all_genders; persons=persons; negations=["aff"; "neg"]; moods=["indicative"]} in
  368 + let cats = {empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; negations=["aff"; "neg"]; moods=["indicative"]} in
351 369 (Xlist.map aspects (function
352 370 "imperf" -> {cats with aspects=["imperf"]; tenses=["pres"]}
353 371 | "perf" -> {cats with aspects=["perf"]; tenses=["fut"]}
354 372 | _ -> failwith "clarify_categories")) @
355 373 (if persons2 = [] then [] else
356   - [{empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}])
  374 + [{empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}])
357 375 | lemma,"bedzie",[numbers;persons;aspects] ->
358 376 let numbers = expand_numbers numbers in
359 377 let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in
360   - [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] @
  378 + [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] @
361 379 (if persons2 = [] then [] else
362   - [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}])
  380 + [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}])
363 381 | lemma,"praet",[numbers;genders;aspects;nagl] ->
364 382 let numbers = expand_numbers numbers in
365 383 let genders = expand_genders genders in
366   - [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @
  384 + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @
367 385 (if Xlist.mem aspects "imperf" then
368   - [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}]
  386 + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}]
369 387 else [])
370 388 | lemma,"praet",[numbers;genders;aspects] ->
371 389 let numbers = expand_numbers numbers in
372 390 let genders = expand_genders genders in
373   - [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @
  391 + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @
374 392 (if Xlist.mem aspects "imperf" then
375   - [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}]
  393 + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}]
376 394 else [])
377 395 | lemma,"winien",[numbers;genders;aspects] ->
378 396 let numbers = expand_numbers numbers in
379 397 let genders = expand_genders genders in
380   - [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["pres"]};
381   - {empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] @
  398 + [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["pres"]};
  399 + {empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] @
382 400 (if Xlist.mem aspects "imperf" then
383   - [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}]
  401 + [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}]
384 402 else [])
385 403 | lemma,"impt",[numbers;persons;aspects] ->
386 404 let numbers = expand_numbers numbers in
387   - [{empty_cats with lemma=lemma; pos="impt"; pos2="verb"; cat=cat; proj=proj; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]
  405 + [{empty_cats with lemma=lemma; pos="impt"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]
388 406 | lemma,"imps",[aspects] ->
389   - [{empty_cats with lemma=lemma; pos="imps"; pos2="verb"; cat=cat; proj=proj; numbers=all_numbers; genders=all_genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}]
  407 + [{empty_cats with lemma=lemma; pos="imps"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=all_numbers; genders=all_genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}]
390 408 | lemma,"pred",[] -> (* FIXME: czy predykatyw zawsze jest niedokonany? *)
391   - [{empty_cats with lemma=lemma; pos="pred"; pos2="verb"; cat=cat; proj=proj; numbers=["sg"]; genders=[(*"n2"*)"n"]; persons=["ter"]; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["pres";"past";"fut"]}]
  409 + [{empty_cats with lemma=lemma; pos="pred"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=["sg"]; genders=[(*"n2"*)"n"]; persons=["ter"]; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["pres";"past";"fut"]}]
392 410 | lemma,"aglt",[numbers;persons;aspects;wok] ->
393 411 let numbers = expand_numbers numbers in
394   - [{empty_cats with lemma=lemma; pos="aglt"; pos2="verb"; numbers=numbers; persons=persons; aspects=aspects}]
395   - | lemma,"inf",[aspects] -> [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; cat=cat; proj=proj; aspects=aspects; negations=["aff"; "neg"]}]
396   - | lemma,"pcon",[aspects] -> [{empty_cats with lemma=lemma; pos="pcon"; pos2="verb"; cat=cat; proj=proj; aspects=aspects; negations=["aff"; "neg"]}]
397   - | lemma,"pant",[aspects] -> [{empty_cats with lemma=lemma; pos="pant"; pos2="verb"; cat=cat; proj=proj; aspects=aspects; negations=["aff"; "neg"]}]
  412 + [{empty_cats with lemma=lemma; pos="aglt"; pos2="verb"; snode=snode; numbers=numbers; persons=persons; aspects=aspects}]
  413 + | lemma,"inf",[aspects] -> [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}]
  414 + | lemma,"pcon",[aspects] -> [{empty_cats with lemma=lemma; pos="pcon"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}]
  415 + | lemma,"pant",[aspects] -> [{empty_cats with lemma=lemma; pos="pant"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}]
  416 + | lemma,"pacta",[] -> [{empty_cats with lemma=lemma; pos="pacta"; pos2="verb"; cat=cat; coerced=coerced; snode=snode}]
398 417 | lemma,"qub",[] ->
399   - if StringSet.mem part_set lemma then [{empty_cats with lemma=lemma; pos="part"; pos2="qub"}]
400   - else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"}]
401   - | lemma,"comp",[] -> [{empty_cats with lemma=lemma; pos="comp"; pos2="comp"}]
402   - | lemma,"conj",[] -> [{empty_cats with lemma=lemma; pos="conj"; pos2="conj"}]
403   - | lemma,"interj",[] -> [{empty_cats with lemma=lemma; pos="interj"; pos2="interj"}]
404   - | lemma,"sinterj",[] -> [{empty_cats with lemma=lemma; pos="sinterj"; pos2="sinterj"}]
405   - | lemma,"burk",[] -> [{empty_cats with lemma=lemma; pos="burk"; pos2="burk"}]
406   - | ",","interp",[] -> [{empty_cats with lemma=","; pos="conj"; pos2="conj"}]
407   - | lemma,"interp",[] -> [{empty_cats with lemma=lemma; pos="interp"; pos2="interp"}]
  418 + if StringSet.mem part_set lemma then [{empty_cats with lemma=lemma; pos="part"; pos2="qub"; snode=snode}]
  419 + else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"; cat=cat; snode=snode}]
  420 + | lemma,"comp",[] -> [{empty_cats with lemma=lemma; pos="comp"; pos2="comp"; snode=snode}]
  421 + | lemma,"conj",[] -> [{empty_cats with lemma=lemma; pos="conj"; pos2="conj"; snode=snode}]
  422 + | lemma,"interj",[] -> [{empty_cats with lemma=lemma; pos="interj"; pos2="interj"; cat=cat; coerced=coerced; snode=snode}]
  423 + | lemma,"sinterj",[] -> [{empty_cats with lemma=lemma; pos="sinterj"; pos2="sinterj"; (*cat=cat; coerced=coerced;*) snode=snode}]
  424 + | lemma,"burk",[] -> [{empty_cats with lemma=lemma; pos="burk"; pos2="burk"; snode=snode}]
  425 + | ",","interp",[] -> [{empty_cats with lemma=","; pos="conj"; pos2="conj"; snode=snode}]
  426 + | lemma,"interp",[] -> [{empty_cats with lemma=lemma; pos="interp"; pos2="interp"; snode=snode}]
408 427 | lemma,"unk",[] ->
409   - [{empty_cats with lemma=lemma; pos="unk"; pos2="noun"; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}]
  428 + [{empty_cats with lemma=lemma; pos="unk"; pos2="noun"; snode=snode; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}]
410 429 | lemma,"xxx",[] ->
411   - [{empty_cats with lemma=lemma; pos="xxx"; pos2="noun"; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}]
412   - | lemma,"html-tag",[] -> [{empty_cats with lemma=lemma; pos="html-tag"; pos2="html-tag"}]
  430 + [{empty_cats with lemma=lemma; pos="xxx"; pos2="noun"; snode=snode; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}]
  431 + | lemma,"html-tag",[] -> [{empty_cats with lemma=lemma; pos="html-tag"; pos2="html-tag"; snode=snode}]
  432 + | lemma,"list-item",[] -> [{empty_cats with lemma=lemma; pos="list-item"; pos2="list-item"; snode=snode}]
413 433 | lemma,c,l -> failwith ("clarify_categories: " ^ lemma ^ ":" ^ c ^ ":" ^ (String.concat ":" (Xlist.map l (String.concat "."))))
414 434  
415 435 (* FIXME: przenieść gdzieś indziej *)
... ... @@ -421,19 +441,28 @@ let clarify_categories proper cat proj = function
421 441 | _ -> [] *)
422 442  
423 443 let selector_names = StringSet.of_list [
424   - "lemma";"pos";"pos2";"cat";"proj";"number";"case";"gender";"person";"grad";
  444 + "lemma";"pos";"pos2";"cat";"coerced";"role";"irole";"prole";"nrole";"node";"inode";"pnode";"nnode";"number";"case";"gender";"person";"grad";
425 445 "praep";"acm";"aspect";"negation";"mood";"tense";"nsyn";"nsem";"ctype";"mode";"psem";
426   - "inumber";"igender";"iperson";"nperson";"plemma";
  446 + "icat";"inumber";"igender";"iperson";"nperson";"ncat";"plemma";
427 447 "unumber";"ucase";"ugender";"uperson";"amode"]
428 448  
429 449  
430 450 let string_of_selector = function
431 451 Lemma -> "lemma"
  452 + | IncludeLemmata -> "include-lemmata"
432 453 (* | NewLemma -> "newlemma" *)
433 454 | Pos -> "pos"
434 455 | Pos2 -> "pos2"
435 456 | Cat -> "cat"
436   - | Proj -> "proj"
  457 + | Coerced -> "coerced"
  458 + | Role -> "role"
  459 + | Irole -> "irole"
  460 + | Prole -> "prole"
  461 + | Nrole -> "nrole"
  462 + | SNode -> "node"
  463 + | Inode -> "inode"
  464 + | Pnode -> "pnode"
  465 + | Nnode -> "nnode"
437 466 | Number -> "number"
438 467 | Case -> "case"
439 468 | Gender -> "gender"
... ... @@ -450,10 +479,12 @@ let string_of_selector = function
450 479 | Ctype -> "ctype"
451 480 | Mode -> "mode"
452 481 | Psem -> "psem"
  482 + | Icat -> "icat"
453 483 | Inumber -> "inumber"
454 484 | Igender -> "igender"
455 485 | Iperson -> "iperson"
456 486 | Nperson -> "nperson"
  487 + | Ncat -> "ncat"
457 488 | Plemma -> "plemma"
458 489 | Unumber -> "unumber"
459 490 | Ucase -> "ucase"
... ... @@ -468,11 +499,20 @@ let string_of_selectors selectors =
468 499  
469 500 let selector_of_string = function
470 501 "lemma" -> Lemma
  502 + | "include-lemmata" -> IncludeLemmata
471 503 (* | NewLemma -> "newlemma" *)
472 504 | "pos" -> Pos
473 505 | "pos2" -> Pos2
474 506 | "cat" -> Cat
475   - | "proj" -> Proj
  507 + | "coerced" -> Coerced
  508 + | "role" -> Role
  509 + | "irole" -> Irole
  510 + | "prole" -> Prole
  511 + | "nrole" -> Nrole
  512 + | "node" -> SNode
  513 + | "inode" -> Inode
  514 + | "pnode" -> Pnode
  515 + | "nnode" -> Nnode
476 516 | "number" -> Number
477 517 | "case" -> Case
478 518 | "gender" -> Gender
... ... @@ -489,10 +529,12 @@ let selector_of_string = function
489 529 | "ctype" -> Ctype
490 530 | "mode" -> Mode
491 531 | "psem" -> Psem
  532 + | "icat" -> Icat
492 533 | "inumber" -> Inumber
493 534 | "igender" -> Igender
494 535 | "iperson" -> Iperson
495 536 | "nperson" -> Nperson
  537 + | "ncat" -> Ncat
496 538 | "plemma" -> Plemma
497 539 | "unumber" -> Unumber
498 540 | "ucase" -> Ucase
... ... @@ -506,7 +548,9 @@ let match_selector cats = function
506 548 (* | NewLemma -> [] *)
507 549 | Pos -> [cats.pos]
508 550 | Cat -> [cats.cat]
509   - | Proj -> cats.proj
  551 + | Coerced -> cats.coerced
  552 + | Role -> cats.roles
  553 + | SNode -> cats.snode
510 554 | Number -> cats.numbers
511 555 | Case -> cats.cases
512 556 | Gender -> cats.genders
... ... @@ -543,7 +587,9 @@ let set_selector cats vals = function
543 587 | Lemma -> (match vals with [v] -> {cats with lemma=v} | _ -> failwith "set_selector: Lemma")
544 588 | Pos -> (match vals with [v] -> {cats with pos=v} | _ -> failwith "set_selector: Pos")
545 589 | Cat -> (match vals with [v] -> {cats with cat=v} | _ -> failwith "set_selector: Cat")
546   - | Proj -> {cats with proj=vals}
  590 + | Coerced -> {cats with coerced=vals}
  591 + | Role -> {cats with roles=vals}
  592 + | SNode -> {cats with snode=vals}
547 593 | c -> failwith ("set_selector: " ^ string_of_selector c)
548 594  
549 595 let rec apply_selectors cats = function
... ... @@ -558,70 +604,84 @@ let rec apply_selectors cats = function
558 604 apply_selectors (set_selector cats (StringSet.to_list vals) sel) l
559 605  
560 606 let pos_categories = Xlist.fold [
561   - "subst",[Lemma;Cat;Proj;Number;Case;Gender;Person;Nsyn;Nsem;];
562   - "depr",[Lemma;Cat;Proj;Number;Case;Gender;Person;Nsyn;Nsem;];
563   - "ppron12",[Lemma;Number;Case;Gender;Person;];
564   - "ppron3",[Lemma;Number;Case;Gender;Person;Praep;];
565   - "siebie",[Lemma;Number;Case;Gender;Person;];
566   - "prep",[Lemma;Cat;Proj;Psem;Case;];
567   - "compar",[Lemma;Cat;Proj;Case;];
568   - "num",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
569   - "intnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
570   - "realnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
571   - "intnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
572   - "realnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
573   - "symbol",[Lemma;Number;Case;Gender;Person;];
574   - "ordnum",[Lemma;Number;Case;Gender;Grad;];
575   - "date",[Lemma;Nsyn;Nsem;];
576   - "date-interval",[Lemma;Nsyn;Nsem;];
577   - "hour-minute",[Lemma;Nsyn;Nsem;];
578   - "hour",[Lemma;Nsyn;Nsem;];
579   - "hour-minute-interval",[Lemma;Nsyn;Nsem;];
580   - "hour-interval",[Lemma;Nsyn;Nsem;];
581   - "year",[Lemma;Nsyn;Nsem;];
582   - "year-interval",[Lemma;Nsyn;Nsem;];
583   - "day",[Lemma;Nsyn;Nsem;];
584   - "day-interval",[Lemma;Nsyn;Nsem;];
585   - "day-month",[Lemma;Nsyn;Nsem;];
586   - "day-month-interval",[Lemma;Nsyn;Nsem;];
587   - "month-interval",[Lemma;Nsyn;Nsem;];
588   - "roman-ordnum",[Lemma;Number;Case;Gender;Grad;];
589   - "roman",[Lemma;Nsyn;Nsem;];
590   - "roman-interval",[Lemma;Nsyn;Nsem;];
591   - "match-result",[Lemma;Nsyn;Nsem;];
592   - "url",[Lemma;Nsyn;Nsem;];
593   - "email",[Lemma;Nsyn;Nsem;];
594   - "obj-id",[Lemma;Nsyn;Nsem;];
595   - "building-number",[Lemma;Nsyn;Nsem;];
596   - "adj",[Lemma;Cat;Proj;Number;Case;Gender;Grad;];
597   - "adjc",[Lemma;Cat;Proj;Number;Case;Gender;Grad;];
598   - "adjp",[Lemma;Cat;Proj;Number;Case;Gender;Grad;];
599   - "apron",[Lemma;Number;Case;Gender;Grad;];
600   - "adja",[Lemma;Cat;Proj;];
601   - "adv",[Lemma;Cat;Proj;Grad;Mode];(* ctype *)
602   - "ger",[Lemma;(*NewLemma;*)Cat;Proj;Number;Case;Gender;Person;Aspect;Negation;];
603   - "pact",[Lemma;(*NewLemma;*)Cat;Proj;Number;Case;Gender;Aspect;Negation;];
604   - "ppas",[Lemma;(*NewLemma;*)Cat;Proj;Number;Case;Gender;Aspect;Negation;];
605   - "fin",[Lemma;(*NewLemma;*)Cat;Proj;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
606   - "bedzie",[Lemma;(*NewLemma;*)Cat;Proj;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
607   - "praet",[Lemma;(*NewLemma;*)Cat;Proj;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
608   - "winien",[Lemma;(*NewLemma;*)Cat;Proj;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
609   - "impt",[Lemma;(*NewLemma;*)Cat;Proj;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
610   - "imps",[Lemma;(*NewLemma;*)Cat;Proj;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
611   - "pred",[Lemma;(*NewLemma;*)Cat;Proj;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
612   - "aglt",[Lemma;Number;Person;Aspect;];
613   - "inf",[Lemma;(*NewLemma;*)Cat;Proj;Aspect;Negation;];
614   - "pcon",[Lemma;(*NewLemma;*)Cat;Proj;Aspect;Negation;];
615   - "pant",[Lemma;(*NewLemma;*)Cat;Proj;Aspect;Negation;];
616   - "qub",[Lemma;];
617   - "part",[Lemma;];
618   - "comp",[Lemma;];(* ctype *)
619   - "conj",[Lemma;];(* ctype *)
620   - "interj",[Lemma;];
621   - "sinterj",[Lemma;];
622   - "burk",[Lemma;];
623   - "interp",[Lemma;];
624   - "unk",[Lemma;Number;Case;Gender;Person;];
625   - "xxx",[Lemma;Number;Case;Gender;Person;];
626   - "html-tag",[Lemma;];
  607 + "subst",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Person;Nsyn;Nsem;];
  608 + "depr",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Person;Nsyn;Nsem;];
  609 + "ppron12",[Lemma;SNode;Number;Case;Gender;Person;];
  610 + "ppron3",[Lemma;SNode;Number;Case;Gender;Person;Praep;];
  611 + "siebie",[Lemma;SNode;Number;Case;Gender;Person;];
  612 + "prep",[Lemma;Cat;Coerced;Role;SNode;Psem;Case;];
  613 + "compar",[Lemma;Cat;Coerced;Role;SNode;Case;];
  614 + "num",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;];
  615 + "numcomp",[Lemma;SNode];
  616 + "intnum",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;];
  617 + "realnum",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;];
  618 + "intnum-interval",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;];
  619 + "realnum-interval",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;];
  620 + "symbol",[Lemma;SNode;Number;Case;Gender;Person;];
  621 + "ordnum",[Lemma;SNode;Number;Case;Gender;Grad;];
  622 + "date",[Lemma;SNode;Nsyn;Nsem;];
  623 + "date-interval",[Lemma;SNode;Nsyn;Nsem;];
  624 + "hour-minute",[Lemma;SNode;Nsyn;Nsem;];
  625 + "hour",[Lemma;SNode;Nsyn;Nsem;];
  626 + "hour-minute-interval",[Lemma;SNode;Nsyn;Nsem;];
  627 + "hour-interval",[Lemma;SNode;Nsyn;Nsem;];
  628 + "year",[Lemma;SNode;Nsyn;Nsem;];
  629 + "year-interval",[Lemma;SNode;Nsyn;Nsem;];
  630 + "day",[Lemma;SNode;Nsyn;Nsem;];
  631 + "day-interval",[Lemma;SNode;Nsyn;Nsem;];
  632 + "day-month",[Lemma;SNode;Nsyn;Nsem;];
  633 + "day-month-interval",[Lemma;SNode;Nsyn;Nsem;];
  634 + "month-interval",[Lemma;SNode;Nsyn;Nsem;];
  635 + "roman-ordnum",[Lemma;SNode;Number;Case;Gender;Grad;];
  636 + "roman",[Lemma;SNode;Nsyn;Nsem;];
  637 + "roman-interval",[Lemma;SNode;Nsyn;Nsem;];
  638 + "match-result",[Lemma;SNode;Nsyn;Nsem;];
  639 + "url",[Lemma;SNode;Nsyn;Nsem;];
  640 + "email",[Lemma;SNode;Nsyn;Nsem;];
  641 + "phone-number",[Lemma;SNode;Nsyn;Nsem;];
  642 + "postal-code",[Lemma;SNode;Nsyn;Nsem;];
  643 + "obj-id",[Lemma;SNode;Nsyn;Nsem;];
  644 + "building-number",[Lemma;SNode;Nsyn;Nsem;];
  645 + "fixed",[Lemma;SNode;];
  646 + "adj",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Grad;];
  647 + "adjc",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Grad;];
  648 + "adjp",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Grad;];
  649 + "apron",[Lemma;Cat;Role;SNode;Number;Case;Gender;Grad;];
  650 + "adja",[Lemma;Cat;Coerced;Role;SNode;];
  651 + "adv",[Lemma;Cat;Coerced;Role;SNode;Grad;Mode];(* ctype *)
  652 + "ger",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Case;Gender;Person;Aspect;Negation;];
  653 + "pact",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Case;Gender;Aspect;Negation;];
  654 + "ppas",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Case;Gender;Aspect;Negation;];
  655 + "fin",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  656 + "bedzie",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  657 + "praet",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  658 + "winien",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  659 + "impt",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  660 + "imps",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  661 + "pred",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  662 + "aglt",[Lemma;SNode;Number;Person;Aspect;];
  663 + "inf",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;];
  664 + "pcon",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;];
  665 + "pant",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;];
  666 + "pacta",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;];
  667 + "qub",[Lemma;Cat;Role;SNode;];
  668 + "part",[Lemma;SNode];
  669 + "comp",[Lemma;SNode;];(* ctype *)
  670 + "conj",[Lemma;SNode;];(* ctype *)
  671 + "interj",[Lemma;Cat;Coerced;Role;SNode;];
  672 + "sinterj",[Lemma;Cat;Coerced;Role;SNode;];
  673 + "burk",[Lemma;SNode;];
  674 + "interp",[Lemma;SNode;];
  675 + "unk",[Lemma;SNode;Number;Case;Gender;Person;];
  676 + "xxx",[Lemma;SNode;Number;Case;Gender;Person;];
  677 + "html-tag",[Lemma;SNode;];
  678 + "list-item",[Lemma;SNode;];
627 679 ] StringMap.empty (fun map (k,l) -> StringMap.add map k l)
  680 +
  681 +let string_of_cats cats =
  682 + String.concat ", " (SelectorMap.fold selector_values [] (fun l sel _ ->
  683 + try
  684 + let s = String.concat "|" (match_selector cats sel) in
  685 + if s = "" then l else
  686 + (string_of_selector sel ^ "=" ^ s) :: l
  687 + with _ -> l))
... ...
LCGlexicon/ENIAMcategoriesPL_old.ml 0 → 100644
  1 +(*
  2 + * ENIAM_LCGlexicon is a library that provides LCG lexicon form Polish
  3 + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  5 + *
  6 + * This library is free software: you can redistribute it and/or modify
  7 + * it under the terms of the GNU Lesser General Public License as published by
  8 + * the Free Software Foundation, either version 3 of the License, or
  9 + * (at your option) any later version.
  10 + *
  11 + * This library is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 + * GNU Lesser General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU Lesser General Public License
  17 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18 + *)
  19 +
  20 +open ENIAM_LCGlexiconTypes
  21 +open Xstd
  22 +
  23 +let all_numbers = ["sg";"pl"]
  24 +let all_cases = ["nom";"gen";"dat";"acc";"inst";"loc";"voc"]
  25 +(* let all_genders = ["m1";"m2";"m3";"f";"n1";"n2";"p1";"p2";"p3"] *)
  26 +let all_genders = ["m1";"m2";"m3";"f";"n"]
  27 +let all_persons = ["pri";"sec";"ter"]
  28 +(* FIXME: zamiast wszystkich możliwych wartości można używać Zero gdy nie ma uzgodnienia *)
  29 +
  30 +let selector_values = Xlist.fold [
  31 + Lemma, [];
  32 + Pos, ["subst";"depr";"ppron12";"ppron3";"siebie";"prep";"fixed";"num";"numcomp";"intnum";
  33 + "realnum";"intnum-interval";"realnum-interval";"symbol";"ordnum";
  34 + "date";"date-interval";"hour-minute";"hour";"hour-minute-interval";
  35 + "hour-interval";"year";"year-interval";"day";"day-interval";"day-month";
  36 + "day-month-interval";"month-interval";"roman";"roman-interval";"roman-ordnum";
  37 + "match-result";"url";"email";"phone-number";"postal-code";"obj-id";"building-number";"list-item";"adj";"adjc";"adjp";"adja";
  38 + "adv";"ger";"pact";"ppas";"fin";"bedzie";"praet";"winien";"impt";
  39 + "imps";"pred";"aglt";"inf";"pcon";"pant";"qub";"part";"comp";"conj";"interj";
  40 + "sinterj";"burk";"interp";"xxx";"unk";"html-tag";"apron";"compar"];
  41 + Pos2, [];
  42 + Cat, [];
  43 + Coerced, [];
  44 + Number, all_numbers;
  45 + Case, "postp" :: "pred" :: all_cases;
  46 + Gender, all_genders;
  47 + Person, all_persons;
  48 + Grad, ["pos";"com";"sup"];
  49 + Praep, ["praep";"npraep";"praep-npraep"];
  50 + Acm, ["congr";"rec"];
  51 + Ctype, ["int";"rel";"sub";"coord"];
  52 + Mode, ["abl";"adl";"locat";"perl";"dur";"temp";"mod"];
  53 + Aspect, ["perf";"imperf"];
  54 + Negation, ["neg";"aff"];
  55 + Mood, ["indicative";"imperative";"conditional"];
  56 + Tense, ["past";"pres";"fut"];
  57 + Nsyn, ["proper";"pronoun";"common"];
  58 + Nsem, ["count";"time";"mass";"measure"];
  59 + Psem, ["sem";"nosem"];
  60 + Ucase, all_cases;
  61 +] SelectorMap.empty (fun map (selector,vals) -> SelectorMap.add map selector vals)
  62 +
  63 +
  64 +let expand_numbers numbers =
  65 + if Xlist.mem numbers "_" then all_numbers else numbers
  66 +
  67 +let expand_genders genders =
  68 + if Xlist.mem genders "_" then all_genders else genders
  69 +
  70 +let expand_cases cases =
  71 + if Xlist.mem cases "_" || Xlist.mem cases "$C" then all_cases else cases
  72 +
  73 +let expand_akcs akcs =
  74 + if Xlist.mem akcs "_" then ["akc";"nakc"] else akcs
  75 +
  76 +let split_voc cases =
  77 + Xlist.fold cases ([],[]) (fun (cases,voc) -> function
  78 + "voc" -> cases, "voc" :: voc
  79 + | s -> s :: cases, voc)
  80 +
  81 +let load_subst_data filename _ =
  82 + StringSet.of_list (File.load_lines filename)
  83 +
  84 +let subst_uncountable_lexemes = ref StringSet.empty
  85 +let subst_uncountable_lexemes2 = ref StringSet.empty
  86 +let subst_container_lexemes = ref StringSet.empty
  87 +let subst_numeral_lexemes = ref StringSet.empty
  88 +let subst_time_lexemes = ref StringSet.empty
  89 +
  90 +let subst_pronoun_lexemes = StringSet.of_list ["co"; "kto"; "cokolwiek"; "ktokolwiek"; "nic"; "nikt"; "coś"; "ktoś"; "to"]
  91 +let adj_pronoun_lexemes = StringSet.of_list ["czyj"; "jaki"; "który"; "jakiś"; "ten"; "taki"]
  92 +let compar_lexemes = StringSet.of_list ["jak"; "jako"; "niż"; "niczym"; "niby"; "co"; "zamiast"]
  93 +
  94 +(* let adj_quant_lexemes = StringSet.of_list ["każdy"; "wszelki"; "wszystek"; "żaden"; "jakiś"; "pewien"; "niektóry"; "jedyny"; "sam"] *)
  95 +
  96 +let load_adv_modes filename adv_modes =
  97 + File.fold_tab filename adv_modes (fun adv_modes -> function
  98 + [adv;mode] -> StringMap.add_inc adv_modes adv [mode] (fun l -> mode :: l)
  99 + | _ -> failwith "load_adv_modes")
  100 +
  101 +let load_num_nsems filename num_nsems =
  102 + File.fold_tab filename num_nsems (fun num_nsems -> function
  103 + lemma :: _ :: nsems :: _ ->
  104 + Xlist.fold (Xstring.split "," nsems) num_nsems (fun num_nsems nsem ->
  105 + StringMap.add_inc num_nsems lemma [nsem] (fun l -> nsem :: l))
  106 + | _ -> failwith "load_num_nsems")
  107 +
  108 +let adv_modes = ref (StringMap.empty : string list StringMap.t)
  109 +let num_nsems = ref (StringMap.empty : string list StringMap.t)
  110 +
  111 +let initialize () =
  112 + subst_uncountable_lexemes := File.catch_no_file (load_subst_data subst_uncountable_lexemes_filename) StringSet.empty;
  113 + subst_uncountable_lexemes2 := File.catch_no_file (load_subst_data subst_uncountable_lexemes_filename2) StringSet.empty;
  114 + subst_container_lexemes := File.catch_no_file (load_subst_data subst_container_lexemes_filename) StringSet.empty;
  115 + subst_numeral_lexemes := File.catch_no_file (load_subst_data subst_numeral_lexemes_filename) StringSet.empty;
  116 + subst_time_lexemes := File.catch_no_file (load_subst_data subst_time_lexemes_filename) StringSet.empty;
  117 + adv_modes := File.catch_no_file (load_adv_modes adv_modes_filename) StringMap.empty;
  118 + num_nsems := File.catch_no_file (load_num_nsems num_nsems_filename) StringMap.empty;
  119 + ()
  120 +
  121 +let noun_type proper lemma pos =
  122 + let nsyn =
  123 + if proper then "proper" else
  124 + if pos = "ppron12" || pos = "ppron3" || pos = "siebie" then "pronoun" else
  125 + if pos = "symbol" || pos = "date" || pos = "date-interval" || pos = "hour" || pos = "hour-minute" || pos = "hour-interval" || pos = "hour-minute-interval" ||
  126 + pos = "year" || pos = "year-interval" || pos = "day" || pos = "day-interval" || pos = "day-month" || pos = "day-month-interval" ||
  127 + pos = "match-result" || pos = "month-interval" || pos = "roman" || pos = "roman-interval" || pos = "url" || pos = "email" || pos = "phone-number" || pos = "postal-code" || pos = "obj-id" || pos = "building-number" || pos = "date" then "proper" else
  128 + if StringSet.mem subst_pronoun_lexemes lemma then "pronoun" else
  129 + "common" in
  130 + let nsem =
  131 + if pos = "ppron12" || pos = "ppron3" || pos = "siebie" then ["count"] else
  132 + if StringSet.mem !subst_time_lexemes lemma then ["time"] else
  133 + let l = ["count"] in
  134 + let l = if StringSet.mem !subst_uncountable_lexemes lemma || StringSet.mem !subst_uncountable_lexemes2 lemma then "mass" :: l else l in
  135 + if StringSet.mem !subst_container_lexemes lemma then "measure" :: l else l in
  136 + [nsyn],nsem
  137 +
  138 +let adv_mode lemma =
  139 + try
  140 + StringMap.find !adv_modes lemma
  141 + with Not_found -> ["mod"]
  142 +
  143 +let num_nsem lemma =
  144 + try
  145 + StringMap.find !num_nsems lemma
  146 + with Not_found -> (*try
  147 + StringMap.find !num_nsems (String.lowercase lemma)
  148 + with Not_found ->*) failwith ("num_nsem: " ^ lemma)
  149 +
  150 +
  151 +let part_set = StringSet.of_list ["się"; "nie"; "by"; "niech"; "niechaj"; "niechże"; "niechajże"; "czy"; "gdyby"]
  152 +
  153 +let clarify_categories proper cat coerced = function
  154 + lemma,"subst",[numbers;cases;genders] ->
  155 + let numbers = expand_numbers numbers in
  156 + let cases = expand_cases cases in
  157 + let genders = expand_genders genders in
  158 + let cases,voc = split_voc cases in
  159 + let nsyn,nsem = noun_type proper lemma "subst" in
  160 + (if cases = [] then [] else
  161 + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @
  162 + (if voc = [] then [] else
  163 + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}])
  164 + | lemma,"subst",[numbers;cases;genders;_] ->
  165 + let numbers = expand_numbers numbers in
  166 + let cases = expand_cases cases in
  167 + let genders = expand_genders genders in
  168 + let cases,voc = split_voc cases in
  169 + let nsyn,nsem = noun_type proper lemma "subst" in
  170 + (if cases = [] then [] else
  171 + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @
  172 + (if voc = [] then [] else
  173 + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}])
  174 + | lemma,"depr",[numbers;cases;genders] ->
  175 + let numbers = expand_numbers numbers in
  176 + let cases = expand_cases cases in
  177 + let genders = expand_genders genders in
  178 + let cases,voc = split_voc cases in
  179 + let nsyn,nsem = noun_type proper lemma "depr" in
  180 + (if cases = [] then [] else
  181 + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @
  182 + (if voc = [] then [] else
  183 + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}])
  184 + | lemma,"ppron12",[numbers;cases;genders;persons] ->
  185 + let numbers = expand_numbers numbers in
  186 + let cases = expand_cases cases in
  187 + let genders = expand_genders genders in
  188 + [{empty_cats with lemma=lemma; pos="ppron12"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons}]
  189 + | lemma,"ppron12",[numbers;cases;genders;persons;akcs] ->
  190 + let numbers = expand_numbers numbers in
  191 + let cases = expand_cases cases in
  192 + let genders = expand_genders genders in
  193 + [{empty_cats with lemma=lemma; pos="ppron12"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons}]
  194 + | lemma,"ppron3",[numbers;cases;genders;persons] ->
  195 + let numbers = expand_numbers numbers in
  196 + let cases = expand_cases cases in
  197 + let genders = expand_genders genders in
  198 + [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=["praep-npraep"]}]
  199 + | lemma,"ppron3",[numbers;cases;genders;persons;akcs] ->
  200 + let numbers = expand_numbers numbers in
  201 + let cases = expand_cases cases in
  202 + let genders = expand_genders genders in
  203 + [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=["praep-npraep"]}]
  204 + | lemma,"ppron3",[numbers;cases;genders;persons;akcs;praep] ->
  205 + let numbers = expand_numbers numbers in
  206 + let cases = expand_cases cases in
  207 + let genders = expand_genders genders in
  208 + let praep = match praep with
  209 + ["praep";"npraep"] -> ["praep-npraep"]
  210 + | ["npraep";"praep"] -> ["praep-npraep"]
  211 + | _ -> praep in
  212 + [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=praep}]
  213 + | lemma,"siebie",[cases] -> (* FIXME: czy tu określać numbers genders persons? *)
  214 + let cases = expand_cases cases in
  215 + [{empty_cats with lemma=lemma; pos="siebie"; pos2="pron"; numbers=all_numbers; cases=cases; genders=all_genders; persons=["ter"]}]
  216 + | lemma,"prep",[cases;woks] ->
  217 + if StringSet.mem compar_lexemes lemma then
  218 + [{empty_cats with lemma=lemma; pos="compar"; pos2="prep"}] else
  219 + let cases = expand_cases cases in
  220 + [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; cases=cases; psem=["sem";"nosem"]}]
  221 + | lemma,"prep",[cases] ->
  222 + if StringSet.mem compar_lexemes lemma then
  223 + [{empty_cats with lemma=lemma; pos="compar"; pos2="prep"}] else
  224 + let cases = expand_cases cases in
  225 + [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; cases=cases; psem=["sem";"nosem"]}]
  226 + | lemma,"num",[numbers;cases;genders;acms] ->
  227 + let numbers = expand_numbers numbers in
  228 + let cases = expand_cases cases in
  229 + let genders = expand_genders genders in
  230 + let nsem = num_nsem lemma in
  231 + [{empty_cats with lemma=lemma; pos="num"; pos2="num"; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms; nsem=nsem}]
  232 + | lemma,"num",[numbers;cases;genders;acms;_] ->
  233 + let numbers = expand_numbers numbers in
  234 + let cases = expand_cases cases in
  235 + let genders = expand_genders genders in
  236 + let nsem = num_nsem lemma in
  237 + [{empty_cats with lemma=lemma; pos="num"; pos2="num"; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms; nsem=nsem}]
  238 + | lemma,"numcomp",[] -> [{empty_cats with lemma=lemma; pos="numcomp"; pos2="numcomp"}]
  239 + | lemma,"intnum",[] ->
  240 + let numbers,acms =
  241 + if lemma = "1" || lemma = "-1" then ["sg"],["congr"] else
  242 + let s = String.get lemma (String.length lemma - 1) in
  243 + ["pl"],if s = '2' || s = '3' || s = '4' then ["rec";"congr"] else ["rec"] in
  244 + [{empty_cats with lemma=lemma; pos="intnum"; pos2="num"; numbers=numbers; cases=all_cases; genders=all_genders; persons=["ter"]; acms=acms; nsem=["count"]}]
  245 + | lemma,"realnum",[] ->
  246 + [{empty_cats with lemma=lemma; pos="realnum"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}]
  247 + | lemma,"intnum-interval",[] ->
  248 + [{empty_cats with lemma=lemma; pos="intnum-interval"; pos2="num"; numbers=["pl"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec";"congr"]; nsem=["count"]}]
  249 + | lemma,"realnum-interval",[] ->
  250 + [{empty_cats with lemma=lemma; pos="realnum-interval"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}]
  251 + | lemma,"symbol",[] ->
  252 + [{empty_cats with lemma=lemma; pos="symbol"; pos2="noun"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]}]
  253 + | lemma,"ordnum",[] ->
  254 + [{empty_cats with lemma=lemma; pos="ordnum"; pos2="adj"; numbers=all_numbers; cases=all_cases; genders=all_genders; grads=["pos"]}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *)
  255 + | lemma,"date",[] ->
  256 + let nsyn,nsem = noun_type proper lemma "date" in
  257 + [{empty_cats with lemma=lemma; pos="date"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  258 + | lemma,"date-interval",[] ->
  259 + let nsyn,nsem = noun_type proper lemma "date-interval" in
  260 + [{empty_cats with lemma=lemma; pos="date-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  261 + | lemma,"hour-minute",[] ->
  262 + let nsyn,nsem = noun_type proper lemma "hour-minute" in
  263 + [{empty_cats with lemma=lemma; pos="hour-minute"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  264 + | lemma,"hour",[] ->
  265 + let nsyn,nsem = noun_type proper lemma "hour" in
  266 + [{empty_cats with lemma=lemma; pos="hour"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  267 + | lemma,"hour-minute-interval",[] ->
  268 + let nsyn,nsem = noun_type proper lemma "hour-minute-interval" in
  269 + [{empty_cats with lemma=lemma; pos="hour-minute-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  270 + | lemma,"hour-interval",[] ->
  271 + let nsyn,nsem = noun_type proper lemma "hour-interval" in
  272 + [{empty_cats with lemma=lemma; pos="hour-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  273 + | lemma,"year",[] ->
  274 + let nsyn,nsem = noun_type proper lemma "year" in
  275 + [{empty_cats with lemma=lemma; pos="year"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  276 + | lemma,"year-interval",[] ->
  277 + let nsyn,nsem = noun_type proper lemma "year-interval" in
  278 + [{empty_cats with lemma=lemma; pos="year-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  279 + | lemma,"day",[] ->
  280 + let nsyn,nsem = noun_type proper lemma "day" in
  281 + [{empty_cats with lemma=lemma; pos="day"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  282 + | lemma,"day-interval",[] ->
  283 + let nsyn,nsem = noun_type proper lemma "day-interval" in
  284 + [{empty_cats with lemma=lemma; pos="day-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  285 + | lemma,"day-month",[] ->
  286 + let nsyn,nsem = noun_type proper lemma "day-month" in
  287 + [{empty_cats with lemma=lemma; pos="day-month"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  288 + | lemma,"day-month-interval",[] ->
  289 + let nsyn,nsem = noun_type proper lemma "day-month-interval" in
  290 + [{empty_cats with lemma=lemma; pos="day-month-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  291 + | lemma,"month-interval",[] ->
  292 + let nsyn,nsem = noun_type proper lemma "month-interval" in
  293 + [{empty_cats with lemma=lemma; pos="month-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  294 + | lemma,"roman",[] ->
  295 + let nsyn,nsem = noun_type proper lemma "roman" in
  296 + [{empty_cats with lemma=lemma; pos="roman-ordnum"; pos2="adj"; numbers=all_numbers; cases=all_cases; genders=all_genders; grads=["pos"]};
  297 + {empty_cats with lemma=lemma; pos="roman"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  298 + | lemma,"roman-interval",[] ->
  299 + let nsyn,nsem = noun_type proper lemma "roman-interval" in
  300 + [{empty_cats with lemma=lemma; pos="roman-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  301 + | lemma,"match-result",[] ->
  302 + let nsyn,nsem = noun_type proper lemma "match-result" in
  303 + [{empty_cats with lemma=lemma; pos="match-result"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  304 + | lemma,"url",[] ->
  305 + let nsyn,nsem = noun_type proper lemma "url" in
  306 + [{empty_cats with lemma=lemma; pos="url"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  307 + | lemma,"email",[] ->
  308 + let nsyn,nsem = noun_type proper lemma "email" in
  309 + [{empty_cats with lemma=lemma; pos="email"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  310 + | lemma,"phone-number",[] ->
  311 + let nsyn,nsem = noun_type proper lemma "phone-number" in
  312 + [{empty_cats with lemma=lemma; pos="phone-number"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  313 + | lemma,"postal-code",[] ->
  314 + let nsyn,nsem = noun_type proper lemma "postal-code" in
  315 + [{empty_cats with lemma=lemma; pos="postal-code"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  316 + | lemma,"obj-id",[] ->
  317 + let nsyn,nsem = noun_type proper lemma "obj-id" in
  318 + [{empty_cats with lemma=lemma; pos="obj-id"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  319 + | lemma,"building-number",[] ->
  320 + let nsyn,nsem = noun_type proper lemma "building-number" in
  321 + [{empty_cats with lemma=lemma; pos="building-number"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  322 + | lemma,"fixed",[] -> [{empty_cats with lemma=lemma; pos="fixed"; pos2="fixed"}]
  323 + | lemma,"adj",[numbers;cases;genders;grads] -> (* FIXME: adjsyn *)
  324 + let numbers = expand_numbers numbers in
  325 + let cases = expand_cases cases in
  326 + let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in
  327 + let genders = expand_genders genders in
  328 + let pos,pos2 = if StringSet.mem adj_pronoun_lexemes lemma then "apron","pron" else "adj","adj" in
  329 + [{empty_cats with lemma=lemma; pos=pos; pos2=pos2; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; grads=grads}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *)
  330 + | lemma,"adjc",[] ->
  331 + [{empty_cats with lemma=lemma; pos="adjc"; pos2="adj"; cat=cat; coerced=coerced; numbers=["sg"]; cases=["pred"]; genders=["m1";"m2";"m3"]; grads=["pos"]}]
  332 + | lemma,"adjp",[] ->
  333 + [{empty_cats with lemma=lemma; pos="adjp"; pos2="adj"; cat=cat; coerced=coerced; numbers=all_numbers; cases=["postp"]; genders=all_genders; grads=["pos"]}]
  334 + | lemma,"adja",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; pos="adja"; pos2="adja"}]
  335 + | lemma,"adv",[grads] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; pos="adv"; pos2="adv"; grads=grads; modes=adv_mode lemma}]
  336 + | lemma,"adv",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; pos="adv"; pos2="adv"; grads=["pos"]; modes=adv_mode lemma}]
  337 + | lemma,"ger",[numbers;cases;genders;aspects;negations] ->
  338 + let numbers = expand_numbers numbers in
  339 + let cases = expand_cases cases in
  340 + let genders = expand_genders genders in
  341 + [{empty_cats with lemma=lemma; pos="ger"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; aspects=aspects; negations=negations}] (* FIXME: kwestia osoby przy voc *)
  342 + | lemma,"pact",[numbers;cases;genders;aspects;negations] ->
  343 + let numbers = expand_numbers numbers in
  344 + let cases = expand_cases cases in
  345 + let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in
  346 + let genders = expand_genders genders in
  347 + [{empty_cats with lemma=lemma; pos="pact"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}]
  348 + | lemma,"ppas",[numbers;cases;genders;aspects;negations] ->
  349 + let numbers = expand_numbers numbers in
  350 + let cases = expand_cases cases in
  351 + let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in
  352 + let genders = expand_genders genders in
  353 + [{empty_cats with lemma=lemma; pos="ppas"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}]
  354 + | lemma,"fin",[numbers;persons;aspects] -> (* FIXME: genders bez przymnogich *)
  355 + let numbers = expand_numbers numbers in
  356 + let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in
  357 + let cats = {empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; negations=["aff"; "neg"]; moods=["indicative"]} in
  358 + (Xlist.map aspects (function
  359 + "imperf" -> {cats with aspects=["imperf"]; tenses=["pres"]}
  360 + | "perf" -> {cats with aspects=["perf"]; tenses=["fut"]}
  361 + | _ -> failwith "clarify_categories")) @
  362 + (if persons2 = [] then [] else
  363 + [{empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}])
  364 + | lemma,"bedzie",[numbers;persons;aspects] ->
  365 + let numbers = expand_numbers numbers in
  366 + let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in
  367 + [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] @
  368 + (if persons2 = [] then [] else
  369 + [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}])
  370 + | lemma,"praet",[numbers;genders;aspects;nagl] ->
  371 + let numbers = expand_numbers numbers in
  372 + let genders = expand_genders genders in
  373 + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @
  374 + (if Xlist.mem aspects "imperf" then
  375 + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}]
  376 + else [])
  377 + | lemma,"praet",[numbers;genders;aspects] ->
  378 + let numbers = expand_numbers numbers in
  379 + let genders = expand_genders genders in
  380 + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @
  381 + (if Xlist.mem aspects "imperf" then
  382 + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}]
  383 + else [])
  384 + | lemma,"winien",[numbers;genders;aspects] ->
  385 + let numbers = expand_numbers numbers in
  386 + let genders = expand_genders genders in
  387 + [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["pres"]};
  388 + {empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] @
  389 + (if Xlist.mem aspects "imperf" then
  390 + [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}]
  391 + else [])
  392 + | lemma,"impt",[numbers;persons;aspects] ->
  393 + let numbers = expand_numbers numbers in
  394 + [{empty_cats with lemma=lemma; pos="impt"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]
  395 + | lemma,"imps",[aspects] ->
  396 + [{empty_cats with lemma=lemma; pos="imps"; pos2="verb"; cat=cat; coerced=coerced; numbers=all_numbers; genders=all_genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}]
  397 + | lemma,"pred",[] -> (* FIXME: czy predykatyw zawsze jest niedokonany? *)
  398 + [{empty_cats with lemma=lemma; pos="pred"; pos2="verb"; cat=cat; coerced=coerced; numbers=["sg"]; genders=[(*"n2"*)"n"]; persons=["ter"]; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["pres";"past";"fut"]}]
  399 + | lemma,"aglt",[numbers;persons;aspects;wok] ->
  400 + let numbers = expand_numbers numbers in
  401 + [{empty_cats with lemma=lemma; pos="aglt"; pos2="verb"; numbers=numbers; persons=persons; aspects=aspects}]
  402 + | lemma,"inf",[aspects] -> [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; cat=cat; coerced=coerced; aspects=aspects; negations=["aff"; "neg"]}]
  403 + | lemma,"pcon",[aspects] -> [{empty_cats with lemma=lemma; pos="pcon"; pos2="verb"; cat=cat; coerced=coerced; aspects=aspects; negations=["aff"; "neg"]}]
  404 + | lemma,"pant",[aspects] -> [{empty_cats with lemma=lemma; pos="pant"; pos2="verb"; cat=cat; coerced=coerced; aspects=aspects; negations=["aff"; "neg"]}]
  405 + | lemma,"qub",[] ->
  406 + if StringSet.mem part_set lemma then [{empty_cats with lemma=lemma; pos="part"; pos2="qub"}]
  407 + else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"; cat=cat}]
  408 + | lemma,"comp",[] -> [{empty_cats with lemma=lemma; pos="comp"; pos2="comp"}]
  409 + | lemma,"conj",[] -> [{empty_cats with lemma=lemma; pos="conj"; pos2="conj"}]
  410 + | lemma,"interj",[] -> [{empty_cats with lemma=lemma; pos="interj"; pos2="interj"; cat=cat; coerced=coerced}]
  411 + | lemma,"sinterj",[] -> [{empty_cats with lemma=lemma; pos="sinterj"; pos2="sinterj"; (*cat=cat; coerced=coerced*)}]
  412 + | lemma,"burk",[] -> [{empty_cats with lemma=lemma; pos="burk"; pos2="burk"}]
  413 + | ",","interp",[] -> [{empty_cats with lemma=","; pos="conj"; pos2="conj"}]
  414 + | lemma,"interp",[] -> [{empty_cats with lemma=lemma; pos="interp"; pos2="interp"}]
  415 + | lemma,"unk",[] ->
  416 + [{empty_cats with lemma=lemma; pos="unk"; pos2="noun"; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}]
  417 + | lemma,"xxx",[] ->
  418 + [{empty_cats with lemma=lemma; pos="xxx"; pos2="noun"; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}]
  419 + | lemma,"html-tag",[] -> [{empty_cats with lemma=lemma; pos="html-tag"; pos2="html-tag"}]
  420 + | lemma,"list-item",[] -> [{empty_cats with lemma=lemma; pos="list-item"; pos2="list-item"}]
  421 + | lemma,c,l -> failwith ("clarify_categories: " ^ lemma ^ ":" ^ c ^ ":" ^ (String.concat ":" (Xlist.map l (String.concat "."))))
  422 +
  423 +(* FIXME: przenieść gdzieś indziej *)
  424 +(* let assign token =
  425 + match token.ENIAMtokenizerTypes.token with
  426 + ENIAMtokenizerTypes.Lemma(lemma,pos,interp) -> List.flatten (Xlist.map interp (fun interp -> clarify_categories false (lemma,pos,interp)))
  427 + | ENIAMtokenizerTypes.Proper(lemma,pos,interp,_) -> List.flatten (Xlist.map interp (fun interp -> clarify_categories true (lemma,pos,interp)))
  428 + | ENIAMtokenizerTypes.Interp lemma -> clarify_categories false (lemma,"interp",[])
  429 + | _ -> [] *)
  430 +
(* Set of all selector attribute names recognized in lexicon entries;
   kept in sync with selector_of_string / string_of_selector below. *)
let selector_names = StringSet.of_list [
  "lemma";"pos";"pos2";"cat";"coerced";"number";"case";"gender";"person";"grad";
  "praep";"acm";"aspect";"negation";"mood";"tense";"nsyn";"nsem";"ctype";"mode";"psem";
  "icat";"inumber";"igender";"iperson";"nperson";"ncat";"plemma";
  "unumber";"ucase";"ugender";"uperson";"amode"]
  436 +
  437 +
(* Render a selector attribute as the name used in the lexicon format
   (the inverse of selector_of_string). *)
let string_of_selector sel =
  match sel with
  | Lemma -> "lemma"
  (* | NewLemma -> "newlemma" *)
  | Pos -> "pos" | Pos2 -> "pos2"
  | Cat -> "cat" | Coerced -> "coerced"
  | Number -> "number" | Case -> "case" | Gender -> "gender" | Person -> "person"
  | Grad -> "grad" | Praep -> "praep" | Acm -> "acm"
  | Aspect -> "aspect" | Negation -> "negation" | Mood -> "mood" | Tense -> "tense"
  | Nsyn -> "nsyn" | Nsem -> "nsem"
  | Ctype -> "ctype" | Mode -> "mode" | Psem -> "psem"
  | Icat -> "icat" | Inumber -> "inumber" | Igender -> "igender" | Iperson -> "iperson"
  | Nperson -> "nperson" | Ncat -> "ncat" | Plemma -> "plemma"
  | Unumber -> "unumber" | Ucase -> "ucase" | Ugender -> "ugender" | Uperson -> "uperson"
  | Amode -> "amode"
  473 +
(* Render a selector list as a comma-separated string of
   "name=v1|v2" constraints, with "!=" marking negative (Neq) ones. *)
let string_of_selectors selectors =
  String.concat ", " (Xlist.map selectors (fun (cat,rel,l) ->
    let op = if rel = Eq then "=" else "!=" in
    let vals = String.concat "|" l in
    string_of_selector cat ^ op ^ vals))
  478 +
(* Parse a selector attribute name as written in the lexicon format;
   fails on names outside selector_names. *)
let selector_of_string s =
  match s with
  | "lemma" -> Lemma
  (* | "newlemma" -> NewLemma *)
  | "pos" -> Pos | "pos2" -> Pos2
  | "cat" -> Cat | "coerced" -> Coerced
  | "number" -> Number | "case" -> Case | "gender" -> Gender | "person" -> Person
  | "grad" -> Grad | "praep" -> Praep | "acm" -> Acm
  | "aspect" -> Aspect | "negation" -> Negation | "mood" -> Mood | "tense" -> Tense
  | "nsyn" -> Nsyn | "nsem" -> Nsem
  | "ctype" -> Ctype | "mode" -> Mode | "psem" -> Psem
  | "icat" -> Icat | "inumber" -> Inumber | "igender" -> Igender | "iperson" -> Iperson
  | "nperson" -> Nperson | "ncat" -> Ncat | "plemma" -> Plemma
  | "unumber" -> Unumber | "ucase" -> Ucase | "ugender" -> Ugender | "uperson" -> Uperson
  | "amode" -> Amode
  | s -> failwith ("selector_of_string: " ^ s)
  515 +
(* Return the list of values that [cats] carries for the given selector
   attribute.  Selectors with no corresponding field in the cats record
   (e.g. Ctype, or the i*/u*/n*/p* context selectors) fall through to the
   failwith clause. *)
let match_selector cats = function
    Lemma -> [cats.lemma]
(* | NewLemma -> [] *)
  | Pos -> [cats.pos]
  (* Fix: Pos2 was missing although "pos2" is a declared selector name and
     selector_of_string produces Pos2, so a pos2= constraint in the lexicon
     used to crash in the catch-all clause instead of matching. *)
  | Pos2 -> [cats.pos2]
  | Cat -> [cats.cat]
  | Coerced -> cats.coerced
  | Number -> cats.numbers
  | Case -> cats.cases
  | Gender -> cats.genders
  | Person -> cats.persons
  | Grad -> cats.grads
  | Praep -> cats.praeps
  | Acm -> cats.acms
  | Aspect -> cats.aspects
  | Negation -> cats.negations
  | Mood -> cats.moods
  | Tense -> cats.tenses
  | Nsyn -> cats.nsyn
  | Nsem -> cats.nsem
  | Mode -> cats.modes
  | Psem -> cats.psem
  | c -> failwith ("match_selector: " ^ string_of_selector c)
  538 +
(* Overwrite the field of [cats] addressed by selector [sel] with [vals].
   Single-valued fields (lemma, pos, cat) require a one-element list;
   selectors without a writable field raise Failure. *)
let set_selector cats vals sel =
  match sel with
  | Lemma -> (match vals with [v] -> {cats with lemma=v} | _ -> failwith "set_selector: Lemma")
  | Pos -> (match vals with [v] -> {cats with pos=v} | _ -> failwith "set_selector: Pos")
  | Cat -> (match vals with [v] -> {cats with cat=v} | _ -> failwith "set_selector: Cat")
  | Coerced -> {cats with coerced=vals}
  | Number -> {cats with numbers=vals}
  | Case -> {cats with cases=vals}
  | Gender -> {cats with genders=vals}
  | Person -> {cats with persons=vals}
  | Grad -> {cats with grads=vals}
  | Praep -> {cats with praeps=vals}
  | Acm -> {cats with acms=vals}
  | Aspect -> {cats with aspects=vals}
  | Negation -> {cats with negations=vals}
  | Mood -> {cats with moods=vals}
  | Tense -> {cats with tenses=vals}
  | Nsyn -> {cats with nsyn=vals}
  | Nsem -> {cats with nsem=vals}
  | Mode -> {cats with modes=vals}
  | Psem -> {cats with psem=vals}
  | c -> failwith ("set_selector: " ^ string_of_selector c)
  560 +
(* Narrow [cats] by each selector constraint in turn: Eq keeps the
   intersection of the current field values with the given ones, Neq keeps
   the difference.  Raises Not_found as soon as a constraint leaves the
   field empty, i.e. the token does not satisfy the selector list. *)
let rec apply_selectors cats = function
    [] -> cats
  | (sel,rel,given) :: rest ->
      let current = StringSet.of_list (match_selector cats sel) in
      let wanted = StringSet.of_list given in
      let remaining =
        match rel with
          Eq -> StringSet.intersection current wanted
        | Neq -> StringSet.difference current wanted in
      if StringSet.is_empty remaining then raise Not_found else
      apply_selectors (set_selector cats (StringSet.to_list remaining) sel) rest
  571 +
(* Map from part-of-speech tag to the ordered list of selector attributes
   that are meaningful for tokens of that POS; used when building lexicon
   entry keys and when matching selectors against token categories. *)
let pos_categories = Xlist.fold [
  "subst",[Lemma;Cat;Coerced;Number;Case;Gender;Person;Nsyn;Nsem;];
  "depr",[Lemma;Cat;Coerced;Number;Case;Gender;Person;Nsyn;Nsem;];
  "ppron12",[Lemma;Number;Case;Gender;Person;];
  "ppron3",[Lemma;Number;Case;Gender;Person;Praep;];
  "siebie",[Lemma;Number;Case;Gender;Person;];
  "prep",[Lemma;Cat;Coerced;Psem;Case;];
  "compar",[Lemma;Cat;Coerced;Case;];
  "num",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
  "numcomp",[Lemma];
  "intnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
  "realnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
  "intnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
  "realnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
  "symbol",[Lemma;Number;Case;Gender;Person;];
  "ordnum",[Lemma;Number;Case;Gender;Grad;];
  "date",[Lemma;Nsyn;Nsem;];
  "date-interval",[Lemma;Nsyn;Nsem;];
  "hour-minute",[Lemma;Nsyn;Nsem;];
  "hour",[Lemma;Nsyn;Nsem;];
  "hour-minute-interval",[Lemma;Nsyn;Nsem;];
  "hour-interval",[Lemma;Nsyn;Nsem;];
  "year",[Lemma;Nsyn;Nsem;];
  "year-interval",[Lemma;Nsyn;Nsem;];
  "day",[Lemma;Nsyn;Nsem;];
  "day-interval",[Lemma;Nsyn;Nsem;];
  "day-month",[Lemma;Nsyn;Nsem;];
  "day-month-interval",[Lemma;Nsyn;Nsem;];
  "month-interval",[Lemma;Nsyn;Nsem;];
  "roman-ordnum",[Lemma;Number;Case;Gender;Grad;];
  "roman",[Lemma;Nsyn;Nsem;];
  "roman-interval",[Lemma;Nsyn;Nsem;];
  "match-result",[Lemma;Nsyn;Nsem;];
  "url",[Lemma;Nsyn;Nsem;];
  "email",[Lemma;Nsyn;Nsem;];
  "phone-number",[Lemma;Nsyn;Nsem;];
  "postal-code",[Lemma;Nsyn;Nsem;];
  "obj-id",[Lemma;Nsyn;Nsem;];
  "building-number",[Lemma;Nsyn;Nsem;];
  "fixed",[Lemma;];
  "adj",[Lemma;Cat;Coerced;Number;Case;Gender;Grad;];
  "adjc",[Lemma;Cat;Coerced;Number;Case;Gender;Grad;];
  "adjp",[Lemma;Cat;Coerced;Number;Case;Gender;Grad;];
  "apron",[Lemma;Number;Case;Gender;Grad;];
  "adja",[Lemma;Cat;Coerced;];
  "adv",[Lemma;Cat;Coerced;Grad;Mode];(* ctype *)
  "ger",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Case;Gender;Person;Aspect;Negation;];
  "pact",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Case;Gender;Aspect;Negation;];
  "ppas",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Case;Gender;Aspect;Negation;];
  "fin",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  "bedzie",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  "praet",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  "winien",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  "impt",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  "imps",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  "pred",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
  "aglt",[Lemma;Number;Person;Aspect;];
  "inf",[Lemma;(*NewLemma;*)Cat;Coerced;Aspect;Negation;];
  "pcon",[Lemma;(*NewLemma;*)Cat;Coerced;Aspect;Negation;];
  "pant",[Lemma;(*NewLemma;*)Cat;Coerced;Aspect;Negation;];
  "qub",[Lemma;Cat;];
  "part",[Lemma;];
  "comp",[Lemma;];(* ctype *)
  "conj",[Lemma;];(* ctype *)
  "interj",[Lemma;Cat;Coerced;];
  "sinterj",[Lemma;];
  "burk",[Lemma;];
  "interp",[Lemma;];
  "unk",[Lemma;Number;Case;Gender;Person;];
  "xxx",[Lemma;Number;Case;Gender;Person;];
  "html-tag",[Lemma;];
  "list-item",[Lemma;];
  ] StringMap.empty (fun map (k,l) -> StringMap.add map k l)
... ...
LCGlexicon/resources/lexicon-pl.dic
1 1 @PHRASE_NAMES
2 2 lex infp np prepnp adjp ip cp ncp advp padvp
3   - adja prepadjp comprepnp compar measure num aglt aux-fut
  3 + adja prepadjp comprepnp comparp measure num aglt aux-fut
4 4 aux-past aux-imp qub interj hyphen int
5 5 rparen rparen2 rquot rquot2 rquot3 inclusion
6 6 day-interval day-lex day-month-interval date-interval
... ... @@ -153,8 +153,8 @@ lemma=w,pos=prep,case=loc: prepnp*lemma*case{\(1+advp*T),/(day-month+day+ye
153 153  
154 154 # komparatywy
155 155 # FIXME: trzeba poprawić comparnp i comparpp w walencji
156   -pos=compar: QUANT[case=nom&gen&dat&acc&inst] compar*lemma*case{\(1+advp*T),/(np*T*case*T*T+adjp*T*case*T)}{\(1+qub),/(1+inclusion)};
157   -pos=compar: QUANT[case=postp] compar*lemma*case{\(1+advp*T),/(prepnp*T*T+prepadjp*T*T)}{\(1+qub),/(1+inclusion)};
  156 +pos=compar: QUANT[case=nom&gen&dat&acc&inst] comparp*lemma*case{\(1+advp*T),/(np*T*case*T*T+adjp*T*case*T)}{\(1+qub),/(1+inclusion)};
  157 +pos=compar: QUANT[case=postp] comparp*lemma*case{\(1+advp*T),/(prepnp*T*T+prepadjp*T*T)}{\(1+qub),/(1+inclusion)};
158 158  
159 159 # frazy przymiotnikowe
160 160 # FIXME: let grad = match grads with [grad] -> grad | _ -> failwith "make_adjp: grad" in
... ...
LCGlexicon/resources/subst_container.dat
... ... @@ -488,3 +488,4 @@ mrowie
488 488 rodzaj
489 489 rozdział
490 490 gmach
  491 +zakres
... ...
LCGparser/ENIAM_LCG_XMLof.ml
... ... @@ -59,4 +59,3 @@ let linear_term_array a =
59 59 let l = Int.fold 0 (Array.length a - 1) [] (fun l i ->
60 60 Xml.Element("element",["index",string_of_int i],[linear_term a.(i)]) :: l) in
61 61 Xml.Element("array",[],List.rev l)
62   -
... ...
LCGparser/ENIAM_LCGgraphOf.ml
... ... @@ -102,7 +102,8 @@ let rec print_simplified_dependency_tree_rec2 file edge upper = function
102 102  
103 103 let rec print_simplified_dependency_tree_rec file edge upper id = function
104 104 Node t ->
105   - fprintf file " %s [label=\"%s\\n%s:%s\\n%s\\n%f\"]\n" id (escape_string t.orth) (escape_string t.lemma) t.pos (escape_string (ENIAM_LCGstringOf.linear_term 0 t.symbol)) t.weight;
  105 + (* fprintf file " %s [label=\"%s\\n%s:%s\\n%s\\n%f\"]\n" id (escape_string t.orth) (escape_string t.lemma) t.pos (escape_string (ENIAM_LCGstringOf.linear_term 0 t.symbol)) t.weight; *)
  106 + fprintf file " %s [label=\"%s\\n%s:%s\\n%s\"]\n" id (escape_string t.orth) (escape_string t.lemma) t.pos (escape_string (ENIAM_LCGstringOf.linear_term 0 t.symbol));
106 107 print_edge file edge upper id;
107 108 print_simplified_dependency_tree_rec2 file "" id t.args
108 109 | Variant(e,l) ->
... ...
LCGparser/ENIAM_LCGlatexOf.ml
... ... @@ -213,7 +213,7 @@ let chart page text_fragments g =
213 213 String.concat "" (List.rev (IntMap.fold layers [] (fun l layer nodes ->
214 214 IntMap.fold nodes l (fun l node1 contents ->
215 215 Xlist.fold contents l (fun l (node2,symbol,sem) ->
216   - let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in
  216 + let s = try Xlatex.escape_string (IntMap.find text_fragments.(node1) node2) with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in
217 217 (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$ & $%s$\\\\\n\\hline\n" layer node1 node2 s symbol sem) :: l))))) ^
218 218 "\\end{longtable}"
219 219  
... ... @@ -221,7 +221,7 @@ let chart2 page text_fragments g =
221 221 let n = match page with "a4" -> "4" | "a1" -> "10" | _ -> "6" in
222 222 "\\begin{longtable}{|l|p{" ^ n ^ "cm}|l|}\n\\hline\n" ^
223 223 String.concat "" (List.rev (ENIAM_LCGchart.fold g [] (fun l (symbol,node1,node2,sem,layer) ->
224   - let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in
  224 + let s = try Xlatex.escape_string (IntMap.find text_fragments.(node1) node2) with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in
225 225 (Printf.sprintf "%d--%d & %s & $\\begin{array}{l}%s\\end{array}$\\\\\n\\hline\n" node1 node2 s (grammar_symbol 0 symbol)) :: l))) ^
226 226 "\\end{longtable}"
227 227  
... ...
LCGparser/ENIAM_LCGrules.ml
... ... @@ -392,6 +392,24 @@ let forward_application references functs args =
392 392 | BracketSet(Forward),_ -> Xlist.fold args l (fun l -> function Bracket(false,rf,arg),arg_sem -> (Bracket(true,rf,arg),arg_sem) :: l | _ -> l)
393 393 | _ -> l)
394 394  
  395 +let forward_application_ignore_brackets references functs args =
  396 + Xlist.fold functs [] (fun l -> function
  397 + Bracket(lf,false,funct),sem ->
  398 + let argst,argsf = Xlist.fold args ([],[]) (fun (argst,argsf) -> function
  399 + Bracket(_,true,arg),arg_sem -> (arg,arg_sem) :: argst, argsf
  400 + | Bracket(_,false,arg),arg_sem -> argst, (arg,arg_sem) :: argsf
  401 + | _ -> argst,argsf) in
  402 + let l = Xlist.fold (deduce_app references Forward (funct,sem) argst) l (fun l (t,sem) ->
  403 + (Bracket(lf,true,t), (*LCGreductions.linear_term_beta_reduction2*) sem) :: l) in
  404 + Xlist.fold (deduce_app references Forward (funct,sem) argsf) l (fun l (t,sem) ->
  405 + (Bracket(lf,false,t), (*LCGreductions.linear_term_beta_reduction2*) sem) :: l)
  406 + | Bracket(lf,true,funct),sem ->
  407 + let args = Xlist.fold args [] (fun args -> function Bracket(_,_,arg),arg_sem -> (arg,arg_sem) :: args | _ -> args) in
  408 + Xlist.fold (deduce_app references Forward (funct,sem) args) l (fun l (t,sem) ->
  409 + (Bracket(lf,true,t), (*LCGreductions.linear_term_beta_reduction2*) sem) :: l)
  410 + | BracketSet(Forward),_ -> Xlist.fold args l (fun l -> function Bracket(_,rf,arg),arg_sem -> (Bracket(true,rf,arg),arg_sem) :: l | _ -> l)
  411 + | _ -> l)
  412 +
395 413 let forward_application_conll references functs args =
396 414 Xlist.fold functs [] (fun l -> function
397 415 Bracket(_,_,funct),sem ->
... ... @@ -436,6 +454,27 @@ let backward_application references args functs =
436 454 | BracketSet(Backward),_ -> (*print_endline "tt";*) Xlist.fold args l (fun l -> function Bracket(lf,false,arg),arg_sem -> (Bracket(lf,true,arg),arg_sem) :: l | _ -> l)
437 455 | _ -> l)
438 456  
  457 +let backward_application_ignore_brackets references args functs =
  458 + (* Printf.printf "backward_application: [%s] [%s]\n%!"
  459 + (String.concat "; " (Xlist.map args (fun (arg,_) -> "'" ^ ENIAM_LCGstringOf.grammar_symbol 1 arg ^ "'")))
  460 + (String.concat "; " (Xlist.map functs (fun (arg,_) -> "'" ^ ENIAM_LCGstringOf.grammar_symbol 1 arg ^ "'"))); *)
  461 + Xlist.fold functs [] (fun l -> function
  462 + Bracket(false,rf,funct),sem ->
  463 + let argst,argsf = Xlist.fold args ([],[]) (fun (argst,argsf) -> function
  464 + Bracket(true,_,arg),arg_sem -> (arg,arg_sem) :: argst, argsf
  465 + | Bracket(false,_,arg),arg_sem -> argst, (arg,arg_sem) :: argsf
  466 + | _ -> argst,argsf) in
  467 + let l = Xlist.fold (deduce_app references Backward (funct,sem) argst) l (fun l (t,sem) ->
  468 + (Bracket(true,rf,t), (*LCGreductions.linear_term_beta_reduction2*) sem) :: l) in
  469 + Xlist.fold (deduce_app references Backward (funct,sem) argsf) l (fun l (t,sem) ->
  470 + (Bracket(false,rf,t), (*LCGreductions.linear_term_beta_reduction2*) sem) :: l)
  471 + | Bracket(true,rf,funct),sem ->
  472 + let args = Xlist.fold args [] (fun args -> function Bracket(_,_,arg),arg_sem -> (arg,arg_sem) :: args | _ -> args) in
  473 + Xlist.fold (deduce_app references Backward (funct,sem) args) l (fun l (t,sem) ->
  474 + (Bracket(true,rf,t), (*LCGreductions.linear_term_beta_reduction2*) sem) :: l)
  475 + | BracketSet(Backward),_ -> (*print_endline "tt";*) Xlist.fold args l (fun l -> function Bracket(lf,_,arg),arg_sem -> (Bracket(lf,true,arg),arg_sem) :: l | _ -> l)
  476 + | _ -> l)
  477 +
439 478 let backward_application_conll references args functs =
440 479 (* Printf.printf "backward_application: [%s] [%s]\n%!"
441 480 (String.concat "; " (Xlist.map args (fun (arg,_) -> "'" ^ ENIAM_LCGstringOf.grammar_symbol 1 arg ^ "'")))
... ... @@ -469,6 +508,7 @@ let backward_cross_composition references args functs =
469 508 (* FIXME: błąd przy redukcji "Jan chce iść spać" *)
470 509  
471 510 let application_rules = [0,backward_application; 0,forward_application]
  511 +let application_rules_ignore_brackets = [0,backward_application_ignore_brackets; 0,forward_application_ignore_brackets]
472 512 let cross_composition_rules = [1,backward_cross_composition;1,forward_cross_composition]
473 513  
474 514 let rec flatten_functor2 l seml = function
... ...
exec/ENIAMexec.ml
... ... @@ -33,6 +33,8 @@ let translate_mode = function
33 33 | ENIAMsubsyntaxTypes.Mate -> Mate
34 34 | ENIAMsubsyntaxTypes.Swigra -> Swigra
35 35 | ENIAMsubsyntaxTypes.POLFIE -> POLFIE
  36 + | ENIAMsubsyntaxTypes.Error -> Error
  37 + | ENIAMsubsyntaxTypes.Name -> Name
36 38  
37 39 let rec translate_sentence = function
38 40 ENIAMsubsyntaxTypes.RawSentence s -> RawSentence s
... ... @@ -53,6 +55,7 @@ let rec translate_paragraph = function
53 55 sentence=translate_sentence p.ENIAMsubsyntaxTypes.sentence}))
54 56 | ENIAMsubsyntaxTypes.AltParagraph l -> AltParagraph(Xlist.map l (fun (mode,paragraph) ->
55 57 translate_mode mode, translate_paragraph paragraph))
  58 + | ENIAMsubsyntaxTypes.ErrorParagraph s -> ErrorParagraph s
56 59  
57 60 let rec translate_text = function
58 61 ENIAMsubsyntaxTypes.RawText s -> RawText s
... ... @@ -61,14 +64,16 @@ let rec translate_text = function
61 64 | ENIAMsubsyntaxTypes.AltText l -> AltText(Xlist.map l (fun (mode,text) ->
62 65 translate_mode mode, translate_text text))
63 66  
64   -let clarify_categories cats token =
  67 +let clarify_categories cats (*snode*) token =
65 68 match token.ENIAMtokenizerTypes.token with
66 69 ENIAMtokenizerTypes.Lemma(lemma,pos,interp) ->
67   - List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,proj) -> ENIAMcategoriesPL.clarify_categories false cat proj (lemma,pos,interp)))))
  70 + List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,coerced) ->
  71 + (* Printf.printf "lemma=%s pos=%s cat=%s coerced=%s\n%!" lemma pos cat (String.concat "," coerced); *)
  72 + ENIAMcategoriesPL.clarify_categories false cat coerced (*snode*) (lemma,pos,interp)))))
68 73 | ENIAMtokenizerTypes.Proper(lemma,pos,interp,senses2) ->
69   - List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,proj) -> ENIAMcategoriesPL.clarify_categories true cat proj (lemma,pos,interp)))))
  74 + List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,coerced) -> ENIAMcategoriesPL.clarify_categories true cat coerced (*snode*) (lemma,pos,interp)))))
70 75 | ENIAMtokenizerTypes.Interp lemma ->
71   - List.flatten (Xlist.map cats (fun (cat,proj) -> ENIAMcategoriesPL.clarify_categories false cat proj (lemma,"interp",[])))
  76 + List.flatten (Xlist.map cats (fun (cat,coerced) -> ENIAMcategoriesPL.clarify_categories false cat coerced (*snode*) (lemma,"interp",[])))
72 77 | _ -> []
73 78  
74 79 let create_chart rules tokens lex_sems paths last =
... ... @@ -79,9 +84,9 @@ let create_chart rules tokens lex_sems paths last =
79 84 let s = ExtArray.get lex_sems id in
80 85 ENIAM_LCGrenderer.reset_variable_names ();
81 86 ENIAM_LCGrenderer.add_variable_numbers ();
82   - if s.ENIAMlexSemanticsTypes.schemata = [] then failwith ("create_chart: no schema for token=" ^ t.ENIAMtokenizerTypes.orth ^ " lemma=" ^ ENIAMtokens.get_lemma t.ENIAMtokenizerTypes.token) else
83   - Xlist.fold s.ENIAMlexSemanticsTypes.schemata chart (fun chart (selectors,cats,schema) ->
84   - let cats = clarify_categories cats t in
  87 + (* if s.ENIAMlexSemanticsTypes.schemata = [] then failwith ("create_chart: no schema for token=" ^ t.ENIAMtokenizerTypes.orth ^ " lemma=" ^ ENIAMtokens.get_lemma t.ENIAMtokenizerTypes.token) else *)
  88 + Xlist.fold s.ENIAMlexSemanticsTypes.schemata chart (fun chart (selectors,cats,(*snode,*)schema) ->
  89 + let cats = clarify_categories cats (*snode*) t in
85 90 (* let chart = ENIAM_LCGchart.add_inc_list chart lnode rnode s.ENIAMlexSemanticsTypes.lex_entries 0 in *)
86 91 let l = ENIAM_LCGlexicon.create_entries rules id t.ENIAMtokenizerTypes.orth cats [selectors,schema] s.ENIAMlexSemanticsTypes.lex_entries in
87 92 ENIAM_LCGchart.add_inc_list chart lnode rnode l 0)) in
... ... @@ -110,10 +115,11 @@ let create_dep_chart dep_rules tokens lex_sems paths =
110 115 let s = ExtArray.get lex_sems id in
111 116 ENIAM_LCGrenderer.reset_variable_names ();
112 117 ENIAM_LCGrenderer.add_variable_numbers ();
113   - let cats = clarify_categories ["X",["X"]] t in
114   - let schemata = Xlist.map s.ENIAMlexSemanticsTypes.schemata (fun (selectors,_,schema) -> selectors,schema) in
115   - let l = ENIAM_LCGlexicon.create_entries dep_rules id t.ENIAMtokenizerTypes.orth cats schemata s.ENIAMlexSemanticsTypes.lex_entries in
116   - IntMap.add nodes i l) in
  118 + Xlist.fold s.ENIAMlexSemanticsTypes.schemata nodes (fun nodes (selectors,cats,(*snode,*)schema) ->
  119 + let cats = clarify_categories ["X",["X"]] (*snode*) t in
  120 + (* let chart = ENIAM_LCGchart.add_inc_list chart lnode rnode s.ENIAMlexSemanticsTypes.lex_entries 0 in *)
  121 + let l = ENIAM_LCGlexicon.create_entries dep_rules id t.ENIAMtokenizerTypes.orth cats [selectors,schema] s.ENIAMlexSemanticsTypes.lex_entries in
  122 + IntMap.add_inc nodes i l (fun l2 -> l @ l2))) in
117 123 (* print_endline "create_dep_chart 3"; *)
118 124 let x = dep_create_rec nodes sons 0 in
119 125 (* print_endline "create_dep_chart 4"; *)
... ... @@ -134,10 +140,27 @@ let create_text_fragments tokens paths last =
134 140 text_fragments.(i) <- map);
135 141 text_fragments
136 142  
  143 +(*let create_beg_positions tokens paths last =
  144 + let beg_positions = Array.make last (-1) in
  145 + Xlist.iter paths (fun (id,lnode,rnode) ->
  146 + let t = ExtArray.get tokens id in
  147 + beg_positions.(lnode) <- t.ENIAMtokenizerTypes.beg);
  148 + beg_positions
  149 +
  150 +let create_end_positions tokens paths last =
  151 + let end_positions = Array.make last (-1) in
  152 + Xlist.iter paths (fun (id,lnode,rnode) ->
  153 + let t = ExtArray.get tokens id in
  154 + end_positions.(rnode) <- t.ENIAMtokenizerTypes.beg + t.ENIAMtokenizerTypes.len);
  155 + end_positions*)
  156 +
137 157 let eniam_parse_sentence timeout verbosity rules tokens lex_sems paths last =
138 158 ENIAM_LCGreductions.reset_variant_label ();
139 159 let result = {empty_eniam_parse_result with paths_size = Xlist.size paths} in
140   - let result = if verbosity = 0 then result else {result with text_fragments=create_text_fragments tokens paths last} in
  160 + let result = if verbosity = 0 then result else {result with
  161 + text_fragments=create_text_fragments tokens paths last;
  162 + (*beg_positions=create_beg_positions tokens paths last;
  163 + end_positions=create_end_positions tokens paths last;*)} in
141 164 let time1 = time_fun () in
142 165 try
143 166 (* print_endline "eniam_parse_sentence 1"; *)
... ... @@ -469,6 +492,7 @@ let eniam_semantic_processing verbosity tokens lex_sems (result : eniam_parse_re
469 492 let graph = ENIAMsemGraph.greater_simplify graph in
470 493 (* let graph = ENIAMsemGraph.manage_quantification graph in *)
471 494 let graph = ENIAMsemGraph.simplify_gender graph in
  495 + let graph = ENIAMsemGraph.manage_variant_labels graph in
472 496 let result = (*if verbosity = 0 then result else*) {result with semantic_graph11=graph; semantic_graph12=graph} in
473 497 graph,result
474 498 with e -> ENIAMsemTypes.Dot,{result with status=SemGraphError; msg=string_of_exn e} in
... ...
exec/ENIAMexecTypes.ml
... ... @@ -78,7 +78,7 @@ type semantic_processing_result = {
78 78 }
79 79 *)
80 80 type mode =
81   - Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE
  81 + Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE | Error | Name
82 82  
83 83 type sentence =
84 84 RawSentence of string
... ... @@ -98,6 +98,7 @@ and paragraph =
98 98 RawParagraph of string
99 99 | StructParagraph of paragraph_record list (* zdania *)
100 100 | AltParagraph of (mode * paragraph) list
  101 + | ErrorParagraph of string
101 102  
102 103 type text =
103 104 RawText of string
... ... @@ -267,6 +268,7 @@ let rec map_paragraph mode f = function
267 268 let l = Xlist.rev_map l (fun (mode,paragraph) ->
268 269 mode, map_paragraph mode f paragraph) in
269 270 AltParagraph(List.rev l)
  271 + | ErrorParagraph s -> ErrorParagraph s
270 272  
271 273 let rec map_text mode f = function
272 274 RawText s -> RawText s
... ... @@ -295,6 +297,7 @@ let rec fold_paragraph mode s f = function
295 297 | AltParagraph l ->
296 298 Xlist.fold l s (fun s (mode,paragraph) ->
297 299 fold_paragraph mode s f paragraph)
  300 + | ErrorParagraph _ -> s
298 301  
299 302 let rec fold_text mode s f = function
300 303 RawText _ -> s
... ... @@ -306,6 +309,7 @@ let rec fold_text mode s f = function
306 309 fold_text mode s f text)
307 310  
308 311 let rules_filename = ENIAM_LCGlexiconTypes.resource_path ^ "/LCGlexicon/lexicon-pl.dic"
  312 +let colours_filename = ENIAMwalTypes.data_path ^ "/colours.tab"
309 313  
310 314 let lcg_rules = ref ([] : (int * (ENIAM_LCGtypes.linear_term ExtArray.t ->
311 315 (ENIAM_LCGtypes.SymbolMap.key * ENIAM_LCGtypes.linear_term) list ->
... ...
exec/ENIAMexecXMLof.ml
... ... @@ -64,6 +64,7 @@ let rec paragraph m = function
64 64 Xml.Element("StructParagraph",set_mode m,Xlist.map sentences (fun p ->
65 65 Xml.Element("Sentence",["id",p.id;"beg",string_of_int p.beg;"len",string_of_int p.len;"next",string_of_int p.next],[sentence "" p.sentence])))
66 66 | AltParagraph l -> Xml.Element("AltParagraph",set_mode m,Xlist.map l (fun (m,t) -> paragraph (ENIAMvisualization.string_of_mode m) t))
  67 + | ErrorParagraph s -> Xml.Element("ErrorParagraph",set_mode m,[Xml.PCData s])
67 68  
68 69 let rec text m = function
69 70 RawText s -> Xml.Element("RawText",set_mode m,[Xml.PCData s])
... ...
exec/ENIAMselectSent.ml
... ... @@ -69,6 +69,7 @@ let rec select_sentence_modes_paragraph = function
69 69 let l = Xlist.rev_map l (fun (mode,paragraph) ->
70 70 mode, select_sentence_modes_paragraph paragraph) in
71 71 AltParagraph(List.rev l)
  72 + | ErrorParagraph s -> ErrorParagraph s
72 73  
73 74 let rec select_sentence_modes_text = function
74 75 RawText s -> RawText s
... ... @@ -148,6 +149,7 @@ let rec select_sentences_paragraph mode = function
148 149 let l = Xlist.rev_map l (fun (mode,paragraph) ->
149 150 mode, select_sentences_paragraph mode paragraph) in
150 151 AltParagraph(List.rev l)
  152 + | ErrorParagraph s -> ErrorParagraph s
151 153  
152 154 let rec select_sentences_text mode = function
153 155 RawText s -> RawText s
... ...
exec/ENIAMvisualization.ml
... ... @@ -24,7 +24,7 @@ open ENIAMtokenizerTypes
24 24 open ENIAMexecTypes
25 25  
26 26 let string_of_status = function
27   - Idle -> "Idle"
  27 + Idle -> "Idle"
28 28 | PreprocessingError -> "PreprocessingError"
29 29 | LexiconError -> "LexiconError"
30 30 | ParseError -> "ParseError"
... ... @@ -657,6 +657,8 @@ let string_of_mode = function
657 657 | Mate -> "Mate"
658 658 | Swigra -> "Swigra"
659 659 | POLFIE -> "POLFIE"
  660 + | Error -> "Error"
  661 + | Name -> "Name"
660 662 (*
661 663 (*let rec string_of_sentence = function
662 664 RawSentence s -> sprintf "RawSentence(%s)" s
... ... @@ -775,6 +777,94 @@ let create_latex_dep_chart path name dep_chart =
775 777 LatexMain.latex_compile_and_clean path name
776 778 *)
777 779  
  780 +let rec extract_pos_cat_internal vars = function
  781 + | Atom x -> x
  782 + | AVar x -> (try extract_pos_cat_internal vars (Xlist.assoc vars x) with Not_found -> failwith "extract_pos_cat_internal")
  783 + | With l -> String.concat "&" (Xlist.map l (extract_pos_cat_internal vars))
  784 + | Zero -> "0"
  785 + | Top -> "T"
  786 +
  787 +let rec extract_pos_cat vars = function
  788 + | Tensor [] -> failwith "extract_pos_cat: ni"
  789 + | Tensor [pos] -> extract_pos_cat_internal vars pos
  790 + | Tensor [pos;_] -> extract_pos_cat_internal vars pos
  791 + | Tensor [pos;_;_] -> extract_pos_cat_internal vars pos
  792 + | Tensor (Atom "num" :: _) -> "Number"
  793 + | Tensor (Atom "aglt" :: _) -> "Aglt"
  794 + | Tensor (Atom "prepnp" :: _) -> "Prep"
  795 + | Tensor (Atom "comparp" :: _) -> "Compar"
  796 + | Tensor (Atom "cp" :: _) -> "Comp"
  797 + | Tensor [_;cat;_;_] -> extract_pos_cat_internal vars cat
  798 + | Tensor [_;_;cat;_;_] -> extract_pos_cat_internal vars cat
  799 + | Tensor [_;_;_;cat;_;_] -> extract_pos_cat_internal vars cat
  800 + | Tensor [_;_;_;_;cat;_;_] -> extract_pos_cat_internal vars cat
  801 + | Tensor [_;_;_;_;_;cat;_;_] -> extract_pos_cat_internal vars cat
  802 + | Tensor [_;_;_;_;_;_;cat;_;_] -> extract_pos_cat_internal vars cat
  803 + (* | Tensor (pos :: cat :: _) -> (*extract_pos_cat_internal vars pos ^ "*" ^*) extract_pos_cat_internal vars cat *)
  804 + | Tensor _ as t -> print_endline ("Unknown symbol " ^ ENIAM_LCGstringOf.grammar_symbol 0 t); "Unknown"
  805 + | Plus l -> failwith "extract_pos_cat: ni"
  806 + | Imp(s,d,t2) -> extract_pos_cat vars s
  807 + | One -> failwith "extract_pos_cat: ni"
  808 + | ImpSet(s,l) -> extract_pos_cat vars s
  809 + | WithVar(v,g,e,s) -> extract_pos_cat ((v,g) :: vars) s
  810 + | Star s -> failwith "extract_pos_cat: ni"
  811 + | Bracket(lf,rf,s) -> extract_pos_cat vars s
  812 + | BracketSet d -> "BracketSet"
  813 + | Maybe s -> failwith "extract_pos_cat: ni"
  814 +
  815 +let get_text_fragment text_fragments node1 node2 =
  816 + try IntMap.find text_fragments.(node1) node2
  817 + with (*Not_found*)_ -> "???"(*failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2)*)
  818 +
  819 +let omited = StringSet.of_list ["<subst>";"<depr>";"<ppron12>";"<ppron3>";"<siebie>";"<prep>";
  820 + "<num>";"<intnum>";"<realnum>";"<intnum-interval>";"<realnum-interval>";"<symbol>";"<ordnum>";
  821 + "<date>";"<date-interval>";"<hour-minute>";"<hour>";"<hour-minute-interval>";"<hour-interval>";
  822 + "<year>";"<year-interval>";"<day>";"<day-interval>";"<day-month>";"<day-month-interval>";
  823 + "<month-interval>";"<roman>";"<roman-interval>";"<roman-ordnum>";"<match-result>";"<url>";
  824 + "<email>";"<obj-id>";"<adj>";"<apron>";"<adjc>";"<adjp>";"<adja>";"<adv>";"<ger>";"<pact>";
  825 + "<ppas>";"<fin>";"<bedzie>";"<praet>";"<winien>";"<impt>";"<imps>";"<pred>";"<aglt>";"<inf>";
  826 + "<pcon>";"<pant>";"<qub>";"<comp>";"<compar>";"<conj>";"<interj>";"<sinterj>";"<burk>";
  827 + "<interp>";"<part>";"<unk>";"<building-number>";"<html-tag>";"<list-item>";"<numcomp>";
  828 + "<phone-number>";"<postal-code>";"<sentence>";"<paragraph>"]
  829 +
  830 +let cat_tokens_sequence text_fragments g =
  831 + let _,_,l = ENIAM_LCGchart.fold g (0,0,[]) (fun (m,n,l) (symbol,node1,node2,sem,layer) ->
  832 + node1,node2,
  833 + (if m < node1 then
  834 + if n < node1 then [n, node1, get_text_fragment text_fragments n node1, "null"]
  835 + else if n = node1 then []
  836 + else [node1, n, get_text_fragment text_fragments node1 n, "overlap"]
  837 + else if m = node1 then
  838 + if n < node2 then [m, n, get_text_fragment text_fragments m n, "overlap"]
  839 + else if n = node2 then []
  840 + else [node1, node2, get_text_fragment text_fragments node1 node2, "overlap"]
  841 + else failwith "cat_tokens_sequence") @
  842 + [node1, node2, get_text_fragment text_fragments node1 node2, extract_pos_cat [] symbol] @ l) in
  843 + let map = Xlist.fold l IntMap.empty (fun map (m,n,text,symbol) ->
  844 + IntMap.add_inc map (1000000*m+n) [text,symbol] (fun l -> (text,symbol) :: l)) in
  845 + let map = IntMap.map map (fun l ->
  846 + let t,ov,set = Xlist.fold l ("",false,StringSet.empty) (fun (t,ov,set) (text,symbol) ->
  847 + if symbol = "null" then text,ov,set
  848 + else if symbol = "overlap" then t,true,set
  849 + else if StringSet.mem omited symbol then text,ov,set
  850 + else t,ov,StringSet.add set symbol) in
  851 + let l = if StringSet.is_empty set then [t] else StringSet.to_list set in
  852 + if ov then "OVERLAP{" ^ String.concat " " l ^ "}" else
  853 + match l with
  854 + [t] -> t
  855 + | _ -> "{" ^ String.concat " " l ^ "}") in
  856 + let l = List.sort compare (IntMap.fold map [] (fun l k texts -> (k,texts) :: l)) in
  857 +(* let l = Xlist.sort l (fun (m1,n1,text1,symbol1) (m2,n2,text2,symbol2) ->
  858 + if m1 <> m2 then compare m1 m2 else
  859 + if n1 <> n2 then compare n1 n2 else
  860 + compare symbol1 symbol2) in
  861 + let l = if l = [] then l else
  862 + Xlist.fold (List.tl l) [List.hd l] (fun l a ->
  863 + match l with
  864 + [] -> failwith "cat_tokens_sequence"
  865 + | b :: l -> if a = b then b :: l else a :: b :: l) in*)
  866 + String.concat " " (Xlist.map l (fun (n,texts) -> texts))
  867 +
778 868  
779 869 (* verbosity:
780 870 0 -> jedynie informacja o statusie zdania
... ... @@ -785,13 +875,13 @@ let create_latex_dep_chart path name dep_chart =
785 875 let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam_parse_result) =
786 876 match result.status with
787 877 Idle -> "<font color=\"red\">idle</font>\n"
788   - | LexiconError -> sprintf "<font color=\"red\">error_lex</font>: %s paths_size=%d\n" result.msg result.paths_size
  878 + | LexiconError -> sprintf "<font color=\"red\">error_lex</font>: %s paths_size=%d\n" (escape_html result.msg) result.paths_size
789 879 | ParseError ->
790 880 if verbosity = 0 then () else (
791 881 ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.text_fragments result.chart1;
792 882 ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.text_fragments result.chart2;
793 883 ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2);
794   - sprintf "<font color=\"red\">error_parse</font>: %s paths_size=%d\n" result.msg result.paths_size ^
  884 + sprintf "<font color=\"red\">error_parse</font>: %s paths_size=%d\n" (escape_html result.msg) result.paths_size ^
795 885 (if verbosity = 0 then "" else
796 886 sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix ^
797 887 sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix ^
... ... @@ -803,7 +893,7 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
803 893 ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2);
804 894 if verbosity = 0 then () else (
805 895 ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.text_fragments result.chart2);
806   - sprintf "<font color=\"red\">timeout</font>: %s paths_size=%d\n" result.msg result.paths_size ^
  896 + sprintf "<font color=\"red\">timeout</font>: %s paths_size=%d\n" (escape_html result.msg) result.paths_size ^
807 897 (if verbosity < 2 then "" else
808 898 sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix ^
809 899 sprintf "<BR><A HREF=\"%s_2_references.pdf\">References 2</A>\n" file_prefix) ^
... ... @@ -829,6 +919,7 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
829 919 sprintf "<BR><A HREF=\"%s_3_references.pdf\">References 3</A>\n" file_prefix ^
830 920 sprintf "<BR><A HREF=\"%s_3_chart.pdf\">Chart 3</A>\n" file_prefix) ^
831 921 (if verbosity = 0 then "" else
  922 + sprintf "<BR>%s\n" (escape_html (cat_tokens_sequence result.text_fragments (ENIAM_LCGchart.select_maximal result.chart1))) ^
832 923 sprintf "<BR><A HREF=\"%s_3_chart_selection.pdf\">Chart 3 Selection</A>\n" file_prefix) ^
833 924 ""
834 925 | ReductionError ->
... ... @@ -840,7 +931,7 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
840 931 ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.text_fragments result.chart1;
841 932 ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3);
842 933 (if verbosity < 2 then "" else
843   - sprintf "<font color=\"red\">error_reduction</font>: %s paths_size=%d chart_size=%d\n" result.msg result.paths_size result.chart_size ^
  934 + sprintf "<font color=\"red\">error_reduction</font>: %s paths_size=%d chart_size=%d\n" (escape_html result.msg) result.paths_size result.chart_size ^
844 935 sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix ^
845 936 sprintf "<BR><A HREF=\"%s_2_references.pdf\">References 2</A>\n" file_prefix ^
846 937 sprintf "<BR><A HREF=\"%s_3_chart.pdf\">Chart 3</A>\n" file_prefix) ^
... ... @@ -898,7 +989,7 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
898 989 Printf.fprintf file "\\[%s\\]\n" (ENIAM_LCGlatexOf.linear_term 0 result.term4));
899 990 Xlatex.latex_compile_and_clean path (file_prefix ^ "_4_term");
900 991 ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_4_dependency_tree") "a0" result.dependency_tree4);
901   - sprintf "<font color=\"red\">error_reduction2</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.msg result.paths_size result.chart_size result.dependency_tree_size ^
  992 + sprintf "<font color=\"red\">error_reduction2</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" (escape_html result.msg) result.paths_size result.chart_size result.dependency_tree_size ^
902 993 (if verbosity < 2 then "" else
903 994 sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix ^
904 995 sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix ^
... ... @@ -928,7 +1019,7 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
928 1019 ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_6b_dependency_tree") result.dependency_tree6b;
929 1020 ENIAM_LCGgraphOf.print_simplified_dependency_tree path (file_prefix ^ "_6a_simple_dependency_tree") result.dependency_tree6a;
930 1021 ENIAM_LCGgraphOf.print_simplified_dependency_tree path (file_prefix ^ "_6b_simple_dependency_tree") result.dependency_tree6b);
931   - sprintf "<font color=\"red\">error_reduction3</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.msg result.paths_size result.chart_size result.dependency_tree_size ^
  1022 + sprintf "<font color=\"red\">error_reduction3</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" (escape_html result.msg) result.paths_size result.chart_size result.dependency_tree_size ^
932 1023 (if verbosity < 2 then "" else
933 1024 sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix ^
934 1025 sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix ^
... ... @@ -966,8 +1057,9 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
966 1057 ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_6b_dependency_tree") "a4" result.dependency_tree6b);
967 1058 if verbosity = 0 then () else (
968 1059 ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_6a_dependency_tree") result.dependency_tree6a;
  1060 + ENIAM_LCGgraphOf.print_simplified_dependency_tree path (file_prefix ^ "_6a_simple_dependency_tree") result.dependency_tree6a);
  1061 + if verbosity < 2 then () else (
969 1062 ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_6b_dependency_tree") result.dependency_tree6b;
970   - ENIAM_LCGgraphOf.print_simplified_dependency_tree path (file_prefix ^ "_6a_simple_dependency_tree") result.dependency_tree6a;
971 1063 ENIAM_LCGgraphOf.print_simplified_dependency_tree path (file_prefix ^ "_6b_simple_dependency_tree") result.dependency_tree6b);
972 1064 sprintf "parsed: paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.paths_size result.chart_size result.dependency_tree_size ^
973 1065 (if verbosity < 2 then "" else
... ... @@ -984,10 +1076,11 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
984 1076 (if verbosity = 0 then "" else
985 1077 (if img <> 2 then sprintf "<BR><A HREF=\"%s_6a_dependency_tree.png\">Dependency Tree 6a</A>\n" file_prefix
986 1078 else sprintf "<BR><IMG SRC=\"%s_6a_dependency_tree.png\">\n" file_prefix) ^
  1079 + (if img <> 1 then sprintf "<BR><A HREF=\"%s_6a_simple_dependency_tree.png\">Simplified Dependency Tree 6a</A>\n" file_prefix
  1080 + else sprintf "<BR><IMG SRC=\"%s_6a_simple_dependency_tree.png\">\n" file_prefix)) ^
  1081 + (if verbosity < 2 then "" else
987 1082 (if img <> 2 then sprintf "<BR><A HREF=\"%s_6b_dependency_tree.png\">Dependency Tree 6b</A>\n" file_prefix
988 1083 else sprintf "<BR><IMG SRC=\"%s_6b_dependency_tree.png\">\n" file_prefix) ^
989   - (if img <> 1 then sprintf "<BR><A HREF=\"%s_6a_simple_dependency_tree.png\">Simplified Dependency Tree 6a</A>\n" file_prefix
990   - else sprintf "<BR><IMG SRC=\"%s_6a_simple_dependency_tree.png\">\n" file_prefix) ^
991 1084 (if img <> 1 then sprintf "<BR><A HREF=\"%s_6b_simple_dependency_tree.png\">Simplified Dependency Tree 6b</A>\n" file_prefix
992 1085 else sprintf "<BR><IMG SRC=\"%s_6b_simple_dependency_tree.png\">\n" file_prefix)) ^
993 1086 ""
... ... @@ -999,7 +1092,7 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
999 1092 if ExtArray.size result.dependency_tree8 <> 0 then ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_8_dependency_tree") "a3" result.dependency_tree8;
1000 1093 if result.dependency_tree9 <> [| |] then ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") "a3" result.dependency_tree9;
1001 1094 if result.dependency_tree9 <> [| |] then ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") result.dependency_tree9);
1002   - sprintf "<font color=\"red\">error_sem_valence</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.msg result.paths_size result.chart_size result.dependency_tree_size ^
  1095 + sprintf "<font color=\"red\">error_sem_valence</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" (escape_html result.msg) result.paths_size result.chart_size result.dependency_tree_size ^
1003 1096 (if verbosity = 0 then "" else
1004 1097 sprintf "<BR><A HREF=\"%s_6b_dependency_tree.pdf\">Dependency Tree References 6b</A>\n" file_prefix ^
1005 1098 (if result.dependency_tree7 <> [| |] then sprintf "<BR><A HREF=\"%s_7_dependency_tree.pdf\">Dependency Tree References 7</A>\n" file_prefix else "") ^
... ... @@ -1027,7 +1120,7 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
1027 1120 if ExtArray.size result.dependency_tree8 <> 0 then ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_8_dependency_tree") "a3" result.dependency_tree8;
1028 1121 if result.dependency_tree9 <> [| |] then ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") "a3" result.dependency_tree9;
1029 1122 if result.dependency_tree9 <> [| |] then ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") result.dependency_tree9));
1030   - sprintf "<font color=\"red\">error_sem_graph</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.msg result.paths_size result.chart_size result.dependency_tree_size ^
  1123 + sprintf "<font color=\"red\">error_sem_graph</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" (escape_html result.msg) result.paths_size result.chart_size result.dependency_tree_size ^
1031 1124 (if verbosity = 2 then
1032 1125 sprintf "<BR><A HREF=\"%s_6b_dependency_tree.pdf\">Dependency Tree References 6b</A>\n" file_prefix ^
1033 1126 (if result.semantic_graph10 <> [| |] then sprintf "<BR><A HREF=\"%s_10_semantic_graph.pdf\">Semantic Graph References 10</A>\n" file_prefix else "") ^
... ... @@ -1050,14 +1143,32 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
1050 1143 | SemGraphError2 ->
1051 1144 if verbosity = 0 then () else (
1052 1145 ENIAMsemGraphOf.print_semantic_graph2 path (file_prefix ^ "_11_semantic_graph") "" result.semantic_graph11);
1053   - sprintf "<font color=\"red\">error_sem_graph2</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.msg result.paths_size result.chart_size result.dependency_tree_size ^
  1146 + sprintf "<font color=\"red\">error_sem_graph2</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" (escape_html result.msg) result.paths_size result.chart_size result.dependency_tree_size ^
1054 1147 (if verbosity = 0 then "" else
1055 1148 sprintf "<BR><IMG SRC=\"%s_11_semantic_graph.png\">\n" file_prefix) ^
1056 1149 ""
1057 1150 | SemNotValidated ->
  1151 + if verbosity < 2 then () else (
  1152 + ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_6b_dependency_tree") result.dependency_tree6b;
  1153 + ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") result.dependency_tree9;
  1154 + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_6b_dependency_tree") "a3" result.dependency_tree6b;
  1155 + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_7_dependency_tree") "a2" result.dependency_tree7;
  1156 + ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_8_dependency_tree") "a3" result.dependency_tree8;
  1157 + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") "a3" result.dependency_tree9;
  1158 + ENIAMsemLatexOf.print_semantic_graph path (file_prefix ^ "_10_semantic_graph") "a3" result.semantic_graph10;
  1159 + ENIAMsemGraphOf.print_semantic_graph2 path (file_prefix ^ "_11_semantic_graph") "" result.semantic_graph11);
1058 1160 if verbosity = 0 then () else (
1059 1161 ENIAMsemGraphOf.print_semantic_graph2 path (file_prefix ^ "_12_semantic_graph") "" result.semantic_graph12);
1060   - sprintf "<font color=\"red\">sem_not_validated</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.msg result.paths_size result.chart_size result.dependency_tree_size ^
  1162 + sprintf "<font color=\"red\">sem_not_validated</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" (escape_html result.msg) result.paths_size result.chart_size result.dependency_tree_size ^
  1163 + (if verbosity < 2 then "" else
  1164 + sprintf "<BR><A HREF=\"%s_6b_dependency_tree.pdf\">Dependency Tree References 6b</A>\n" file_prefix ^
  1165 + sprintf "<BR><A HREF=\"%s_7_dependency_tree.pdf\">Dependency Tree References 7</A>\n" file_prefix ^
  1166 + sprintf "<BR><A HREF=\"%s_8_dependency_tree.pdf\">Dependency Tree References 8</A>\n" file_prefix ^
  1167 + sprintf "<BR><A HREF=\"%s_9_dependency_tree.pdf\">Dependency Tree References 9</A>\n" file_prefix ^
  1168 + sprintf "<BR><IMG SRC=\"%s_6b_dependency_tree.png\">\n" file_prefix ^
  1169 + sprintf "<BR><IMG SRC=\"%s_9_dependency_tree.png\">\n" file_prefix ^
  1170 + sprintf "<BR><A HREF=\"%s_10_semantic_graph.pdf\">Semantic Graph References 10</A>\n" file_prefix ^
  1171 + sprintf "<BR><IMG SRC=\"%s_11_semantic_graph.png\">\n" file_prefix) ^
1061 1172 (if verbosity = 0 then "" else
1062 1173 sprintf "<BR><IMG SRC=\"%s_12_semantic_graph.png\">\n" file_prefix) ^
1063 1174 ""
... ... @@ -1164,6 +1275,8 @@ let file_prefix_of_mode = function
1164 1275 | Mate -> "M"
1165 1276 | Swigra -> "S"
1166 1277 | POLFIE -> "P"
  1278 + | Error -> "Er"
  1279 + | Name -> "N"
1167 1280  
1168 1281 let rec html_of_sentence path file_prefix mode img verbosity tokens = function
1169 1282 RawSentence s -> escape_html s
... ... @@ -1196,6 +1309,7 @@ let rec html_of_paragraph path mode img verbosity tokens = function
1196 1309 String.concat "\n" (Xlist.map l (fun (mode,paragraph) ->
1197 1310 sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_paragraph path mode img verbosity tokens paragraph))) ^
1198 1311 "</table>"
  1312 + | ErrorParagraph s -> sprintf "<font color=\"red\">subsyntax_error</font>: %s\n" (escape_html s)
1199 1313  
1200 1314 let rec html_of_text path mode img verbosity tokens = function
1201 1315 RawText s -> escape_html s
... ... @@ -1229,6 +1343,7 @@ let rec find_prev_next_paragraph rev = function
1229 1343 | StructParagraph sentences ->
1230 1344 Xlist.fold sentences rev (fun rev p -> find_prev_next_sentence p.id p.file_prefix rev p.sentence)
1231 1345 | AltParagraph l -> Xlist.fold l rev (fun rev (mode,paragraph) -> find_prev_next_paragraph rev paragraph)
  1346 + | ErrorParagraph s -> rev
1232 1347  
1233 1348 let rec make_prev_next_map map prev = function
1234 1349 [x] -> StringMap.add map x (prev,"")
... ... @@ -1288,7 +1403,6 @@ let rec print_main_result_sentence path cg_bin_path results_web_path id file_pre
1288 1403 | AltSentence((Raw,RawSentence query) :: sentences) ->
1289 1404 File.file_out (path ^ "page" ^ id ^ "_" ^ file_prefix ^ ".html") (fun file ->
1290 1405 print_sentence_to_file path cg_bin_path results_web_path true id file_prefix prev_next_map query sentences file)
1291   - (* | AltSentence[Raw,RawSentence query] -> print_not_parsed_main_result path cg_bin_path results_web_path id file_prefix query pid prev_next_map *)
1292 1406 | _ -> failwith "print_main_result_sentence: ni"
1293 1407  
1294 1408 let rec print_main_result_paragraph path cg_bin_path results_web_path id tokens prev_next_map = function
... ... @@ -1296,6 +1410,8 @@ let rec print_main_result_paragraph path cg_bin_path results_web_path id tokens
1296 1410 | StructParagraph sentences ->
1297 1411 Xlist.iter sentences (fun p -> print_main_result_sentence path cg_bin_path results_web_path id p.file_prefix tokens p.id prev_next_map p.sentence)
1298 1412 | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> print_main_result_paragraph path cg_bin_path results_web_path id tokens prev_next_map paragraph)
  1413 + | ErrorParagraph s -> File.file_out (path ^ "page" ^ id ^ "_Er.html") (fun file ->
  1414 + print_sentence_to_file path cg_bin_path results_web_path false id "Er" prev_next_map ("ErrorParagraph: " ^ s) [] file)
1299 1415  
1300 1416 let rec print_main_result_text path cg_bin_path results_web_path id tokens = function
1301 1417 RawText s -> ()
... ... @@ -1309,8 +1425,7 @@ let rec print_main_result_first_page_sentence path cg_bin_path results_web_path
1309 1425 AltSentence[Raw,_;Struct,QuotedSentences sentences] ->
1310 1426 let p = List.hd sentences in
1311 1427 print_main_result_first_page_sentence path cg_bin_path results_web_path id p.file_prefix tokens p.id prev_next_map p.sentence
1312   -(* | AltSentence[Raw,RawSentence query] -> print_not_parsed_main_result_first_page path cg_bin_path results_web_path id file_prefix query pid prev_next_map
1313   -*) | AltSentence((Raw,RawSentence query) :: sentences) ->
  1428 + | AltSentence((Raw,RawSentence query) :: sentences) ->
1314 1429 print_sentence_to_file path cg_bin_path results_web_path false id file_prefix prev_next_map query sentences stdout
1315 1430 | _ -> failwith "print_main_result_first_page_sentence: ni"
1316 1431  
... ... @@ -1320,6 +1435,7 @@ let rec print_main_result_first_page_paragraph path cg_bin_path results_web_path
1320 1435 let p = List.hd sentences in
1321 1436 print_main_result_first_page_sentence path cg_bin_path results_web_path id p.file_prefix tokens p.id prev_next_map p.sentence
1322 1437 | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> print_main_result_first_page_paragraph path cg_bin_path results_web_path id tokens prev_next_map paragraph)
  1438 + | ErrorParagraph s -> print_sentence_to_file path cg_bin_path results_web_path false id "Er" prev_next_map ("ErrorParagraph: " ^ s) [] stdout
1323 1439  
1324 1440 let rec print_main_result_first_page_text path cg_bin_path results_web_path id tokens = function
1325 1441 RawText s -> ()
... ... @@ -1328,3 +1444,28 @@ let rec print_main_result_first_page_text path cg_bin_path results_web_path id t
1328 1444 (List.rev (Xlist.fold paragraphs [] find_prev_next_paragraph)) in
1329 1445 print_main_result_first_page_paragraph path cg_bin_path results_web_path id tokens prev_next_map (List.hd paragraphs)
1330 1446 | AltText l -> Xlist.iter l (fun (mode,text) -> print_main_result_first_page_text path cg_bin_path results_web_path id tokens text)
  1447 +
  1448 +let to_string_eniam_sentence verbosity tokens (result : eniam_parse_result) =
  1449 + let status_string = string_of_status result.status in
  1450 + if result.status = NotParsed then
  1451 + [status_string ^ ": " ^ cat_tokens_sequence result.text_fragments (ENIAM_LCGchart.select_maximal result.chart1)]
  1452 + else [status_string]
  1453 +
  1454 +let rec to_string_sentence verbosity tokens = function
  1455 + RawSentence s -> []
  1456 + | StructSentence(paths,last) -> []
  1457 + | DepSentence paths -> []
  1458 + | ENIAMSentence result -> to_string_eniam_sentence verbosity tokens result
  1459 + | QuotedSentences sentences -> List.flatten (Xlist.map sentences (fun p -> to_string_sentence verbosity tokens p.sentence))
  1460 + | AltSentence l -> List.flatten (Xlist.map l (fun (mode,sentence) -> to_string_sentence verbosity tokens sentence))
  1461 +
  1462 +let rec to_string_paragraph verbosity tokens = function
  1463 + RawParagraph s -> []
  1464 + | StructParagraph sentences -> List.flatten (Xlist.map sentences (fun p -> to_string_sentence verbosity tokens p.sentence))
  1465 + | AltParagraph l -> List.flatten (Xlist.map l (fun (mode,paragraph) -> to_string_paragraph verbosity tokens paragraph))
  1466 + | ErrorParagraph s -> ["SubsyntaxError"]
  1467 +
  1468 +let rec to_string_text verbosity tokens = function
  1469 + RawText s -> []
  1470 + | StructText paragraphs -> List.flatten (Xlist.map paragraphs (to_string_paragraph verbosity tokens))
  1471 + | AltText l -> List.flatten (Xlist.map l (fun (mode,text) -> to_string_text verbosity tokens text))
... ...
exec/parser.ml
... ... @@ -112,7 +112,7 @@ let rec main_loop sub_in sub_out in_chan out_chan =
112 112 if text = "" then () else (
113 113 let text,tokens,lex_sems,msg =
114 114 if !lexSemantics_built_in then
115   - let text,tokens,msg = ENIAMsubsyntax.catch_parse_text text in
  115 + let text,tokens,msg = ENIAMsubsyntax.catch_parse_text true text in
116 116 let text,msg =
117 117 if msg <> "" || not !perform_integration then text,msg else
118 118 ENIAMpreIntegration.catch_parse_text ENIAMsubsyntaxTypes.Struct tokens text in
... ...
exec/semparser.ml
... ... @@ -30,7 +30,7 @@ let load_cats_map filename =
30 30 | l -> failwith ("load_cats_map: " ^ String.concat "\t" l))
31 31  
32 32 let cats_map = load_cats_map ENIAM_LCGlexiconTypes.user_cats_filename
33   -let proj_map = load_cats_map ENIAM_LCGlexiconTypes.user_proj_filename
  33 +let coerced_map = load_cats_map ENIAM_LCGlexiconTypes.user_coerced_filename
34 34  
35 35 let subsyntax_built_in = ref true
36 36 let subsyntax_host = ref "localhost"
... ... @@ -96,19 +96,19 @@ let get_cats cats_map = function
96 96 | Proper(_,_,_,cats) -> if cats = [] then ["X"] else cats
97 97 | _ -> ["X"]
98 98  
99   -let expand_projections proj_map cats =
100   - Xlist.rev_map cats (fun cat -> cat, cat :: (try StringMap.find proj_map cat with Not_found -> []))
  99 +let expand_coercions coerced_map cats =
  100 + Xlist.rev_map cats (fun cat -> cat, cat :: (try StringMap.find coerced_map cat with Not_found -> []))
101 101 (* StringSet.to_list (Xlist.fold cats StringSet.empty (fun set cat ->
102   - let cats = try StringMap.find proj_map cat with Not_found -> [] in
  102 + let cats = try StringMap.find coerced_map cat with Not_found -> [] in
103 103 Xlist.fold (cat :: cats) set StringSet.add))*)
104 104  
105   -let assign_lex_sems proj_map cats_map tokens =
  105 +let assign_lex_sems coerced_map cats_map tokens =
106 106 let lex_sems = ExtArray.make (ExtArray.size tokens) ENIAMlexSemanticsTypes.empty_lex_sem in
107 107 let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in
108 108 Int.iter 1 (ExtArray.size tokens - 1) (fun i ->
109 109 let lemma = ENIAMtokens.get_lemma (ExtArray.get tokens i).token in
110 110 let pos = ENIAMtokens.get_pos (ExtArray.get tokens i).token in
111   - let cats = expand_projections proj_map (get_cats cats_map (ExtArray.get tokens i).token) in
  111 + let cats = expand_coercions coerced_map (get_cats cats_map (ExtArray.get tokens i).token) in
112 112 let frames =
113 113 Xlist.rev_map (ENIAMvalence.get_aroles [] lemma pos) (fun (sel,arole,arole_attr,arev) ->
114 114 {ENIAMlexSemanticsTypes.empty_frame with ENIAMlexSemanticsTypes.selectors=sel; ENIAMlexSemanticsTypes.arole=arole; ENIAMlexSemanticsTypes.arole_attr=arole_attr; ENIAMlexSemanticsTypes.arev=arev}) in
... ... @@ -125,7 +125,7 @@ let rec main_loop sub_in sub_out =
125 125 Printf.fprintf sub_out "%s\n\n%!" text;
126 126 (Marshal.from_channel sub_in : ENIAMsubsyntaxTypes.text * token_env ExtArray.t * string)) in
127 127 if msg <> "" then print_endline msg else (
128   - let lex_sems = assign_lex_sems proj_map cats_map tokens in
  128 + let lex_sems = assign_lex_sems coerced_map cats_map tokens in
129 129 let text = ENIAMexec.translate_text text in
130 130 (* let text = ENIAMexec.parse !timeout !verbosity rules tokens lex_sems text in *)
131 131 let text = ENIAMexec.parse !timeout !verbosity rules dep_rules tokens lex_sems text in
... ...
integration/ENIAMpreIntegration.ml
... ... @@ -339,6 +339,7 @@ let rec parse_paragraph mode tokens = function
339 339 let l = Xlist.rev_map l (fun (mode,paragraph) ->
340 340 mode, parse_paragraph mode tokens paragraph) in
341 341 AltParagraph(List.rev l)
  342 + | ErrorParagraph s -> ErrorParagraph s
342 343  
343 344 let rec parse_text mode tokens = function
344 345 RawText s -> RawText s
... ...
lexSemantics/ENIAMadjuncts.ml
... ... @@ -37,7 +37,7 @@ let simplify_position_verb mode l = function (* FIXME: dodać czyszczenie E Pro
37 37 | E Or -> l
38 38 | E (CP(CompTypeUndef,CompUndef)) -> l
39 39 | E (PrepNP(_,prep,Case case)) -> l
40   - | E (PrepNCP(prep,Case case,CompTypeUndef,CompUndef)) -> l
  40 + | E (PrepNCP(_,prep,Case case,CompTypeUndef,CompUndef)) -> l
41 41 | NP(Case "gen") as t -> if mode = "temp" then l else t :: l
42 42 | NP(Case "acc") as t -> if mode = "dur" then l else t :: l
43 43 | t -> t :: l
... ... @@ -253,19 +253,19 @@ let simplify_schemata lexemes pos pos2 lemma schemata =
253 253 "{" ^ String.concat ";" (PhraseSet.fold morfs [] (fun l m -> ENIAMwalStringOf.phrase m :: l)) ^ "}")))); *)
254 254 schemata
255 255  
256   -let add_adjuncts preps compreps compars pos2 (selectors,cat,schema) =
  256 +let add_adjuncts preps compreps compars pos2 (selectors,cat,(*has_context,*)schema) =
257 257 let compreps = Xlist.rev_map compreps ENIAMwalRenderer.render_comprep in
258 258 let prepnps = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_prepnp prep cases) in
259 259 let prepadjps = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_prepadjp prep cases) in
260 260 let compars = Xlist.rev_map compars ENIAMwalRenderer.render_compar in
261 261 match pos2 with
262   - "verb" -> [selectors,cat,schema @ ENIAMwalRenderer.verb_adjuncts_simp @ prepnps @ prepadjps @ compreps @ compars]
  262 + "verb" -> [selectors,cat,(*has_context,*)schema @ ENIAMwalRenderer.verb_adjuncts_simp @ prepnps @ prepadjps @ compreps @ compars]
263 263 | "noun" -> [
264   - [Nsyn,Eq,["proper"]] @ selectors,cat,ENIAMwalRenderer.proper_noun_adjuncts_simp @ prepnps @ compreps @ compars;
265   - [Nsyn,Eq,["common"];Nsem,Eq,["measure"]] @ selectors,cat,ENIAMwalRenderer.measure_noun_adjuncts_simp @ prepnps @ compreps @ compars;
266   - [Nsyn,Eq,["common"];Nsem,Neq,["measure"]] @ selectors,cat,ENIAMwalRenderer.common_noun_adjuncts_simp @ prepnps @ compreps @ compars]
267   - | "adj" -> [selectors,cat,schema @ ENIAMwalRenderer.adj_adjuncts_simp @ compars]
268   - | "adv" -> [selectors,cat,schema @ ENIAMwalRenderer.adv_adjuncts_simp @ compars]
  264 + [Nsyn,Eq,["proper"]] @ selectors,cat,(*has_context,*)ENIAMwalRenderer.proper_noun_adjuncts_simp @ prepnps @ compreps @ compars;
  265 + [Nsyn,Eq,["common"];Nsem,Eq,["measure"]] @ selectors,cat,(*has_context,*)ENIAMwalRenderer.measure_noun_adjuncts_simp @ prepnps @ compreps @ compars;
  266 + [Nsyn,Eq,["common"];Nsem,Neq,["measure"]] @ selectors,cat,(*has_context,*)ENIAMwalRenderer.common_noun_adjuncts_simp @ prepnps @ compreps @ compars]
  267 + | "adj" -> [selectors,cat,(*has_context,*)schema @ ENIAMwalRenderer.adj_adjuncts_simp @ compars]
  268 + | "adv" -> [selectors,cat,(*has_context,*)schema @ ENIAMwalRenderer.adv_adjuncts_simp @ compars]
269 269 | _ -> []
270 270  
271 271 open ENIAMlexSemanticsTypes
... ...
lexSemantics/ENIAMlexSemantics.ml
... ... @@ -23,20 +23,22 @@ open ENIAMlexSemanticsTypes
23 23 open ENIAMwalTypes
24 24 open Xstd
25 25  
26   -let find_meaning m =
  26 +(*let snode_values = ENIAM_LCGlexiconTypes.SelectorMap.find ENIAMcategoriesPL.selector_values ENIAM_LCGlexiconTypes.SNode*)
  27 +
  28 +let find_sense m =
27 29 try
28   - ENIAMplWordnet.find_meaning m.plwnluid
  30 + ENIAMplWordnet.find_sense m.plwnluid
29 31 with Not_found ->
30   - m.name ^ "-" ^ m.variant, [], unknown_meaning_weight
  32 + m.name ^ "-" ^ m.variant, [], unknown_sense_weight
31 33  
32   -let find_prep_meaning lemma hipero =
  34 +let find_prep_sense lemma hipero =
33 35 let hipero = match hipero with
34 36 [Predef hipero] -> hipero
35   - | _ -> failwith "find_prep_meaning" in
36   - if hipero = "ALL" then lemma, [hipero,0], unknown_meaning_weight else
  37 + | _ -> failwith "find_prep_sense" in
  38 + if hipero = "ALL" then lemma, [hipero,0], unknown_sense_weight else
37 39 let syn_id = StringMap.find !ENIAMplWordnet.predef hipero in
38 40 let hipero = IntMap.fold (ENIAMplWordnet.get_hipero syn_id) [] (fun hipero syn_id cost -> (ENIAMplWordnet.synset_name syn_id, cost) :: hipero) in
39   - lemma, hipero, unknown_meaning_weight
  41 + lemma, hipero, unknown_sense_weight
40 42  
41 43 let lex_sie = LCG (ENIAMwalRenderer.render_morf (SimpleLexArg("się",QUB)))
42 44  
... ... @@ -47,7 +49,7 @@ let rec has_lemma_sie = function
47 49 (* FIXME: naiwnie wierzymy, że jeśli leksem jest opisany semantycznie w walentym to zawiera ramy dla wszystkich sensów *)
48 50 let find_senses t s =
49 51 (*let set = Xlist.fold s.frames StringSet.empty (fun set frame ->
50   - Xlist.fold frame.meanings set (fun set (name,hipero,weight) ->
  52 + Xlist.fold frame.senses set (fun set (name,hipero,weight) ->
51 53 StringSet.add set name)) in*)
52 54 let senses = match t.token with
53 55 Lemma(lemma,pos,_) -> ENIAMplWordnet.find_senses lemma pos
... ... @@ -62,15 +64,15 @@ let find_senses t s =
62 64 | _ -> [] in
63 65 (* let senses_sie = Xlist.fold senses_sie [] (fun senses_sie (name,hipero,weight) ->
64 66 if StringSet.mem set name then senses_sie else (name,hipero,weight) :: senses_sie) in
65   - let frames = if senses = [] then s.frames else {empty_frame with meanings=senses} :: s.frames in
66   - let frames = if senses_sie = [] then frames else {empty_frame with meanings=senses_sie;
  67 + let frames = if senses = [] then s.frames else {empty_frame with senses=senses} :: s.frames in
  68 + let frames = if senses_sie = [] then frames else {empty_frame with senses=senses_sie;
67 69 positions=[{empty_position with role="Lemma"; mode=["lemma"]; morfs=[lex_sie]; is_necessary=Req}]} :: frames in*) (* FIXME: czy to nie usuwa elementów z ramy? *)
68 70 let frames = Xlist.rev_map s.frames (fun f ->
69   - if f.meanings <> [] then f else
  71 + if f.senses <> [] then f else
70 72 if has_lemma_sie f.positions then
71   - if senses_sie = [] then {f with meanings=[ENIAMtokens.get_lemma t.token ^ " się", [], unknown_meaning_weight]} else {f with meanings=senses_sie}
  73 + if senses_sie = [] then {f with senses=[ENIAMtokens.get_lemma t.token ^ " się", [], unknown_sense_weight]} else {f with senses=senses_sie}
72 74 else
73   - if senses = [] then {f with meanings=[ENIAMtokens.get_lemma t.token, [], unknown_meaning_weight]} else {f with meanings=senses}) in
  75 + if senses = [] then {f with senses=[ENIAMtokens.get_lemma t.token, [], unknown_sense_weight]} else {f with senses=senses}) in
74 76 {s with frames=frames}
75 77  
76 78 let find_selprefs schema = (* FIXME: RelationRole *)
... ... @@ -124,6 +126,7 @@ let rec split_tokens_into_groups_paragraph a = function
124 126 Xlist.iter sentences (fun p -> split_tokens_into_groups_sentence a p.sentence)
125 127 | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) ->
126 128 split_tokens_into_groups_paragraph a paragraph)
  129 + | ErrorParagraph s -> ()
127 130  
128 131 let rec split_tokens_into_groups_text a = function
129 132 RawText s -> ()
... ... @@ -172,10 +175,10 @@ let semantize lemma pos (selectors,schema) =
172 175  
173 176 let load_num_sem filename (num_sem,num_sem_args) =
174 177 File.fold_tab filename (num_sem,num_sem_args) (fun (num_sem,num_sem_args) -> function
175   - [lemma;_;nsems;meaning;sem_args] ->
  178 + [lemma;_;nsems;sense;sem_args] ->
176 179 let sem_args = Xstring.split "," sem_args in
177 180 Xlist.fold (Xstring.split "," nsems) num_sem (fun num_sem nsem ->
178   - StringMap.add_inc num_sem lemma [nsem,meaning] (fun l -> (nsem,meaning) ::l)),
  181 + StringMap.add_inc num_sem lemma [nsem,sense] (fun l -> (nsem,sense) ::l)),
179 182 StringMap.add_inc num_sem_args lemma sem_args (fun _ -> failwith "load_num_sem")
180 183 | _ -> failwith "load_num_sem")
181 184  
... ... @@ -208,13 +211,13 @@ let mark_nosem frame =
208 211 let assign_prep_semantics lemma =
209 212 if StringSet.mem ENIAMcategoriesPL.compar_lexemes lemma then
210 213 [{empty_frame with
211   - meanings = [find_prep_meaning lemma [Predef "ALL"]];
  214 + senses = [find_prep_sense lemma [Predef "ALL"]];
212 215 positions= [{empty_position with
213 216 dir=Forward_; gf=CORE;
214 217 morfs=ENIAMwalRenderer.compar_morfs; is_necessary=Req}];
215 218 agf="arg"};
216 219 {empty_frame with
217   - meanings = [find_prep_meaning lemma [Predef "ALL"]];
  220 + senses = [find_prep_sense lemma [Predef "ALL"]];
218 221 positions= [{empty_position with
219 222 sel_prefs=[SynsetName "ALL"]; dir=Forward_; gf=CORE;
220 223 morfs=ENIAMwalRenderer.compar_morfs; is_necessary=Req}];
... ... @@ -223,14 +226,14 @@ let assign_prep_semantics lemma =
223 226 let roles = try StringMap.find ENIAMlexSemanticsData.prep_roles lemma with Not_found -> [] in
224 227 (* Printf.printf "assign_prep_semantics: |roles|=%d\n%!" (Xlist.size roles); *)
225 228 {empty_frame with
226   - meanings = [find_prep_meaning lemma [Predef "ALL"]];
  229 + senses = [find_prep_sense lemma [Predef "ALL"]];
227 230 positions= [{empty_position with
228 231 dir=if lemma="temu" then Backward_ else Forward_; gf=CORE;
229 232 morfs=ENIAMwalRenderer.prep_morfs; is_necessary=Req}];
230 233 agf="arg"} ::
231 234 (if roles = [] then (* FIXME: zaślepka do usunięcia po stworzeniu listy przyimków *)
232 235 [{empty_frame with
233   - meanings = [find_prep_meaning lemma [Predef "ALL"]];
  236 + senses = [find_prep_sense lemma [Predef "ALL"]];
234 237 positions= [{empty_position with
235 238 sel_prefs=[SynsetName "ALL"]; dir=if lemma="temu" then Backward_ else Forward_; gf=CORE;
236 239 morfs=ENIAMwalRenderer.prep_morfs; is_necessary=Req}];
... ... @@ -238,28 +241,28 @@ let assign_prep_semantics lemma =
238 241 else
239 242 Xlist.map roles (function (case,arole,arole_attr,hipero,sel_prefs) ->
240 243 (* Printf.printf "assign_prep_semantics: case=%s arole=%s arole_attr=%s\n%!" case arole arole_attr; *)
241   - let meaning = find_prep_meaning lemma hipero in (* FIXME: zaślepka dla meaning i weight *)
  244 + let sense = find_prep_sense lemma hipero in (* FIXME: zaślepka dla sense i weight *)
242 245 (* print_endline "assign_prep_semantics 1"; *)
243 246 let positions = [{empty_position with
244 247 sel_prefs=sel_prefs; dir=if lemma="temu" then Backward_ else Forward_; gf=CORE;
245 248 morfs=ENIAMwalRenderer.prep_morfs(*ENIAMwalRenderer.assing_prep_morfs (lemma,case)*); is_necessary=Req}] in
246 249 (* print_endline "assign_prep_semantics 2"; *)
247   - {empty_frame with selectors=[ENIAM_LCGlexiconTypes.Case,ENIAM_LCGlexiconTypes.Eq,[case]]; meanings=[meaning]; positions=find_selprefs positions;
  250 + {empty_frame with selectors=[ENIAM_LCGlexiconTypes.Case,ENIAM_LCGlexiconTypes.Eq,[case]]; senses=[sense]; positions=find_selprefs positions;
248 251 arole=arole; arole_attr=arole_attr; arev=false; agf="adjunct"}))
249 252  
250 253 let assign_num_semantics lemma =
251 254 let sems = try StringMap.find !num_sem lemma with Not_found -> [] in
252   - Xlist.map sems (fun (nsem,meaning) ->
253   - let meaning,arole_attr =
254   - if meaning = "" then (lemma, [], unknown_meaning_weight),"Approximate"
255   - else (meaning, [], unknown_meaning_weight),"Exact" in
  255 + Xlist.map sems (fun (nsem,sense) ->
  256 + let sense,arole_attr =
  257 + if sense = "" then (lemma, [], unknown_sense_weight),"Approximate"
  258 + else (sense, [], unknown_sense_weight),"Exact" in
256 259 let arole = match nsem with
257 260 "count" -> "Count"
258 261 | "mass" -> "Measure"
259 262 | _ -> failwith "assign_num_semantics" in
260 263 {empty_frame with
261 264 selectors=[ENIAM_LCGlexiconTypes.Nsem,ENIAM_LCGlexiconTypes.Eq,[nsem]];
262   - meanings=[meaning]; arole=arole; arole_attr=arole_attr; arev=false})
  265 + senses=[sense]; arole=arole; arole_attr=arole_attr; arev=false})
263 266  
264 267 let assign_symb_num_semantics lemma pos =
265 268 let arole_attr = match pos with
... ... @@ -270,14 +273,14 @@ let assign_symb_num_semantics lemma pos =
270 273 | _ -> failwith "assign_symb_num_semantics" in
271 274 [{empty_frame with
272 275 selectors=[ENIAM_LCGlexiconTypes.Nsem,ENIAM_LCGlexiconTypes.Eq,["count"]];
273   - meanings=[lemma, [], unknown_meaning_weight]; arole="Count"; arole_attr=arole_attr; arev=false}]
  276 + senses=[lemma, [], unknown_sense_weight]; arole="Count"; arole_attr=arole_attr; arev=false}]
274 277  
275 278 (*let set_context lemma pos frame =
276 279 if pos = "fin" || pos = "praet" || pos = "winien" || pos = "inf" || pos = "pred" || pos = "impt" || pos = "imps" || pos = "ger" || pos = "pcon" || pos = "pant" then
277 280 [{frame with has_context=true}] else
278 281 if pos = "subst" then
279   - if frame.meanings = [] then failwith "set_context" else
280   - let Xlist.fold frame.meanings (fun -> ) in
  282 + if frame.senses = [] then failwith "set_context" else
  283 + let Xlist.fold frame.senses (fun -> ) in
281 284 else [{frame with has_context=true}](*wydarzenie 1 czynność 1*) (*czynności 1 czyn 1*)*)
282 285  
283 286 let assign_valence tokens lex_sems group =
... ... @@ -302,17 +305,17 @@ let assign_valence tokens lex_sems group =
302 305 let schemata = ENIAMadjuncts.simplify_schemata lexemes pos pos2 lemma schemata1 in
303 306 (* Printf.printf "C %s |schemata|=%d\n" lemma (Xlist.size schemata); *)
304 307 let schemata = Xlist.rev_map schemata (fun (selectors,schema) ->
305   - selectors,["X",["X"]],ENIAMwalRenderer.render_simple_schema schema) in
  308 + selectors,["X",["X"]],(*snode_values,*)ENIAMwalRenderer.render_simple_schema schema) in
306 309 let schemata = List.flatten (Xlist.rev_map schemata (ENIAMadjuncts.add_adjuncts preps compreps compars pos2)) in
307   - let schemata = if schemata = [] then [[],["X",["X"]],[]] else schemata in
  310 + let schemata = if schemata = [] then [[],["X",["X"]],(*snode_values,*)[]] else schemata in
308 311 (* Printf.printf "D %s |schemata|=%d\n" lemma (Xlist.size schemata); *)
309 312 let entries = List.flatten (Xlist.rev_map entries (ENIAMvalence.transform_lex_entry pos lemma)) in
310 313 let entries = Xlist.map entries (fun (selectors,entry) ->
311 314 selectors,ENIAMwalRenderer.render_lex_entry entry) in
312   - let connected = List.flatten (Xlist.map connected (fun (sopinion,fopinion,meanings,neg,pred,aspect,schema1) ->
  315 + let connected = List.flatten (Xlist.map connected (fun (sopinion,fopinion,senses,neg,pred,aspect,schema1) ->
313 316 List.flatten (Xlist.rev_map (ENIAMvalence.transform_entry pos lemma neg pred aspect schema1) (fun (selectors,schema) ->
314 317 Xlist.rev_map (ENIAMvalence.get_aroles schema1 lemma pos) (fun (sel,arole,arole_attr,arev) ->
315   - {selectors=sel @ selectors; meanings=Xlist.map meanings find_meaning; positions=schema;
  318 + {empty_frame with selectors=sel @ selectors; senses=Xlist.map senses find_sense; positions=schema;
316 319 arole=arole; arole_attr=arole_attr; arev=arev; agf=""; rev_hipero=false; sem_args=[]; sopinion=sopinion; fopinion=fopinion}))))) in
317 320 (* Printf.printf "E %s |connected|=%d\n" lemma (Xlist.size connected); *)
318 321 let connected = if connected = [] then List.flatten (Xlist.rev_map (make_unique schemata1) (semantize lemma pos)) else connected in
... ... @@ -344,7 +347,7 @@ let assign_valence tokens lex_sems group =
344 347 let connected = Xlist.rev_map connected mark_nosem in
345 348 let connected = if connected = [] then semantize lemma pos ([],[]) else connected in
346 349 let connected = Xlist.rev_map connected (fun f ->
347   - if f.meanings = [] then {f with meanings=[lemma, ["X",1], unknown_meaning_weight]} else f) in
  350 + if f.senses = [] then {f with senses=[lemma, ["X",1], unknown_sense_weight]} else f) in
348 351 (* let connected = List.flatten (Xlist.rev_map connected (set_context lemma pos)) in *)
349 352 (* Printf.printf "K %s |connected|=%d\n" lemma (Xlist.size connected); *)
350 353 ExtArray.set lex_sems id {(*(ExtArray.get lex_sems id) with*)
... ... @@ -404,11 +407,11 @@ let disambiguate_senses lex_sems group =
404 407 Xlist.iter group (fun id ->
405 408 let t = ExtArray.get lex_sems id in
406 409 ExtArray.set lex_sems id {t with frames=Xlist.map t.frames (fun frame ->
407   - let meanings = Xlist.map frame.meanings (fun (name,hipero,weight) ->
  410 + let senses = Xlist.map frame.senses (fun (name,hipero,weight) ->
408 411 let hipero = Xlist.fold hipero ["ALL",0] (fun hipero (name,cost) ->
409 412 if StringSet.mem prefs name then (name,cost) :: hipero else hipero) in
410 413 name,hipero,weight) in
411   - {frame with meanings=meanings})})
  414 + {frame with senses=senses})})
412 415  
413 416 let remove_unused_tokens tokens groups =
414 417 let set = Xlist.fold groups IntSet.empty (fun set group ->
... ... @@ -459,9 +462,9 @@ let rec create_tokens_for_artificial_nodes_rec tokens lex_sems = function
459 462 Node t ->
460 463 let t = if t.id = 0 then (
461 464 let id = ExtArray.add tokens empty_token_env in
462   - let lex_sem = {empty_lex_sem with frames=[{empty_frame with meanings=[t.lemma, [t.lemma,0], unknown_meaning_weight]}]} in
  465 + let lex_sem = {empty_lex_sem with frames=[{empty_frame with senses=[t.lemma, [t.lemma,0], unknown_sense_weight]}]} in
463 466 let id2 = ExtArray.add lex_sems lex_sem in
464   - if id <>id2 then failwith "create_tokens_for_artificial_nodes_rec" else
  467 + if id <> id2 then failwith "create_tokens_for_artificial_nodes_rec: tokens inconsistent with lex_sems" else
465 468 let t = if t.symbol = Dot then
466 469 {t with symbol = match t.pos with
467 470 "<root>" -> Tuple[Val "<root>"]
... ...
lexSemantics/ENIAMlexSemanticsData.ml
... ... @@ -239,7 +239,7 @@ let qub_roles = Xlist.fold [
239 239 ] StringMap.empty (fun map (k,r,a) -> StringMap.add_inc map k [r,a] (fun l -> (r,a) :: l))
240 240  
241 241  
242   -let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_prefs *)
  242 +let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,sense/hipero,sel_prefs *)
243 243 "od","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
244 244 "spod","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
245 245 "spomiędzy","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
... ...
lexSemantics/ENIAMlexSemanticsHTMLof.ml
... ... @@ -60,14 +60,15 @@ let html_of_lex_sems tokens lex_sems =
60 60 let core = Printf.sprintf "%3d %s %s" id orth lemma in
61 61 let lex_entries = Xlist.map t.lex_entries (fun (selectors,s) ->
62 62 "&emsp;&emsp;[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] " ^ ENIAM_LCGstringOf.grammar_symbol 0 s) in
63   - let schemata = Xlist.map t.schemata (fun (selectors,cat,l) ->
  63 + let schemata = Xlist.map t.schemata (fun (selectors,cat,(*snode,*)l) ->
64 64 "&emsp;&emsp;[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "]" ^
65 65 String.concat "," (Xlist.map cat (fun (m,l) -> m ^ "[" ^ String.concat "," l ^ "]")) ^
  66 + (*String.concat "|" snode ^*)
66 67 " {" ^ String.concat ", " (Xlist.map l (fun (d,s) ->
67 68 ENIAM_LCGstringOf.direction d ^ ENIAM_LCGstringOf.grammar_symbol 0 s)) ^ "}") in
68   - (* let frames = Xlist.map t.frames (fun (selectors,meanings,schema) -> FIXME
  69 + (* let frames = Xlist.map t.frames (fun (selectors,senses,schema) -> FIXME
69 70 "&emsp;&emsp;[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] {" ^ ENIAMwalStringOf.schema schema ^ "} " ^
70   - String.concat ", " (Xlist.map meanings (fun m -> ENIAMwalStringOf.meaning m))) in *)
  71 + String.concat ", " (Xlist.map senses (fun m -> ENIAMwalStringOf.sense m))) in *)
71 72 (String.concat "<br>\n " ([core] @ schemata (*@ frames*) @ lex_entries)) :: l))) ^
72 73 "</P>"
73 74  
... ... @@ -76,7 +77,7 @@ let html_of_lex_sems tokens lex_sems =
76 77 lex_entries: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list *
77 78 ENIAM_LCGtypes.grammar_symbol) list;
78 79 frames: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list *
79   - ENIAMwalTypes.meaning list * ENIAMwalTypes.position list) list;*)
  80 + ENIAMwalTypes.sense list * ENIAMwalTypes.position list) list;*)
80 81  
81 82 let text_and_tokens_and_lex_sems text tokens lex_sems msg =
82 83 if msg = "" then sprintf "%s\n%s<BR>\n%s<BR>\n%s<BR>\n%s\n" html_header
... ...
lexSemantics/ENIAMlexSemanticsStringOf.ml
... ... @@ -40,13 +40,14 @@ let string_of_lex_sems tokens lex_sems =
40 40 let core = Printf.sprintf "%3d %s %s" id orth lemma in
41 41 let lex_entries = Xlist.map t.lex_entries (fun (selectors,s) ->
42 42 "&[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] " ^ ENIAM_LCGstringOf.grammar_symbol 0 s) in
43   - let schemata = Xlist.map t.schemata (fun (selectors,cat,l) ->
  43 + let schemata = Xlist.map t.schemata (fun (selectors,cat,(*snode,*)l) ->
44 44 "[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "]" ^
45 45 String.concat "," (Xlist.map cat (fun (m,l) -> m ^ "[" ^ String.concat "," l ^ "]")) ^
  46 + (*String.concat "|" snode ^*)
46 47 " {" ^ String.concat "," (Xlist.map l (fun (d,s) ->
47 48 ENIAM_LCGstringOf.direction d ^ ENIAM_LCGstringOf.grammar_symbol 0 s)) ^ "}") in
48 49 let frames = Xlist.map t.frames (fun f ->
49 50 "*" ^ arole f ^ "[" ^ ENIAMcategoriesPL.string_of_selectors f.selectors ^ "] {" ^ ENIAMwalStringOf.schema f.positions ^ "} " ^
50   - String.concat "," (Xlist.map f.meanings (fun (sense,hipero,weight) ->
  51 + String.concat "," (Xlist.map f.senses (fun (sense,hipero,weight) ->
51 52 Printf.sprintf "%s[%s]%.2f" sense (String.concat "," (Xlist.map hipero (fun (s,n) -> s ^ " " ^ string_of_int n))) weight))) in
52 53 (String.concat "\n " ([core] @ schemata @ frames @ lex_entries)) :: l)))
... ...
lexSemantics/ENIAMlexSemanticsTypes.ml
... ... @@ -22,7 +22,8 @@ open Xstd
22 22  
23 23 type frame = {
24 24 selectors: (ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list;
25   - meanings: ((*ENIAMwalTypes.meaning **) string * (string * int) list * float) list;
  25 + senses: ((*ENIAMwalTypes.sense **) string * (string * int) list * float) list;
  26 + cats: (string * string list) list;
26 27 positions: ENIAMwalTypes.position list;
27 28 arole: string;
28 29 arole_attr: string;
... ... @@ -30,17 +31,18 @@ type frame = {
30 31 agf: string;
31 32 sem_args: string list;
32 33 rev_hipero: bool;
33   - (* has_context: bool; *)
  34 + (*snode: string list;*)
34 35 sopinion: ENIAMwalTypes.opinion;
35 36 fopinion: ENIAMwalTypes.opinion;
36 37 }
37 38  
38   -let empty_frame = {selectors=[]; meanings=[]; positions=[]; arole=""; arole_attr=""; arev=false; agf=""; sem_args=[]; rev_hipero=false; (*has_context=false;*)
  39 +let empty_frame = {selectors=[]; senses=[]; cats=["X",["X"]]; positions=[]; arole=""; arole_attr=""; arev=false; agf=""; sem_args=[]; rev_hipero=false; (*snode=[];*)
39 40 sopinion=ENIAMwalTypes.Nieokreslony; fopinion=ENIAMwalTypes.Nieokreslony}
40 41  
41 42 type lex_sem = {
42 43 schemata: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list *
43 44 (string * string list) list * (* sensy *)
  45 + (*string list **) (* has_context *)
44 46 (ENIAM_LCGtypes.direction * ENIAM_LCGtypes.grammar_symbol) list) list;
45 47 lex_entries: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list *
46 48 ENIAM_LCGtypes.grammar_symbol) list;
... ... @@ -49,10 +51,10 @@ type lex_sem = {
49 51 }
50 52  
51 53 let empty_lex_sem = {
52   - schemata=[]; lex_entries=[]; frames=[]; (*cats=["X",["X"]]*)}
  54 + schemata=[]; lex_entries=[]; frames=[]}
53 55  
54 56 let hipero_threshold = 3
55   -let unknown_meaning_weight = -1.
  57 +let unknown_sense_weight = -1.
56 58  
57 59 let lu_filename = resource_path ^ "/plWordnet/lu.tab"
58 60 let ex_hipo_filename = resource_path ^ "/plWordnet/ex_hipo.tab"
... ... @@ -61,5 +63,5 @@ let syn_filename = resource_path ^ "/plWordnet/syn.tab"
61 63 let predef_filename = resource_path ^ "/lexSemantics/predef_prefs.tab"
62 64 let proper_classes_filename = resource_path ^ "/lexSemantics/proper_classes.tab"
63 65  
64   -let proj_filename = ENIAMwalTypes.data_path ^ "/projections.tab"
65   -let proper_meanings_filename = ENIAMwalTypes.data_path ^ "/proper_meanings.tab"
  66 +let coercions_filename = ENIAMwalTypes.data_path ^ "/coercions.tab"
  67 +let proper_cats_filename = ENIAMwalTypes.data_path ^ "/proper_cats.tab"
... ...
lexSemantics/ENIAMplWordnet.ml
... ... @@ -155,7 +155,7 @@ let find_proper_senses senses =
155 155 List.flatten (Xlist.rev_map senses (fun sense ->
156 156 try StringMap.find !proper_classes sense with Not_found -> failwith ("find_proper_senses: " ^ sense)))
157 157  
158   -let find_meaning lu_id =
  158 +let find_sense lu_id =
159 159 let lemma,variant,syn_id = IntMap.find !lu_names lu_id in
160 160 lemma ^ "-" ^ variant,
161 161 IntMap.fold (get_hipero syn_id) [] (fun hipero syn_id cost -> (synset_name syn_id, cost) :: hipero),
... ...
lexSemantics/ENIAMvalence.ml
... ... @@ -56,6 +56,7 @@ let transform_gdy = function
56 56 | "imperative" -> [Comp "gdy"]
57 57 | "conditional" -> [Comp "gdyby"]
58 58 | "gerundial" -> [Comp "gdy"]
  59 + | "no-subj" -> [Comp "gdy"]
59 60 | "" -> [Comp "gdy";Comp "gdyby"]
60 61 | s -> failwith ("transform_gdy: " ^ s)
61 62  
... ... @@ -232,9 +233,29 @@ let transform_qub_phrase lemma = function
232 233 | phrase -> failwith ("transform_qub_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase)
233 234  
234 235 let transform_qub_pos lemma = function
235   - | QUB as morf -> [morf]
  236 + | QUB as morf -> [morf]
236 237 | pos -> failwith ("transform_qub_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos)
237 238  
  239 +let transform_interj_phrase lemma = function
  240 + NP(Case "nom") as morf -> [morf]
  241 + | Null -> [Null]
  242 + | phrase -> failwith ("transform_interj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase)
  243 +
  244 +let transform_interj_pos lemma = function
  245 + | pos -> failwith ("transform_interj_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos)
  246 +
  247 +let transform_sinterj_phrase lemma = function
  248 + | phrase -> failwith ("transform_sinterj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase)
  249 +
  250 +let transform_sinterj_pos lemma = function
  251 + | pos -> failwith ("transform_sinterj_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos)
  252 +
  253 +let transform_aglt_phrase lemma = function
  254 + | phrase -> failwith ("transform_aglt_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase)
  255 +
  256 +let transform_aglt_pos lemma = function
  257 + | pos -> failwith ("transform_aglt_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos)
  258 +
238 259 let transform_siebie_phrase lemma = function
239 260 | phrase -> failwith ("transform_siebie_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase)
240 261  
... ... @@ -243,9 +264,9 @@ let transform_siebie_pos lemma = function
243 264 | pos -> failwith ("transform_siebie_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos)
244 265  
245 266 let transform_pers_subj_phrase lemma negation mood = function (* FIXME: prepnp(na,loc) *)
246   - | NP(Str) -> [NP(NomAgr)(*;NumP(NomAgr)*)]
  267 + | NP(Str) -> [NP(NomAgr);NP(VocAgr)(*;NumP(NomAgr)*)]
247 268 | NP(Part) -> [NP(Case "gen");NP(Case "acc")(*;NumP(Case "gen");NumP(Case "acc")*)] (* tylko w 'nalewać', 'nalać', 'ponalewać', 'najechać','uzbierać' *)
248   - | NCP(Str,ctype,comp) -> [NCP(NomAgr,ctype,comp)]
  269 + | NCP(Str,ctype,comp) -> [NCP(NomAgr,ctype,comp);NCP(VocAgr,ctype,comp)]
249 270 | CP(ctype,comp) as morf -> [morf]
250 271 | InfP _ as morf -> [morf]
251 272 | Or as morf -> [morf]
... ... @@ -265,7 +286,7 @@ let transform_pers_subj_pos lemma negation mood = function
265 286 let transform_ger_subj_phrase lemma negation mood control = function
266 287 | NP(Str) -> [NP(Case "gen");PrepNP(Pnosem,"przez",Case "acc")(*;NumP(Case "gen")*)(*;PrepNumP("przez",Case "acc")*)] (* FIXME: czy przez:acc jest możliwe? *)
267 288 | NP(Part) -> [NP(Case "gen")(*;NP(Case "acc")*)(*;NumP(Case "gen");NumP(Case "acc")*)]
268   - | NCP(Str,ctype,comp) -> [NCP(Case "gen",ctype,comp);PrepNCP("przez",Case "acc",ctype,comp)] (* FIXME: czy przez:acc jest możliwe? *)
  289 + | NCP(Str,ctype,comp) -> [NCP(Case "gen",ctype,comp);PrepNCP(Pnosem,"przez",Case "acc",ctype,comp)] (* FIXME: czy przez:acc jest możliwe? *)
269 290 | CP(ctype,comp) as morf -> [morf]
270 291 | InfP _ as morf -> [morf] (* FIXME: czy to jest możliwe? *)
271 292 | Or as morf -> [morf]
... ... @@ -284,7 +305,7 @@ let transform_ger_subj_pos lemma negation mood = function (* FIXME: ADV(_) *)
284 305  
285 306 let transform_ppas_subj_phrase lemma negation mood control = function
286 307 | NP(Str) -> [PrepNP(Pnosem,"przez",Case "acc")(*;PrepNumP("przez",Case "acc")*)]
287   - | NCP(Str,ctype,comp) -> [PrepNCP("przez",Case "acc",ctype,comp)]
  308 + | NCP(Str,ctype,comp) -> [PrepNCP(Pnosem,"przez",Case "acc",ctype,comp)]
288 309 | CP(ctype,comp) as morf -> [morf]
289 310 | Pro -> if control then [Pro] else [Null]
290 311 | morf -> failwith ("transform_ppas_subj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf)
... ... @@ -300,6 +321,7 @@ let transform_pers_phrase lemma negation mood = function
300 321 | AdjP(Str) -> Xlist.map (transform_str mood negation) (fun case -> AdjP case) (* FIXME: pomijam uzgadnianie liczby i rodzaju - wykonalne za pomocą kontroli *)
301 322 | AdjP CaseAgr as morf -> if mood = "gerundial" then [AdjP AllAgr] else (failwith ("transform_pers_phrase2: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf))
302 323 | AdjP(Case _) as morf -> [morf] (* FIXME: pomijam uzgadnianie liczby i rodzaju - wykonalne za pomocą kontroli *)
  324 + | AdjP(NomAgr) as morf -> if mood = "no-subj" then [AdjP(Case "nom")] else [morf]
303 325 | CP(ctype,comp) as morf -> [morf]
304 326 | PrepNP _ as morf -> [morf]
305 327 | PrepAdjP _ as morf -> [morf] (* FIXME: pomijam uzgadnianie liczby i rodzaju - wykonalne za pomocą kontroli *)
... ... @@ -348,7 +370,7 @@ let transform_pers_pos lemma negation mood = function
348 370 let rec transform_comps negation mood = function
349 371 | CP(ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> CP(ctype,comp))
350 372 | NCP(case,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> NCP(case,ctype,comp))
351   - | PrepNCP(prep,case,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> PrepNCP(prep,case,ctype,comp))
  373 + | PrepNCP(psem,prep,case,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> PrepNCP(psem,prep,case,ctype,comp))
352 374 | E phrase -> Xlist.map (transform_comps negation mood phrase) (fun phrase -> E phrase)
353 375 | morf -> [morf]
354 376  
... ... @@ -363,31 +385,31 @@ let transform_preps morf =
363 385 | SimpleLexArg(lex,PREP c) -> if is_compar lex then SimpleLexArg(lex,COMPAR c) else SimpleLexArg(lex,PREP c)
364 386 | PrepNP(psem,prep,c) -> if is_compar prep then ComparP(prep,c) else PrepNP(psem,prep,c)
365 387 | PrepAdjP(prep,c) -> if is_compar prep then ComparP(prep,c) else PrepAdjP(prep,c)
366   - | PrepNCP(prep,case,ctype,comp) as morf -> if is_compar prep then failwith "transform_preps" else morf
  388 + | PrepNCP(psem,prep,case,ctype,comp) as morf -> if is_compar prep then failwith "transform_preps 1" else morf
367 389 | morf -> morf in
368 390 match morf with
369 391 | ComparP(prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> ComparP(prep,Case case))
370   - | ComparP _ -> failwith "transform_preps"
  392 + | ComparP _ -> failwith "transform_preps 2"
371 393 | LexArg(id,lex,COMPAR Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> LexArg(id,lex,COMPAR (Case case)))
372 394 | SimpleLexArg(lex,COMPAR Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> SimpleLexArg(lex,COMPAR (Case case)))
373 395 | LexArg(id,lex,COMPAR (Case _)) as morf -> [morf]
374 396 | SimpleLexArg(lex,COMPAR (Case _)) as morf -> [morf]
375   - | LexArg(id,lex,COMPAR _) -> failwith "transform_preps"
376   - | SimpleLexArg(lex,COMPAR _) -> failwith "transform_preps"
  397 + | LexArg(id,lex,COMPAR _) -> failwith "transform_preps 3"
  398 + | SimpleLexArg(lex,COMPAR _) -> failwith "transform_preps 4"
377 399 | PrepNP(sem,"per",Str) -> [PrepNP(sem,"per",Case "nom");PrepNP(sem,"per",Case "voc")] (* FIXME: voc do poprawienie w leksykonie *)
378 400 | PrepNP(_,_,Case _) as morf -> [morf]
379 401 | PrepAdjP(_,Case _) as morf -> [morf]
380   - | PrepNCP(_,Case _,_,_) as morf -> [morf]
381   - | PrepNP(_,"_",CaseUndef) as morf -> [morf]
382   - | PrepNP _ -> failwith "transform_preps"
383   - | PrepAdjP _ -> failwith "transform_preps"
384   - | PrepNCP _ -> failwith "transform_preps"
  402 + | PrepNCP(_,_,Case _,_,_) as morf -> [morf]
  403 + | PrepNP(_,_,CaseUndef) as morf -> [morf]
  404 + | PrepNP _ as morf -> failwith ("transform_preps 5: " ^ ENIAMwalStringOf.phrase morf)
  405 + | PrepAdjP _ -> failwith "transform_preps 6"
  406 + | PrepNCP _ -> failwith "transform_preps 7"
385 407 | LexArg(id,"w",PREP Str) -> [LexArg(id,"w",PREP (Case "acc"));LexArg(id,"w",PREP (Case "loc"));]
386 408 | SimpleLexArg("w",PREP Str) -> [SimpleLexArg("w",PREP (Case "acc"));SimpleLexArg("w",PREP (Case "loc"))]
387 409 | LexArg(id,lex,PREP (Case _)) as morf -> [morf]
388 410 | SimpleLexArg(lex,PREP (Case _)) as morf -> [morf]
389   - | LexArg(id,lex,PREP _) -> failwith "transform_preps"
390   - | SimpleLexArg(lex,PREP _) -> failwith "transform_preps"
  411 + | LexArg(id,lex,PREP _) -> failwith "transform_preps 8"
  412 + | SimpleLexArg(lex,PREP _) -> failwith "transform_preps 9"
391 413 | morf -> [morf]
392 414  
393 415 let transform_pers_schema lemma negation mood schema =
... ... @@ -445,7 +467,8 @@ let transform_ger_schema lemma negation schema = (* FIXME: zakładam, że ger ze
445 467 | phrase -> transform_pers_phrase lemma negation "gerundial" phrase))})
446 468  
447 469 let transform_ppas_schema lemma negation mood schema =
448   - if not (Xlist.fold schema false (fun b p -> if p.gf = OBJ then true else b)) then raise Not_found else
  470 + if not (Xlist.fold schema false (fun b p -> if p.gf = OBJ then true else b)) then
  471 + (*failwith ("transform_ppas_schema: attempt to make ppas schema for lemma " ^ lemma ^ "without OBJ arg")*)raise Not_found else
449 472 Xlist.map schema (fun s ->
450 473 let morfs = List.flatten (Xlist.map s.morfs (transform_comps negation mood)) in
451 474 let morfs = List.flatten (Xlist.map morfs transform_preps) in
... ... @@ -488,6 +511,9 @@ let transform_schema pos lemma schema =
488 511 | "comp" -> transform_comp_phrase,transform_comp_pos
489 512 | "qub" -> transform_qub_phrase,transform_qub_pos
490 513 | "siebie" -> transform_siebie_phrase,transform_siebie_pos
  514 + | "interj" -> transform_interj_phrase,transform_interj_pos
  515 + | "sinterj" -> transform_sinterj_phrase,transform_interj_pos
  516 + | "aglt" -> transform_aglt_phrase,transform_interj_pos
491 517 | _ -> failwith "transform_schema"
492 518 in
493 519 Xlist.map schema (fun s ->
... ... @@ -524,21 +550,26 @@ let aspect_sel = function
524 550 open ENIAM_LCGlexiconTypes
525 551  
526 552 let transform_entry pos lemma negation pred aspect schema =
527   - if pos = "subst" || pos = "depr" then (
  553 + match pos with
  554 + "subst" |"depr" ->
528 555 if negation <> NegationUndef || pred <> PredFalse || aspect <> AspectUndef then failwith ("transform_entry 1");
529   - [[],transform_schema "subst" lemma schema]) else
530   - if pos = "adj" || pos = "adjc" || pos = "adjp" then (
  556 + [[],transform_schema "subst" lemma schema]
  557 + | "adj" |"adjc" |"adjp" ->
531 558 if negation <> NegationUndef || aspect <> AspectUndef then failwith ("transform_entry 2");
532 559 let sel = match pred with PredTrue -> [Case,Eq,["pred"]] | _ -> [] in
533   - [sel,transform_schema "adj" lemma schema]) else
534   - if pos = "adv" || pos = "prep" || pos = "comprep" || pos = "comp" || pos = "compar" || pos = "qub" || pos = "siebie" then (
  560 + [sel,transform_schema "adj" lemma schema]
  561 + | "adv" | "prep" | "comprep" | "comp" | "compar" | "qub" | "siebie" ->
535 562 if negation <> NegationUndef || (*pred <> PredFalse ||*) aspect <> AspectUndef then failwith ("transform_entry 3"); (* FIXME: typy przysłówków *)
536   - [[],transform_schema pos lemma schema]) else
  563 + [[],transform_schema pos lemma schema]
  564 + | _ ->
537 565 if pred <> PredFalse then failwith ("transform_entry 4") else
538 566 if pos = "num" || pos = "intnum" then (
539 567 if negation <> NegationUndef || aspect <> AspectUndef then failwith ("transform_entry 5");
540 568 Xlist.map ["congr";"rec"] (fun acm ->
541 569 [Acm,Eq,[acm]],transform_num_schema acm schema)) else
  570 + if pos = "interj" then (
  571 + if negation <> NegationUndef || pred <> PredFalse || aspect <> AspectUndef then failwith ("transform_entry 6");
  572 + [[],transform_schema "interj" lemma schema]) else
542 573 List.flatten (Xlist.map (expand_negation negation) (fun negation ->
543 574 let sel = [Negation,Eq,[ENIAMwalStringOf.negation negation]] @ aspect_sel aspect in
544 575 if pos = "fin" || pos = "bedzie" then
... ... @@ -555,7 +586,7 @@ let transform_entry pos lemma negation pred aspect schema =
555 586 [sel @ [Mood,Eq,["indicative"]],transform_pers_schema lemma negation "indicative" schema] else
556 587 if pos = "pcon" || pos = "pant" || pos = "inf" || pos = "pact" then
557 588 (* let role,role_attr = try get_role SUBJ schema with Not_found -> "Initiator","" in *)
558   - [sel, transform_nosubj_schema lemma negation "indicative" schema] else
  589 + [sel, transform_nosubj_schema lemma negation "no-subj" schema] else
559 590 if pos = "ppas" then
560 591 try
561 592 (* let role,role_attr = try get_role OBJ schema with Not_found -> "Theme","" in *)
... ... @@ -563,6 +594,7 @@ let transform_entry pos lemma negation pred aspect schema =
563 594 with Not_found -> [] else
564 595 if pos = "ger" then
565 596 [sel,transform_ger_schema lemma negation schema] else
  597 + if schema = [] then [[],[]] else
566 598 failwith ("transform_entry: " ^ pos)))
567 599  
568 600 let transform_lex_entry pos lemma = function
... ...
lexSemantics/ENIAMwalParser.ml
... ... @@ -256,7 +256,7 @@ let rec parse_phrase = function
256 256 | "comparp",[[Text prep]] -> ComparP(prep,Str)
257 257 | "cp",[ctype;comp] -> CP(parse_ctype ctype,parse_comp comp)
258 258 | "ncp",[case;ctype;comp] -> NCP(parse_case case,parse_ctype ctype,parse_comp comp)
259   - | "prepncp",[[Text prep];case;ctype;comp] -> PrepNCP(prep,parse_case case,parse_ctype ctype,parse_comp comp)
  259 + | "prepncp",[[Text prep];case;ctype;comp] -> PrepNCP(Psem,prep,parse_case case,parse_ctype ctype,parse_comp comp)
260 260 | "infp",[aspect] -> InfP(parse_aspect aspect)
261 261 | "fixed",[[Text lemma]] -> FixedP lemma
262 262 | "fixed",[[Text lemma1];[Text lemma2]] -> FixedP (lemma1 ^ "," ^ lemma2)
... ... @@ -423,43 +423,43 @@ let load_schemata filename =
423 423  
424 424 let load_connected filename =
425 425 let l = File.load_tab filename (function
426   - [pos; lemma; sopinion; fopinion; meanings; neg; pred; aspect; schema] ->
427   - pos, lemma, sopinion, fopinion, meanings, neg, pred, aspect, schema
  426 + [pos; lemma; sopinion; fopinion; senses; neg; pred; aspect; schema] ->
  427 + pos, lemma, sopinion, fopinion, senses, neg, pred, aspect, schema
428 428 | _ -> failwith "load_schemata") in
429   - Xlist.fold l Entries.empty (fun entries (pos,lemma,sopinion,fopinion,meanings,neg,pred,aspect,schema) ->
  429 + Xlist.fold l Entries.empty (fun entries (pos,lemma,sopinion,fopinion,senses,neg,pred,aspect,schema) ->
430 430 let sopinion = parse_opinion sopinion in
431 431 let fopinion = parse_opinion fopinion in
432   - let meanings = Xlist.map (Xstring.split "," meanings) int_of_string in
  432 + let senses = Xlist.map (Xstring.split "," senses) int_of_string in
433 433 let neg = parse_negation [Text neg] in
434 434 let pred = parse_pred pred in
435 435 let aspect = parse_aspect [Text aspect] in
436 436 let schema = parse_connected_schema (split_text schema) in
437   - let entry = sopinion,fopinion,meanings,neg,pred,aspect,schema in
  437 + let entry = sopinion,fopinion,senses,neg,pred,aspect,schema in
438 438 Entries.add_inc entries pos lemma entry)
439 439  
440   -let load_meanings filename =
  440 +let load_senses filename =
441 441 let l = File.load_tab filename (function
442 442 [id; name; variant; plwnluid; gloss] -> {mng_id=int_of_string id;
443 443 name=name;
444 444 variant=variant;
445 445 plwnluid=int_of_string plwnluid;
446 446 gloss=gloss}
447   - | _ -> failwith "load_meaning") in
448   - Xlist.fold l IntMap.empty (fun meanings m ->
449   - IntMap.add meanings m.mng_id m)
  447 + | _ -> failwith "load_sense") in
  448 + Xlist.fold l IntMap.empty (fun senses m ->
  449 + IntMap.add senses m.mng_id m)
450 450  
451 451 let phrases = ref IntMap.empty
452 452 let entries = ref StringMap.empty
453 453 let schemata = ref StringMap.empty
454 454 let connected = ref StringMap.empty
455   -let meanings = ref IntMap.empty
  455 +let senses = ref IntMap.empty
456 456  
457 457 let initialize () =
458 458 phrases := load_phrases phrases_filename;
459 459 entries := load_entries entries_filename;
460 460 schemata := load_schemata schemata_filename;
461 461 connected := load_connected connected_filename;
462   - meanings := load_meanings meanings_filename;
  462 + senses := load_senses senses_filename;
463 463 ()
464 464  
465 465  
... ...
lexSemantics/ENIAMwalReduce.ml
... ... @@ -23,7 +23,7 @@ open Xstd
23 23 let create_phrase_reqs s (reqs,noreqs) = function
24 24 | PrepNP(_,prep,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
25 25 | PrepAdjP(prep,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
26   - | PrepNCP(prep,_,_,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
  26 + | PrepNCP(_,prep,_,_,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
27 27 | ComparP(prep,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
28 28 | FixedP(prep) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
29 29 | SimpleLexArg(lex,_) -> StringMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs
... ... @@ -34,7 +34,7 @@ let create_phrase_reqs s (reqs,noreqs) = function
34 34 let create_phrase_reqs2 s (reqs,noreqs) = function
35 35 | PrepNP(_,prep,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
36 36 | PrepAdjP(prep,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
37   - | PrepNCP(prep,_,_,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
  37 + | PrepNCP(_,prep,_,_,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
38 38 | ComparP(prep,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
39 39 | FixedP(prep) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
40 40 | SimpleLexArg(lex,_) -> IntMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs
... ... @@ -90,6 +90,7 @@ let select_comprep_adjuncts lexemes =
90 90 not (StringSet.is_empty (StringSet.intersection reqs lexemes)) then s :: l else l)
91 91 with Not_found -> l)
92 92  
  93 +(* FIXME: trzeba zanalizować interację tej procedury z Pro w schemacie w wersji z walentym i z semantyką dziedzinową *)
93 94 let set_necessary pos schema =
94 95 Xlist.map schema (fun p ->
95 96 let nec =
... ... @@ -101,6 +102,8 @@ let set_necessary pos schema =
101 102 | _ -> b) then Req else
102 103 if p.gf <> SUBJ && p.cr = [] (*&& p.ce = []*) then Opt else
103 104 if p.gf = SUBJ && pos = "impt" then ProNG else
  105 + if p.gf = SUBJ && pos = "pact" then Opt else
  106 + if p.gf = OBJ && pos = "ppas" then Opt else
104 107 if Xlist.fold p.morfs false (fun b -> function
105 108 NP NomAgr -> true
106 109 | NCP(NomAgr,_,_) -> true
... ... @@ -126,7 +129,7 @@ let reduce_phrase (test_comprep_reqs,test_comprep_reqs2,test_lexarg_reqs,test_le
126 129 | ComparP(prep,case) as phrase -> if test_lexemes prep then phrase else raise Not_found
127 130 | CP(ctype,comp) -> CP(ctype,reduce_comp test_lexemes comp)
128 131 | NCP(case,ctype,comp) -> if test_lexemes "to" then NCP(case,ctype,reduce_comp test_lexemes comp) else raise Not_found
129   - | PrepNCP(prep,case,ctype,comp) -> if test_lexemes prep && test_lexemes "to" then PrepNCP(prep,case,ctype,reduce_comp test_lexemes comp) else raise Not_found
  132 + | PrepNCP(psem,prep,case,ctype,comp) -> if test_lexemes prep && test_lexemes "to" then PrepNCP(psem,prep,case,ctype,reduce_comp test_lexemes comp) else raise Not_found
130 133 | SimpleLexArg(lemma,_) as phrase -> if test_lexemes lemma then phrase else raise Not_found
131 134 | LexArg(id,lemma,_) as phrase -> if test_lexemes lemma && test_lexarg_reqs id then phrase else raise Not_found
132 135 | FixedP lemma as phrase -> if test_lexemes lemma then phrase else raise Not_found
... ... @@ -168,11 +171,11 @@ let merge_entries phrases entries =
168 171 Entries.map entries (fun _ _ (opinion,neg,pred,aspect,schema) ->
169 172 opinion,neg,pred,aspect,merge_schema phrases schema)
170 173  
171   -let merge_entries_conn phrases meanings entries =
172   - Entries.map entries (fun _ _ (sopinion,fopinion,meaning_ids,neg,pred,aspect,schema) ->
173   - let meanings = Xlist.map meaning_ids (fun id ->
174   - try IntMap.find meanings id with Not_found -> failwith "merge_entries_conn") in
175   - sopinion,fopinion,meanings,neg,pred,aspect,merge_schema phrases schema)
  174 +let merge_entries_conn phrases senses entries =
  175 + Entries.map entries (fun _ _ (sopinion,fopinion,sense_ids,neg,pred,aspect,schema) ->
  176 + let senses = Xlist.map sense_ids (fun id ->
  177 + try IntMap.find senses id with Not_found -> failwith "merge_entries_conn") in
  178 + sopinion,fopinion,senses,neg,pred,aspect,merge_schema phrases schema)
176 179  
177 180 let create_tests comprep_reqs comprep_reqs2 lexarg_reqs lexemes =
178 181 let lexemes = StringSet.add (StringSet.add lexemes "_") "" in
... ... @@ -191,7 +194,7 @@ let create_tests comprep_reqs comprep_reqs2 lexarg_reqs lexemes =
191 194 StringSet.mem lexemes
192 195  
193 196  
194   -let select_entries_full phrases entries schemata connected meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes =
  197 +let select_entries_full phrases entries schemata connected senses comprep_reqs comprep_reqs2 lexarg_reqs lexemes =
195 198 let tests = create_tests comprep_reqs comprep_reqs2 lexarg_reqs lexemes in
196 199 let entries = reduce_entries lexemes entries in
197 200 let schemata = reduce_entries lexemes schemata in
... ... @@ -205,25 +208,25 @@ let select_entries_full phrases entries schemata connected meanings comprep_reqs
205 208 with ImpossibleSchema -> []) in
206 209 let schemata = Entries.map schemata (fun _ _ (opinion,neg,pred,aspect,schema) ->
207 210 opinion,neg,pred,aspect,reduce_schema2 tests schema) in
208   - let connected = merge_entries_conn phrases meanings connected in
209   - let connected = Entries.map connected (fun _ _ (sopinion,fopinion,meaning_ids,neg,pred,aspect,schema) ->
210   - sopinion,fopinion,meaning_ids,neg,pred,aspect,reduce_schema2 tests schema) in
  211 + let connected = merge_entries_conn phrases senses connected in
  212 + let connected = Entries.map connected (fun _ _ (sopinion,fopinion,sense_ids,neg,pred,aspect,schema) ->
  213 + sopinion,fopinion,sense_ids,neg,pred,aspect,reduce_schema2 tests schema) in
211 214 entries,schemata,connected
212 215  
213   -let select_all_entries phrases entries schemata connected meanings =
  216 +let select_all_entries phrases entries schemata connected senses =
214 217 let schemata = merge_entries phrases schemata in
215   - let connected = merge_entries_conn phrases meanings connected in
  218 + let connected = merge_entries_conn phrases senses connected in
216 219 entries,schemata,connected
217 220  
218 221 let select_entries lexemes =
219 222 select_entries_full !ENIAMwalParser.phrases !ENIAMwalParser.entries !ENIAMwalParser.schemata
220   - !ENIAMwalParser.connected !ENIAMwalParser.meanings !comprep_reqs !comprep_reqs2 !lexarg_reqs lexemes
  223 + !ENIAMwalParser.connected !ENIAMwalParser.senses !comprep_reqs !comprep_reqs2 !lexarg_reqs lexemes
221 224  
222 225 (* let entries,schemata,connected =
223 226 (* let lexemes = StringSet.of_list ["Ala"; "ma"; "kot"] in *)
224 227 let lexemes = StringSet.of_list ["dorastać"; "dorobić"; "po"; "bok"; "na"] in
225 228 select_entries ENIAMwalParser.phrases ENIAMwalParser.entries ENIAMwalParser.schemata
226   - ENIAMwalParser.connected ENIAMwalParser.meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes *)
  229 + ENIAMwalParser.connected ENIAMwalParser.senses comprep_reqs comprep_reqs2 lexarg_reqs lexemes *)
227 230  
228 231 (* let _ =
229 232 StringMap.iter comprep_reqs (fun s set ->
... ...
lexSemantics/ENIAMwalRenderer.ml
... ... @@ -53,7 +53,7 @@ let render_pos_entry = function
53 53 | "ppas" -> [Atom "ppas"; AVar "number"; AVar "case"; AVar "gender"; AVar "negation"]
54 54 | "inf" -> [Atom "inf"; AVar "aspect"; AVar "negation"]
55 55 | "qub" -> [Atom "qub"]
56   - | "compar" -> [Atom "compar"; AVar "case"]
  56 + | "compar" -> [Atom "comparp"; AVar "case"]
57 57 | "comp" -> [Atom "comp"; AVar "ctype"]
58 58 | "fin" -> [Atom "pers"; AVar "negation"]
59 59 | "praet" -> [Atom "pers"; AVar "negation"]
... ... @@ -117,7 +117,7 @@ let render_phrase = function
117 117 | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Atom case]
118 118 | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case]
119 119 | AdjP(Case case) -> Tensor[Atom "adjp"; Top; Atom case; Top]
120   -(* | AdjP NomAgr -> Tensor[Atom "adjp"; AVar "number"; Atom "nom"; AVar "gender"]*)
  120 + | AdjP NomAgr -> Tensor[Atom "adjp"; AVar "number"; Atom "nom"; AVar "gender"]
121 121 | AdjP AllAgr -> Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]
122 122 (* | AdjP CaseAgr -> Tensor[Atom "adjp"; Top; AVar "case"; Top]
123 123 | PrepAdjP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]*)
... ... @@ -130,7 +130,7 @@ let render_phrase = function
130 130 | PrepNumP(_,prep,Case case) -> Tensor[Atom "prepnump"; Atom prep; Atom case] *)
131 131 (* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top]*)
132 132 | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom prep]
133   - | ComparP(prep,Case case) -> Tensor[Atom "compar"; Atom prep; Atom case]
  133 + | ComparP(prep,Case case) -> Tensor[Atom "comparp"; Atom prep; Atom case]
134 134 (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep] *)
135 135 (* | IP -> Tensor[Atom "ip";Top;Top;Top] *)
136 136 | CP (ctype,Comp comp) -> Tensor[Atom "cp"; arg_of_ctype ctype; Atom comp]
... ... @@ -139,8 +139,10 @@ let render_phrase = function
139 139 | NCP(Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top]
140 140 | NCP(NomAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp]
141 141 | NCP(NomAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top]
142   - | PrepNCP(prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp]
143   - | PrepNCP(prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom prep; Atom case; Top; Top]
  142 + | PrepNCP(Psem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp]
  143 + | PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top]
  144 + | PrepNCP(Pnosem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp]
  145 + | PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top]
144 146 | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom aspect]
145 147 | InfP AspectUndef -> Tensor[Atom "infp"; Top]
146 148 (* | PadvP -> Tensor[Atom "padvp"] *)
... ... @@ -171,54 +173,64 @@ let render_phrase = function
171 173 | E (PrepNP(Pnosem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case]
172 174 | E (NP(Case case)) -> Tensor[Atom "np"; Top; Atom case; Top; Top]
173 175 | E (NCP(Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top]
174   - | E (PrepNCP(prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom prep; Atom case; Top; Top]
  176 + | E (PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top]
  177 + | E (PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top]
175 178 | phrase -> failwith ("render_phrase: " ^ ENIAMwalStringOf.phrase phrase)
176 179  
177   -let render_phrase_cat cat = function
178   - NP(Case case) -> Tensor[Atom "np"; Atom cat; Top; Atom case; Top; Top]
179   - | NP NomAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
180   -(* | NP GenAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"]
181   - | NP AllAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]*)
182   - | NP CaseAgr -> Tensor[Atom "np"; Atom cat; Top; AVar "case"; Top; Top]
183   - | NP CaseUndef -> Tensor[Atom "np"; Atom cat; Top; Top; Top; Top]
184   - | PrepNP(Psem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top]
185   - | PrepNP(Psem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top]
186   - | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case]
187   - | PrepNP(Pnosem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top]
188   - | PrepNP(Pnosem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top]
189   - | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case]
190   - | AdjP(Case case) -> Tensor[Atom "adjp"; Atom cat; Top; Atom case; Top]
191   -(* | AdjP NomAgr -> Tensor[Atom "adjp"; AVar "number"; Atom "nom"; AVar "gender"]*)
192   - | AdjP AllAgr -> Tensor[Atom "adjp"; Atom cat; AVar "number"; AVar "case"; AVar "gender"]
193   -(* | AdjP CaseAgr -> Tensor[Atom "adjp"; Top; AVar "case"; Top]
194   - | PrepAdjP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]*)
195   - | PrepAdjP(prep,Case case) -> Tensor[Atom "prepadjp"; Atom cat; Atom prep; Atom case]
196   - (* | NumP(Case case) -> Tensor[Atom "nump"; Top; Atom case; Top; Top]
  180 +let render_phrase_cat cat role node = function
  181 + NP(Case case) -> Tensor[Atom "np"; Top; Atom case; Top; Top; Atom cat; Atom role; Atom node]
  182 + | NP NomAgr -> Tensor[Atom "np"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Atom cat; Atom role; Atom node]
  183 + | NP VocAgr -> Tensor[Atom "np"; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; Atom cat; Atom role; Atom node]
  184 +(* | NP GenAgr -> Tensor[Atom "np"; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"; Atom cat; Atom role; Atom node]
  185 + | NP AllAgr -> Tensor[Atom "np"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"; Atom cat; Atom role; Atom node]*)
  186 + | NP CaseAgr -> Tensor[Atom "np"; Top; AVar "case"; Top; Top; Atom cat; Atom role; Atom node]
  187 + | NP CaseUndef -> Tensor[Atom "np"; Top; Top; Top; Top; Atom cat; Atom role; Atom node]
  188 + | PrepNP(Psem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom "sem"; Top; Top; Atom cat; Atom role; Atom node]
  189 + | PrepNP(Psem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom "sem"; Top; Top; Atom cat; Atom role; Atom node]
  190 + | PrepNP(Psem,"_",Case case) -> Tensor[Atom "prepnp"; Atom "sem"; Top; Atom case; Atom cat; Atom role; Atom node]
  191 + | PrepNP(Psem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Top; Atom cat; Atom role; Atom node]
  192 + | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Atom case; Atom cat; Atom role; Atom node]
  193 + | PrepNP(Pnosem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom "nosem"; Top; Top; Atom cat; Atom role; Atom node]
  194 + | PrepNP(Pnosem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom "nosem"; Top; Top; Atom cat; Atom role; Atom node]
  195 + | PrepNP(Pnosem,"_",Case case) -> Tensor[Atom "prepnp"; Atom "nosem"; Top; Atom case; Atom cat; Atom role; Atom node]
  196 + | PrepNP(Pnosem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Top; Atom cat; Atom role; Atom node]
  197 + | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case; Atom cat; Atom role; Atom node]
  198 + | AdjP(Case case) -> Tensor[Atom "adjp"; Top; Atom case; Top; Top; Atom cat; Atom role; Atom node]
  199 + | AdjP NomAgr -> Tensor[Atom "adjp"; AVar "number"; Atom "nom"; AVar "gender"; Top; Atom cat; Atom role; Atom node]
  200 + | AdjP AllAgr -> Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"; Top; Atom cat; Atom role; Atom node]
  201 +(* | AdjP CaseAgr -> Tensor[Atom "adjp"; Top; AVar "case"; Top; Top; Atom cat; Atom role; Atom node]
  202 + | PrepAdjP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top; Atom cat; Atom role; Atom node]*)
  203 + | PrepAdjP(prep,Case case) -> Tensor[Atom "prepadjp"; Atom prep; Atom case; Atom cat; Atom role; Atom node]
  204 + (* | NumP(Case case) -> Tensor[Atom "nump"; Top; Atom case; Top; Atom node]
197 205 | NumP NomAgr -> Tensor[Atom "nump"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
198 206 | NumP CaseAgr -> Tensor[Atom "nump"; Top; AVar "case"; Top; Top]
199 207 | NumP CaseUndef -> Tensor[Atom "nump"; Top; Top; Top; Top]
200 208 | PrepNumP(_,"",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]
201 209 | PrepNumP(_,prep,Case case) -> Tensor[Atom "prepnump"; Atom prep; Atom case] *)
202   -(* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top]*)
203   - | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom cat; Atom prep]
204   - | ComparP(prep,Case case) -> Tensor[Atom "compar"; Atom cat; Atom prep; Atom case]
205   - (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep] *)
206   - (* | IP -> Tensor[Atom "ip";Top;Top;Top] *)
207   - | CP (ctype,Comp comp) -> Tensor[Atom "cp"; Atom cat; arg_of_ctype ctype; Atom comp]
208   - (* | CP (ctype,CompUndef) -> Tensor[Atom "cp"; arg_of_ctype ctype; Top]*)
209   - | NCP(Case case,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; arg_of_ctype ctype; Atom comp]
210   - | NCP(Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; Top; Top]
211   - | NCP(NomAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp]
212   - | NCP(NomAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top]
213   - | PrepNCP(prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom cat; Atom prep; Atom case; arg_of_ctype ctype; Atom comp]
214   - | PrepNCP(prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom cat; Atom prep; Atom case; Top; Top]
215   - | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom cat; Atom aspect]
216   - | InfP AspectUndef -> Tensor[Atom "infp"; Atom cat; Top]
217   - (* | PadvP -> Tensor[Atom "padvp"] *)
218   - | AdvP "misc" -> Tensor[Atom "advp"; Atom cat; Top] (* FIXME: a może Atom "mod" zamiast Top *)
219   - | AdvP "" -> Tensor[Atom "advp"; Atom cat; Top] (* FIXME: a może Atom "mod" zamiast Top *)
220   - | AdvP mode -> Tensor[Atom "advp"; Atom cat; Atom mode]
221   - | ColonP -> Tensor[Atom "colonp"; Atom cat]
  210 +(* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top; Atom cat; Atom role; Atom node]*)
  211 + | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom prep; Atom cat; Atom role; Atom node]
  212 + | ComparP(prep,Case case) -> Tensor[Atom "comparp"; Atom prep; Atom case; Atom cat; Atom role; Atom node]
  213 + (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep; Atom cat; Atom role; Atom node] *)
  214 + (* | IP -> Tensor[Atom "ip";Top;Top;Top; Atom cat; Atom role; Atom node] *)
  215 + | CP (ctype,Comp comp) -> Tensor[Atom "cp"; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Atom node]
  216 + (* | CP (ctype,CompUndef) -> Tensor[Atom "cp"; arg_of_ctype ctype; Top; Atom cat; Atom role; Atom node]*)
  217 + | NCP(Case case,ctype,Comp comp) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Atom node]
  218 + | NCP(Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top; Atom cat; Atom role; Atom node]
  219 + | NCP(NomAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Atom node]
  220 + | NCP(NomAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top; Atom cat; Atom role; Atom node]
  221 + | NCP(VocAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Atom node]
  222 + | NCP(VocAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; Top; Top; Atom cat; Atom role; Atom node]
  223 + | PrepNCP(Psem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Atom node]
  224 + | PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top; Atom cat; Atom role; Atom node]
  225 + | PrepNCP(Pnosem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Atom node]
  226 + | PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top; Atom cat; Atom role; Atom node]
  227 + | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom aspect; Atom cat; Atom role; Atom node]
  228 + | InfP AspectUndef -> Tensor[Atom "infp"; Top; Atom cat; Atom role; Atom node]
  229 + (* | PadvP -> Tensor[Atom "padvp"; Atom cat; Atom role; Atom node] *)
  230 + | AdvP "misc" -> Tensor[Atom "advp"; Top; Atom cat; Atom role; Atom node] (* FIXME: a może Atom "mod" zamiast Top *)
  231 + | AdvP "" -> Tensor[Atom "advp"; Top; Atom cat; Atom role; Atom node] (* FIXME: a może Atom "mod" zamiast Top *)
  232 + | AdvP mode -> Tensor[Atom "advp"; Top; Atom cat; Atom role; Atom node]
  233 + | ColonP -> Tensor[Atom "colonp"; Atom cat; Atom cat; Atom role; Atom node]
222 234 (* | PrepP -> Tensor[Atom "prepp";Top]
223 235 | Prep("",CaseAgr) -> Tensor[Atom "prep"; Top; AVar "case"]
224 236 | Prep("",CaseUAgr) -> Tensor[Atom "prep"; Top; AVar "ucase"]
... ... @@ -233,14 +245,15 @@ let render_phrase_cat cat = function
233 245 | AuxImp -> Tensor[Atom "aux-imp"]
234 246 | Pro -> One
235 247 | ProNG -> One *)
236   - | E (CP(CompTypeUndef,CompUndef)) -> Tensor[Atom "cp"; Atom cat; Top; Top]
237   - | E (NCP(NomAgr,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top]
238   - | E (NP(NomAgr)) -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
239   - | E (PrepNP(Psem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case]
240   - | E (PrepNP(Pnosem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case]
241   - | E (NP(Case case)) -> Tensor[Atom "np"; Atom cat; Top; Atom case; Top; Top]
242   - | E (NCP(Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; Top; Top]
243   - | E (PrepNCP(prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom cat; Atom prep; Atom case; Top; Top]
  248 + | E (CP(CompTypeUndef,CompUndef)) -> Tensor[Atom "cp"; Top; Top; Atom cat; Atom role; Atom node]
  249 + | E (NCP(NomAgr,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top; Atom cat; Atom role; Atom node]
  250 + | E (NP(NomAgr)) -> Tensor[Atom "np"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Atom cat; Atom role; Atom node]
  251 + | E (PrepNP(Psem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Atom case; Atom cat; Atom role; Atom node]
  252 + | E (PrepNP(Pnosem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case; Atom cat; Atom role; Atom node]
  253 + | E (NP(Case case)) -> Tensor[Atom "np"; Top; Atom case; Top; Top; Atom cat; Atom role; Atom node]
  254 + | E (NCP(Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top; Atom cat; Atom role; Atom node]
  255 + | E (PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top; Atom cat; Atom role; Atom node]
  256 + | E (PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top; Atom cat; Atom role; Atom node]
244 257 | phrase -> failwith ("render_phrase_cat: " ^ ENIAMwalStringOf.phrase phrase)
245 258  
246 259 let render_morf = function
... ... @@ -251,7 +264,7 @@ let render_morf = function
251 264 | SimpleLexArg(lex,pos) -> Tensor([Atom "lex";Atom lex] @ render_pos pos)
252 265 | phrase -> render_phrase phrase
253 266  
254   -let render_morf_cat cats = function
  267 +let render_morf_cat cats role node = function
255 268 | Null -> [One]
256 269 | Pro -> [One]
257 270 | ProNG -> [One]
... ... @@ -261,13 +274,13 @@ let render_morf_cat cats = function
261 274 (* | X -> Tensor[Atom "X"]
262 275 | Lex lex -> Tensor[Atom lex] *)
263 276 | LexArg(id,lex,pos) -> [Tensor([Atom "lex";Atom (string_of_int id);Atom lex] @ render_pos pos)]
264   - | SimpleLexArg(lex,pos) -> [Tensor([Atom "lex";Atom lex] @ render_pos pos)]
265   - | phrase -> Xlist.map cats (fun cat -> render_phrase_cat cat phrase)
  277 + | SimpleLexArg(lex,pos) -> [Tensor([Atom "lex";Atom lex] @ render_pos pos @ [Atom role; Atom node])]
  278 + | phrase -> Xlist.map cats (fun cat -> render_phrase_cat cat role node phrase)
266 279  
267   -let extract_sel_prefs sel_prefs =
  280 +(* let extract_sel_prefs sel_prefs =
268 281 Xlist.map sel_prefs (function
269 282 SynsetName s -> s
270   - | _ -> failwith "extract_sel_prefs")
  283 + | _ -> failwith "extract_sel_prefs") *)
271 284  
272 285 let render_schema schema =
273 286 Xlist.map schema (fun p ->
... ... @@ -283,7 +296,7 @@ let translate_dir = function
283 296  
284 297 let render_schema_cat schema =
285 298 Xlist.map schema (fun p ->
286   - match List.flatten (Xlist.map p.morfs (render_morf_cat (extract_sel_prefs p.sel_prefs))) with
  299 + match List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs p.role p.node)) with
287 300 [] -> failwith "render_schema"
288 301 | [s] -> translate_dir p.dir,s
289 302 | l -> translate_dir p.dir,Plus l)
... ... @@ -298,7 +311,8 @@ let render_connected_schema schema =
298 311  
299 312 let render_connected_schema_cat schema =
300 313 Xlist.map schema (fun p ->
301   - {p with morfs=Xlist.map (List.flatten (Xlist.map p.morfs (render_morf_cat (extract_sel_prefs p.sel_prefs)))) (fun morf -> LCG morf)})
  314 + {p with
  315 + morfs=Xlist.map (List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs p.role p.node))) (fun morf -> LCG morf)})
302 316  
303 317 (* FIXME: tu trzeba by dodać zwykłe reguły dla czasowników dotyczące ich negacji, aglutynatu itp. *)
304 318 let render_lex_entry = function
... ... @@ -353,9 +367,9 @@ let render_connected_prepadjp prep cases =
353 367 adjunct (postp @ (Xlist.map cases (fun case ->
354 368 Tensor[Atom "prepadjp"; Atom prep; Atom case])))
355 369  
356   -let render_compar prep = Both,Plus[One;Tensor[Atom "compar"; Atom prep; Top]]
  370 +let render_compar prep = Both,Plus[One;Tensor[Atom "comparp"; Atom prep; Top]]
357 371  
358   -let render_connected_compar prep = adjunct [Tensor[Atom "compar"; Atom prep; Top]]
  372 +let render_connected_compar prep = adjunct [Tensor[Atom "comparp"; Atom prep; Top]]
359 373  
360 374 let verb_adjuncts_simp = [
361 375 Both, Plus[One;Tensor[Atom "advp"; Atom "pron"]];
... ...
lexSemantics/ENIAMwalRenderer_old.ml 0 → 100644
  1 +(*
  2 + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
  5 + *
  6 + * This library is free software: you can redistribute it and/or modify
  7 + * it under the terms of the GNU Lesser General Public License as published by
  8 + * the Free Software Foundation, either version 3 of the License, or
  9 + * (at your option) any later version.
  10 + *
  11 + * This library is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 + * GNU Lesser General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU Lesser General Public License
  17 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18 + *)
  19 +
  20 +open ENIAM_LCGtypes
  21 +open ENIAMwalTypes
  22 +
  23 +let arg_of_ctype = function
  24 + Int -> Atom "int"
  25 + | Rel -> Atom "rel"
  26 + (* | Sub -> LCGtypes.Atom "sub"
  27 + | Coord -> LCGtypes.Atom "coord" *)
  28 + | CompTypeUndef -> Top
  29 + (* | CompTypeAgr -> LCGtypes.AVar "ctype" *)
  30 +
  31 +let render_number = function
  32 + Number n -> Atom n
  33 + | NumberUndef -> Top
  34 + | NumberAgr -> Top
  35 +
  36 +let render_negation = function
  37 + Negation -> Atom "neg"
  38 + | Aff -> Atom "aff"
  39 + | NegationUndef -> Top
  40 +
  41 +let render_pos_entry = function
  42 + "subst" -> [Atom "subst"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]
  43 + | "ppron12" -> [Atom "ppron12"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]
  44 + | "ppron3" -> [Atom "ppron3"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]
  45 + | "siebie" -> [Atom "siebie"; AVar "case"]
  46 + | "num" -> [Atom "num"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]
  47 + | "intnum" -> [Atom "num"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]
  48 + | "prep" -> [Atom "prep"; AVar "case"]
  49 + | "adj" -> [Atom "adj"; AVar "number"; AVar "case"; AVar "gender"; AVar "grad"]
  50 + | "adv" -> [Atom "adv"; AVar "grad"]
  51 + | "ger" -> [Atom "ger"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"; AVar "negation"]
  52 + | "pact" -> [Atom "pact"; AVar "number"; AVar "case"; AVar "gender"; AVar "negation"]
  53 + | "ppas" -> [Atom "ppas"; AVar "number"; AVar "case"; AVar "gender"; AVar "negation"]
  54 + | "inf" -> [Atom "inf"; AVar "aspect"; AVar "negation"]
  55 + | "qub" -> [Atom "qub"]
  56 + | "compar" -> [Atom "compar"; AVar "case"]
  57 + | "comp" -> [Atom "comp"; AVar "ctype"]
  58 + | "fin" -> [Atom "pers"; AVar "negation"]
  59 + | "praet" -> [Atom "pers"; AVar "negation"]
  60 + | "pred" -> [Atom "pers"; AVar "negation"]
  61 + | "winien" -> [Atom "pers"; AVar "negation"]
  62 + | "bedzie" -> [Atom "pers"; AVar "negation"]
  63 + | s -> failwith ("render_pos_entry: " ^ s)
  64 +
  65 +let render_pos = function (* wprowadzam uzgodnienia a nie wartości cech, bo wartości cech są wprowadzane przez leksem a uzgodnienia wiążą je z wartościami u nadrzędnika *)
  66 + | SUBST(number,Case case) -> [Atom "subst"; render_number number; Atom case; Top; Top]
  67 + | SUBST(_,NomAgr) -> [Atom "subst"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
  68 + | SUBST(_,GenAgr) -> [Atom "subst"; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"]
  69 + | SUBST(_,AllAgr) -> [Atom "subst"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]
  70 + | SUBST(number,CaseAgr) -> [Atom "subst"; render_number number; AVar "case"; Top; Top]
  71 + | SUBST(_,CaseUndef) -> [Atom "subst"; Top; Top; Top; Top]
  72 + | PPRON12(number,Case case) -> [Atom "ppron12"; render_number number; Atom case; Top; Top]
  73 + | PPRON3(number,Case case) -> [Atom "ppron3"; render_number number; Atom case; Top; Top]
  74 + | SIEBIE(Case case) -> [Atom "siebie"; Atom case]
  75 + | NUM(Case case,_) -> [Atom "num"; Top; Atom case; Top; Top]
  76 + | NUM(NomAgr,_) -> [Atom "num"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
  77 +(* | NUM(CaseAgr,_) -> [Atom "num"; Top; AVar "case"; Top; Top]
  78 + | NUM(CaseUndef,_) -> [Atom "num"; Top; Top; Top; Top]*)
  79 + | PREP(Case case) -> [Atom "prep"; Atom case]
  80 + | ADJ(_,Case case,_,Grad grad) -> [Atom "adj"; Top; Atom case; Top; Atom grad]
  81 +(* | ADJ(_,NomAgr,_,_) -> [Atom "adj"; AVar "number"; Atom "nom"; AVar "gender"]
  82 + | ADJ(_,CaseAgr,_,_) -> [Atom "adj"; Top; AVar "case"; Top]*)
  83 + | ADJ(_,CaseUndef,_,Grad grad) -> [Atom "adj"; Top; Top; Top; Atom grad]
  84 + | ADJ(_,AllAgr,_,Grad grad) -> [Atom "adj"; AVar "number"; AVar "case"; AVar "gender"; Atom grad]
  85 + | ADJ(_,AllAgr,_,GradUndef) -> [Atom "adj"; AVar "number"; AVar "case"; AVar "gender"; Top]
  86 + | ADV (Grad grad) -> [Atom "adv"; Atom grad]
  87 + | ADV GradUndef -> [Atom "adv"; Top]
  88 + | GER(_,Case case,_,_,neg) -> [Atom "ger"; Top; Atom case; Top; Top; render_negation neg]
  89 +(* | GER(_,NomAgr,_,_,_) -> [Atom "ger"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
  90 + | GER(_,CaseAgr,_,_,_) -> [Atom "ger"; Top; AVar "case"; Top; Top]
  91 + | GER(_,CaseUndef,_,_,_) -> [Atom "ger"; Top; Top; Top; Top]
  92 + | PACT(_,Case case,_,_,_) -> [Atom "pact"; Top; Atom case; Top]
  93 + | PACT(_,NomAgr,_,_,_) -> [Atom "pact"; AVar "number"; Atom "nom"; AVar "gender"]*)
  94 + | PACT(_,AllAgr,_,_,neg) -> [Atom "pact"; AVar "number"; AVar "case"; AVar "gender"; render_negation neg]
  95 +(* | PACT(_,CaseAgr,_,_,_) -> [Atom "pact"; Top; AVar "case"; Top]*)
  96 + | PPAS(_,Case case,_,_,neg) -> [Atom "ppas"; Top; Atom case; Top; render_negation neg]
  97 + | PPAS(_,CaseUndef,_,_,neg) -> [Atom "ppas"; Top; Top; Top; render_negation neg]
  98 + (* | PPAS(_,NomAgr,_,_,_) -> [Atom "ppas"; AVar "number"; Atom "nom"; AVar "gender"]*)
  99 + | PPAS(_,AllAgr,_,_,neg) -> [Atom "ppas"; AVar "number"; AVar "case"; AVar "gender"; render_negation neg]
  100 +(* | PPAS(_,CaseAgr,_,_,_) -> [Atom "ppas"; Top; AVar "case"; Top]*)
  101 + | INF(Aspect aspect,neg) -> [Atom "inf"; Atom aspect; render_negation neg]
  102 + | INF(AspectUndef,neg) -> [Atom "inf"; Top; render_negation neg]
  103 + | QUB -> [Atom "qub"]
  104 + | COMPAR (Case case) -> [Atom "compar"; Atom case]
  105 + | COMP ctype -> [Atom "comp"; arg_of_ctype ctype]
  106 + | PERS neg -> [Atom "pers"; render_negation neg]
  107 + | pos -> failwith ("render_pos: " ^ ENIAMwalStringOf.pos pos)
  108 +
  109 +let render_phrase = function
  110 + NP(Case case) -> Tensor[Atom "np"; Top; Atom case; Top; Top]
  111 + | NP NomAgr -> Tensor[Atom "np"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
  112 +(* | NP GenAgr -> Tensor[Atom "np"; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"]
  113 + | NP AllAgr -> Tensor[Atom "np"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]*)
  114 + | NP CaseAgr -> Tensor[Atom "np"; Top; AVar "case"; Top; Top]
  115 +(* | NP CaseUndef -> Tensor[Atom "np"; Top; Top; Top; Top]
  116 + | PrepNP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]*)
  117 + | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Atom case]
  118 + | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case]
  119 + | AdjP(Case case) -> Tensor[Atom "adjp"; Top; Atom case; Top]
  120 + | AdjP NomAgr -> Tensor[Atom "adjp"; AVar "number"; Atom "nom"; AVar "gender"]
  121 + | AdjP AllAgr -> Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]
  122 +(* | AdjP CaseAgr -> Tensor[Atom "adjp"; Top; AVar "case"; Top]
  123 + | PrepAdjP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]*)
  124 + | PrepAdjP(prep,Case case) -> Tensor[Atom "prepadjp"; Atom prep; Atom case]
  125 + (* | NumP(Case case) -> Tensor[Atom "nump"; Top; Atom case; Top; Top]
  126 + | NumP NomAgr -> Tensor[Atom "nump"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
  127 + | NumP CaseAgr -> Tensor[Atom "nump"; Top; AVar "case"; Top; Top]
  128 + | NumP CaseUndef -> Tensor[Atom "nump"; Top; Top; Top; Top]
  129 + | PrepNumP(_,"",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]
  130 + | PrepNumP(_,prep,Case case) -> Tensor[Atom "prepnump"; Atom prep; Atom case] *)
  131 +(* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top]*)
  132 + | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom prep]
  133 + | ComparP(prep,Case case) -> Tensor[Atom "compar"; Atom prep; Atom case]
  134 + (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep] *)
  135 + (* | IP -> Tensor[Atom "ip";Top;Top;Top] *)
  136 + | CP (ctype,Comp comp) -> Tensor[Atom "cp"; arg_of_ctype ctype; Atom comp]
  137 + (* | CP (ctype,CompUndef) -> Tensor[Atom "cp"; arg_of_ctype ctype; Top]*)
  138 + | NCP(Case case,ctype,Comp comp) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; arg_of_ctype ctype; Atom comp]
  139 + | NCP(Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top]
  140 + | NCP(NomAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp]
  141 + | NCP(NomAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top]
  142 + | PrepNCP(Psem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp]
  143 + | PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top]
  144 + | PrepNCP(Pnosem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp]
  145 + | PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top]
  146 + | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom aspect]
  147 + | InfP AspectUndef -> Tensor[Atom "infp"; Top]
  148 + (* | PadvP -> Tensor[Atom "padvp"] *)
  149 + | AdvP "misc" -> Tensor[Atom "advp"; Top] (* FIXME: a może Atom "mod" zamiast Top *)
  150 + | AdvP mode -> Tensor[Atom "advp"; Atom mode]
  151 + | ColonP -> Tensor[Atom "colonp"]
  152 + | FixedP lex -> Tensor[Atom "fixed"; Atom lex]
  153 + (* | PrepP -> Tensor[Atom "prepp";Top]
  154 + | Prep("",CaseAgr) -> Tensor[Atom "prep"; Top; AVar "case"]
  155 + | Prep("",CaseUAgr) -> Tensor[Atom "prep"; Top; AVar "ucase"]
  156 + | Num(AllAgr,Acm acm) -> Tensor[Atom "num"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"; Atom acm]
  157 + | Measure(AllUAgr) -> Tensor[Atom "measure"; AVar "unumber"; AVar "ucase"; AVar "ugender"; AVar "uperson"] *)
  158 + | Or -> Tensor[Atom "or"]
  159 + (* | Qub -> Tensor[Atom "qub"]*)
  160 + (* | Inclusion -> Tensor[Atom "inclusion"]
  161 + | Adja -> Tensor[Atom "adja"]
  162 + | Aglt -> Tensor[Atom "aglt"; AVar "number"; AVar "person"]
  163 + | AuxPast -> Tensor[Atom "aux-past"; AVar "number"; AVar "gender"; AVar "person"]
  164 + | AuxFut -> Tensor[Atom "aux-fut"; AVar "number"; AVar "gender"; AVar "person"]
  165 + | AuxImp -> Tensor[Atom "aux-imp"]
  166 + | Pro -> One
  167 + | ProNG -> One *)
  168 + | E Or -> Tensor[Atom "or"]
  169 + | E (CP(CompTypeUndef,CompUndef)) -> Tensor[Atom "cp"; Top; Top]
  170 + | E (NCP(NomAgr,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top]
  171 + | E (NP(NomAgr)) -> Tensor[Atom "np"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
  172 + | E (PrepNP(Psem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Atom case]
  173 + | E (PrepNP(Pnosem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case]
  174 + | E (NP(Case case)) -> Tensor[Atom "np"; Top; Atom case; Top; Top]
  175 + | E (NCP(Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top]
  176 + | E (PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top]
  177 + | E (PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top]
  178 + | phrase -> failwith ("render_phrase: " ^ ENIAMwalStringOf.phrase phrase)
  179 +
  180 +let render_phrase_cat cat = function
  181 + NP(Case case) -> Tensor[Atom "np"; Atom cat; Top; Atom case; Top; Top]
  182 + | NP NomAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
  183 + | NP VocAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"]
  184 +(* | NP GenAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"]
  185 + | NP AllAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]*)
  186 + | NP CaseAgr -> Tensor[Atom "np"; Atom cat; Top; AVar "case"; Top; Top]
  187 + | NP CaseUndef -> Tensor[Atom "np"; Atom cat; Top; Top; Top; Top]
  188 + | PrepNP(Psem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top]
  189 + | PrepNP(Psem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top]
  190 + | PrepNP(Psem,"_",Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Atom case]
  191 + | PrepNP(Psem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Top]
  192 + | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case]
  193 + | PrepNP(Pnosem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top]
  194 + | PrepNP(Pnosem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top]
  195 + | PrepNP(Pnosem,"_",Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Atom case]
  196 + | PrepNP(Pnosem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Top]
  197 + | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case]
  198 + | AdjP(Case case) -> Tensor[Atom "adjp"; Atom cat; Top; Atom case; Top]
  199 + | AdjP NomAgr -> Tensor[Atom "adjp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"]
  200 + | AdjP AllAgr -> Tensor[Atom "adjp"; Atom cat; AVar "number"; AVar "case"; AVar "gender"]
  201 +(* | AdjP CaseAgr -> Tensor[Atom "adjp"; Top; AVar "case"; Top]
  202 + | PrepAdjP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]*)
  203 + | PrepAdjP(prep,Case case) -> Tensor[Atom "prepadjp"; Atom cat; Atom prep; Atom case]
  204 + (* | NumP(Case case) -> Tensor[Atom "nump"; Top; Atom case; Top; Top]
  205 + | NumP NomAgr -> Tensor[Atom "nump"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
  206 + | NumP CaseAgr -> Tensor[Atom "nump"; Top; AVar "case"; Top; Top]
  207 + | NumP CaseUndef -> Tensor[Atom "nump"; Top; Top; Top; Top]
  208 + | PrepNumP(_,"",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]
  209 + | PrepNumP(_,prep,Case case) -> Tensor[Atom "prepnump"; Atom prep; Atom case] *)
  210 +(* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top]*)
  211 + | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom cat; Atom prep]
  212 + | ComparP(prep,Case case) -> Tensor[Atom "compar"; Atom cat; Atom prep; Atom case]
  213 + (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep] *)
  214 + (* | IP -> Tensor[Atom "ip";Top;Top;Top] *)
  215 + | CP (ctype,Comp comp) -> Tensor[Atom "cp"; Atom cat; arg_of_ctype ctype; Atom comp]
  216 + (* | CP (ctype,CompUndef) -> Tensor[Atom "cp"; arg_of_ctype ctype; Top]*)
  217 + | NCP(Case case,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; arg_of_ctype ctype; Atom comp]
  218 + | NCP(Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; Top; Top]
  219 + | NCP(NomAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp]
  220 + | NCP(NomAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top]
  221 + | NCP(VocAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp]
  222 + | NCP(VocAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; Top; Top]
  223 + | PrepNCP(Psem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom cat; Atom "sem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp]
  224 + | PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom cat; Atom "sem"; Atom prep; Atom case; Top; Top]
  225 + | PrepNCP(Pnosem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom cat; Atom "nosem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp]
  226 + | PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom cat; Atom "nosem"; Atom prep; Atom case; Top; Top]
  227 + | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom cat; Atom aspect]
  228 + | InfP AspectUndef -> Tensor[Atom "infp"; Atom cat; Top]
  229 + (* | PadvP -> Tensor[Atom "padvp"] *)
  230 + | AdvP "misc" -> Tensor[Atom "advp"; Atom cat; Top] (* FIXME: a może Atom "mod" zamiast Top *)
  231 + | AdvP "" -> Tensor[Atom "advp"; Atom cat; Top] (* FIXME: a może Atom "mod" zamiast Top *)
  232 + | AdvP mode -> Tensor[Atom "advp"; Atom cat; Atom mode]
  233 + | ColonP -> Tensor[Atom "colonp"; Atom cat]
  234 + (* | PrepP -> Tensor[Atom "prepp";Top]
  235 + | Prep("",CaseAgr) -> Tensor[Atom "prep"; Top; AVar "case"]
  236 + | Prep("",CaseUAgr) -> Tensor[Atom "prep"; Top; AVar "ucase"]
  237 + | Num(AllAgr,Acm acm) -> Tensor[Atom "num"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"; Atom acm]
  238 + | Measure(AllUAgr) -> Tensor[Atom "measure"; AVar "unumber"; AVar "ucase"; AVar "ugender"; AVar "uperson"] *)
  239 + (* | Qub -> Tensor[Atom "qub"]*)
  240 + (* | Inclusion -> Tensor[Atom "inclusion"]
  241 + | Adja -> Tensor[Atom "adja"]
  242 + | Aglt -> Tensor[Atom "aglt"; AVar "number"; AVar "person"]
  243 + | AuxPast -> Tensor[Atom "aux-past"; AVar "number"; AVar "gender"; AVar "person"]
  244 + | AuxFut -> Tensor[Atom "aux-fut"; AVar "number"; AVar "gender"; AVar "person"]
  245 + | AuxImp -> Tensor[Atom "aux-imp"]
  246 + | Pro -> One
  247 + | ProNG -> One *)
  248 + | E (CP(CompTypeUndef,CompUndef)) -> Tensor[Atom "cp"; Atom cat; Top; Top]
  249 + | E (NCP(NomAgr,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top]
  250 + | E (NP(NomAgr)) -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"]
  251 + | E (PrepNP(Psem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case]
  252 + | E (PrepNP(Pnosem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case]
  253 + | E (NP(Case case)) -> Tensor[Atom "np"; Atom cat; Top; Atom case; Top; Top]
  254 + | E (NCP(Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; Top; Top]
  255 + | E (PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom cat; Atom "sem"; Atom prep; Atom case; Top; Top]
  256 + | E (PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom cat; Atom "nosem"; Atom prep; Atom case; Top; Top]
  257 + | phrase -> failwith ("render_phrase_cat: " ^ ENIAMwalStringOf.phrase phrase)
  258 +
  259 +let render_morf = function
  260 + | Null -> One
  261 + (* | X -> Tensor[Atom "X"]
  262 + | Lex lex -> Tensor[Atom lex] *)
  263 + | LexArg(id,lex,pos) -> Tensor([Atom "lex";Atom (string_of_int id);Atom lex] @ render_pos pos)
  264 + | SimpleLexArg(lex,pos) -> Tensor([Atom "lex";Atom lex] @ render_pos pos)
  265 + | phrase -> render_phrase phrase
  266 +
  267 +let render_morf_cat cats = function
  268 + | Null -> [One]
  269 + | Pro -> [One]
  270 + | ProNG -> [One]
  271 + | FixedP lex -> [Tensor[Atom "fixed"; Atom lex]]
  272 + | Or -> [Tensor[Atom "or"]]
  273 + | E Or -> [Tensor[Atom "or"]]
  274 + (* | X -> Tensor[Atom "X"]
  275 + | Lex lex -> Tensor[Atom lex] *)
  276 + | LexArg(id,lex,pos) -> [Tensor([Atom "lex";Atom (string_of_int id);Atom lex] @ render_pos pos)]
  277 + | SimpleLexArg(lex,pos) -> [Tensor([Atom "lex";Atom lex] @ render_pos pos)]
  278 + | phrase -> Xlist.map cats (fun cat -> render_phrase_cat cat phrase)
  279 +
  280 +(* let extract_sel_prefs sel_prefs =
  281 + Xlist.map sel_prefs (function
  282 + SynsetName s -> s
  283 + | _ -> failwith "extract_sel_prefs") *)
  284 +
  285 +let render_schema schema =
  286 + Xlist.map schema (fun p ->
  287 + match Xlist.map p.morfs render_morf with
  288 + [] -> failwith "render_schema"
  289 + | [s] -> Both,s
  290 + | l -> Both,Plus l)
  291 +
  292 +let translate_dir = function
  293 + Both_ -> Both
  294 + | Forward_ -> Forward
  295 + | Backward_ -> Backward
  296 +
  297 +let render_schema_cat schema =
  298 + Xlist.map schema (fun p ->
  299 + match List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs)) with
  300 + [] -> failwith "render_schema"
  301 + | [s] -> translate_dir p.dir,s
  302 + | l -> translate_dir p.dir,Plus l)
  303 +
  304 +let render_simple_schema schema =
  305 + Xlist.map schema (fun morfs ->
  306 + Both,Plus(One :: Xlist.map morfs render_morf))
  307 +
  308 +let render_connected_schema schema =
  309 + Xlist.map schema (fun p ->
  310 + {p with morfs=Xlist.map p.morfs (fun morf -> LCG (render_morf morf))})
  311 +
  312 +let render_connected_schema_cat schema =
  313 + Xlist.map schema (fun p ->
  314 + {p with
  315 + morfs=Xlist.map (List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs))) (fun morf -> LCG morf)})
  316 +
  317 +(* FIXME: tu trzeba by dodać zwykłe reguły dla czasowników dotyczące ich negacji, aglutynatu itp. *)
  318 +let render_lex_entry = function
  319 + SimpleLexEntry(lemma,pos) -> Tensor([Atom "lex";Atom lemma] @ render_pos_entry pos)
  320 + | LexEntry(id,lemma,pos,NoRestr,schema) ->
  321 + ImpSet(Tensor([Atom "lex";Atom (string_of_int id);Atom lemma] @ render_pos_entry pos),render_schema schema)
  322 + (*Xlist.map (transform_entry pos lemma NegationUndef PredFalse AspectUndef schema) (fun (sel,schema) ->
  323 + sel,LexEntry(id,lemma,pos,NoRestr,schema))*)
  324 + | ComprepNPEntry(prep,NoRestr,schema) -> ImpSet(Tensor[Atom "comprepnp"; Atom prep],render_schema schema)
  325 + (*Xlist.map (transform_entry "comprep" s NegationUndef PredFalse AspectUndef schema) (fun (sel,schema) ->
  326 + sel,ComprepNPEntry(s,NoRestr,schema))*)
  327 + | LexEntry(id,lemma,pos,_,[]) (*as entry*) ->
  328 + ImpSet(Tensor([Atom "lex";Atom (string_of_int id);Atom lemma] @ render_pos_entry pos),[Both,Tensor[AVar "schema"]])
  329 + | entry -> failwith ("render_entry:" ^ ENIAMwalStringOf.lex_entry entry)
  330 +
  331 +(* let schemata,entries = ENIAMvalence.prepare_all_valence ENIAMwalParser.phrases ENIAMwalParser.schemata ENIAMwalParser.entries *)
  332 +
  333 +(* let _ =
  334 + (* Entries.map schemata (fun pos lemma (selectors,schema) ->
  335 + (* Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema); *)
  336 + render_schema schema) *)
  337 + Entries.map entries (fun pos lemma (selectors,entry) ->
  338 + (* Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema); *)
  339 + selectors,render_lex_entry entry) *)
  340 +
  341 +let adjunct morfs = {empty_position with gf=ADJUNCT; is_necessary=Opt; morfs=Xlist.map morfs (fun morf -> LCG morf)}
  342 +let adjunct_multi dir morfs = {empty_position with gf=ADJUNCT; is_necessary=Multi; dir=dir; morfs=Xlist.map morfs (fun morf -> LCG morf)}
  343 +let adjunct_dir dir morfs = {empty_position with gf=ADJUNCT; is_necessary=Opt; dir=dir; morfs=Xlist.map morfs (fun morf -> LCG morf)}
  344 +let adjunct_ce ce morfs = {empty_position with gf=ADJUNCT; ce=[ce]; is_necessary=Opt; morfs=Xlist.map morfs (fun morf -> LCG morf)}
  345 +
  346 +let render_comprep prep = Both,Plus[One;Tensor[Atom "comprepnp"; Atom prep]]
  347 +
  348 +let render_connected_comprep prep = adjunct [Tensor[Atom "comprepnp"; Atom prep]]
  349 +
  350 +let render_prepnp prep cases =
  351 + Both,Plus(One :: List.flatten (Xlist.map cases (fun case ->
  352 + [Tensor[Atom "prepnp"; Atom prep; Atom case];
  353 + Tensor[Atom "prepncp"; Atom prep; Atom case; Top; Top]])))
  354 +
  355 +let render_connected_prepnp prep cases =
  356 + adjunct (List.flatten (Xlist.map cases (fun case ->
  357 + [Tensor[Atom "prepnp"; Atom prep; Atom case];
  358 + Tensor[Atom "prepncp"; Atom prep; Atom case; Top; Top]])))
  359 +
  360 +let render_prepadjp prep cases =
  361 + let postp = if prep = "z" || prep = "po" || prep = "na" then [Tensor[Atom "prepadjp"; Atom prep; Atom "postp"]] else [] in
  362 + Both,Plus(One :: postp @ (Xlist.map cases (fun case ->
  363 + Tensor[Atom "prepadjp"; Atom prep; Atom case])))
  364 +
  365 +let render_connected_prepadjp prep cases =
  366 + let postp = if prep = "z" || prep = "po" || prep = "na" then [Tensor[Atom "prepadjp"; Atom prep; Atom "postp"]] else [] in
  367 + adjunct (postp @ (Xlist.map cases (fun case ->
  368 + Tensor[Atom "prepadjp"; Atom prep; Atom case])))
  369 +
  370 +let render_compar prep = Both,Plus[One;Tensor[Atom "compar"; Atom prep; Top]]
  371 +
  372 +let render_connected_compar prep = adjunct [Tensor[Atom "compar"; Atom prep; Top]]
  373 +
  374 +let verb_adjuncts_simp = [
  375 + Both, Plus[One;Tensor[Atom "advp"; Atom "pron"]];
  376 + Both, Plus[One;Tensor[Atom "advp"; Atom "locat"]];
  377 + Both, Plus[One;Tensor[Atom "advp"; Atom "abl"]];
  378 + Both, Plus[One;Tensor[Atom "advp"; Atom "adl"]];
  379 + Both, Plus[One;Tensor[Atom "advp"; Atom "perl"]];
  380 + Both, Plus[One;Tensor[Atom "advp"; Atom "temp"]];
  381 + Both, Plus[One;Tensor[Atom "advp"; Atom "dur"]];
  382 + Both, Plus[One;Tensor[Atom "advp"; Atom "mod"]];
  383 + Both, Plus[One;Tensor[Atom "np";Top;Atom "dat"; Top; Top];Tensor[Atom "ncp"; Top; Atom "dat"; Top; Top; Top; Top]];
  384 + Both, Plus[One;Tensor[Atom "np";Top;Atom "inst"; Top; Top];Tensor[Atom "ncp"; Top; Atom "inst"; Top; Top; Top; Top]];
  385 + Both, Plus[One;Tensor[Atom "date"];Tensor[Atom "day-lex"];Tensor[Atom "day-month"];Tensor[Atom "day"]];
  386 + Forward, Plus[One;Tensor[Atom "cp";Top; Top]]; (* FIXME: to powinno być jako ostatnia lista argumentów *)
  387 + Both, Plus[One;Tensor[Atom "or"]];
  388 + Both, Plus[One;Tensor[Atom "lex";Atom "się";Atom "qub"]];
  389 + Both, Plus[One;Tensor[Atom "padvp"]];
  390 +]
  391 +
  392 +let verb_connected_adjuncts_simp = [
  393 + adjunct [Tensor[Atom "advp"; Atom "pron"]];
  394 + adjunct [Tensor[Atom "advp"; Atom "locat"]];
  395 + adjunct [Tensor[Atom "advp"; Atom "abl"]];
  396 + adjunct [Tensor[Atom "advp"; Atom "adl"]];
  397 + adjunct [Tensor[Atom "advp"; Atom "perl"]];
  398 + adjunct [Tensor[Atom "advp"; Atom "temp"]];
  399 + adjunct [Tensor[Atom "advp"; Atom "dur"]];
  400 + adjunct [Tensor[Atom "advp"; Atom "mod"]];
  401 + adjunct [Tensor[Atom "np";Top;Atom "dat"; Top; Top];Tensor[Atom "ncp"; Top; Atom "dat"; Top; Top; Top; Top]];
  402 + adjunct [Tensor[Atom "np";Top;Atom "inst"; Top; Top];Tensor[Atom "ncp"; Top; Atom "inst"; Top; Top; Top; Top]];
  403 + adjunct [Tensor[Atom "date"];Tensor[Atom "day-lex"];Tensor[Atom "day-month"];Tensor[Atom "day"]];
  404 + adjunct_dir Forward_ [Tensor[Atom "cp";Top; Top]];
  405 + adjunct [Tensor[Atom "or"]];
  406 + adjunct [Tensor[Atom "lex";Atom "się";Atom "qub"]];
  407 + adjunct_ce "3" [Tensor[Atom "padvp"]];
  408 +]
  409 +
  410 +let proper_noun_adjuncts_simp = [
  411 + Both, Plus[One;Tensor[Atom "np";Top;Atom "gen"; Top; Top];Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top]];
  412 + Forward, Plus[One;Tensor[Atom "np";Top;Atom "nom"; Top; Top];Tensor[Atom "np";Top;AVar "case"; Top; Top]];
  413 + Backward, Maybe(Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]);
  414 + Forward, Plus[One;Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
  415 +]
  416 +
  417 +let proper_noun_connected_adjuncts_simp = [
  418 + adjunct [Tensor[Atom "np";Top;Atom "gen"; Top; Top];Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top]];
  419 + adjunct_dir Forward_ [Tensor[Atom "np";Top;Atom "nom"; Top; Top];Tensor[Atom "np";Top;AVar "case"; Top; Top]];
  420 + adjunct_multi Backward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
  421 + adjunct_dir Forward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
  422 +]
  423 +
  424 +let common_noun_adjuncts_simp = [
  425 + Both, Plus[One;Tensor[Atom "np";Top;Atom "gen"; Top; Top];Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top]];
  426 + Forward, Plus[One;Tensor[Atom "np";Top;Atom "nom"; Top; Top];Tensor[Atom "np";Top;AVar "case"; Top; Top]];
  427 + Backward, Maybe(Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]);
  428 + Forward, Plus[One;Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
  429 +]
  430 +
  431 +let common_noun_connected_adjuncts_simp = [
  432 + adjunct [Tensor[Atom "np";Top;Atom "gen"; Top; Top];Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top]];
  433 + adjunct_dir Forward_ [Tensor[Atom "np";Top;Atom "nom"; Top; Top];Tensor[Atom "np";Top;AVar "case"; Top; Top]];
  434 + adjunct_multi Backward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
  435 + adjunct_dir Forward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
  436 +]
  437 +
  438 +let measure_noun_adjuncts_simp = [
  439 + Backward, Maybe(Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]);
  440 + Forward, Plus[One;Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
  441 +]
  442 +
  443 +let measure_noun_connected_adjuncts_simp = [
  444 + adjunct_multi Backward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
  445 + adjunct_dir Forward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
  446 +]
  447 +
  448 +let adj_adjuncts_simp = [
  449 + Both, Plus[One;Tensor[Atom "advp"; Top]];
  450 +]
  451 +
  452 +let adj_connected_adjuncts_simp = [
  453 + adjunct [Tensor[Atom "advp"; Top]];
  454 +]
  455 +
  456 +let adv_adjuncts_simp = [
  457 + Both, Plus[One;Tensor[Atom "advp"; Top]];
  458 + ]
  459 +
  460 +let adv_connected_adjuncts_simp = [
  461 + adjunct [Tensor[Atom "advp"; Top]];
  462 + ]
  463 +
  464 +let assing_prep_morfs = function
  465 + "po","postp" -> [
  466 + LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "dat"; Atom "m1"]);
  467 + LCG(Tensor[Atom "adjp"; Top; Atom "postp"; Top])]
  468 + | "z","postp" -> [LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "nom"; Atom "f"])]
  469 + | "na","postp" -> [LCG(Tensor[Atom "advp"; Top])]
  470 + | _,case -> [
  471 + LCG(Tensor[Atom "np"; Top; Atom case; Top; Top]);
  472 + LCG(Tensor[Atom "adjp"; Top; Atom case; Top])]
  473 +
  474 +let prep_morfs = [
  475 + LCG(Tensor[Atom "np"; Top; Atom "case"; Top; Top]);
  476 + LCG(Tensor[Atom "adjp"; Top; Atom "case"; Top]);
  477 + LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "dat"; Atom "m1"]);
  478 + LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "nom"; Atom "f"]);
  479 + LCG(Tensor[Atom "advp"; Top]);
  480 + LCG(Tensor[Atom "year"]);
  481 + LCG(Tensor[Atom "hour-minute"]);
  482 + LCG(Tensor[Atom "day-month"]);
  483 + LCG(Tensor[Atom "hour"]);
  484 + LCG(Tensor[Atom "day"]);
  485 + LCG(Tensor[Atom "date"]);
  486 + ]
  487 +
  488 +let compar_morfs = [
  489 + LCG(Tensor[Atom "np"; Top; Atom "case"; Top; Top]);
  490 + LCG(Tensor[Atom "adjp"; Top; Atom "case"; Top]);
  491 + LCG(Tensor[Atom "prepnp"; Top; Top]);
  492 + LCG(Tensor[Atom "prepadjp"; Top; Top]);
  493 + ]
... ...
lexSemantics/ENIAMwalStringOf.ml
... ... @@ -59,6 +59,7 @@ let case = function
59 59 | CaseUndef -> "_"
60 60 | AllAgr -> "allagr"
61 61 | NomAgr -> "nomagr"
  62 + | VocAgr -> "vocagr"
62 63 | GenAgr -> "genagr"
63 64  
64 65 let rec comp = function
... ... @@ -140,7 +141,7 @@ let rec phrase = function
140 141 | ComparP(prep,c) -> "comparp(" ^ prep ^ "," ^ case c ^ ")"
141 142 | CP(ct,co) -> "cp(" ^ comp_type ct ^ "," ^ comp co ^ ")"
142 143 | NCP(c,ct,co) -> "ncp(" ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")"
143   - | PrepNCP(prep,c,ct,co) -> "prepncp(" ^ prep ^ "," ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")"
  144 + | PrepNCP(p,prep,c,ct,co) -> "prepncp(" ^ psem p ^ "," ^ prep ^ "," ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")"
144 145 | InfP(a) -> "infp(" ^ aspect a (*^ req r*) ^ ")"
145 146 | AdvP(m) -> "advp(" ^ m ^ ")"
146 147 | ColonP -> "colonp"
... ... @@ -250,7 +251,7 @@ let rec connected_schema schema =
250 251 "{" ^ String.concat ";" (Xlist.map s.morfs simple_morf) ^ "}:" ^ sem_frame s))
251 252 *)
252 253  
253   -let meaning m =
  254 +let sense m =
254 255 m.name ^ "-" ^ m.variant
255 256  
256 257 let lex_entry = function
... ...
lexSemantics/ENIAMwalTypes.ml
... ... @@ -24,7 +24,7 @@ type opinion = Pewny | Potoczny | Watpliwy | Archaiczny | Zly | Wulgarny | Nieok
24 24 type negation = Negation | Aff | NegationUndef (*| NegationNA*)
25 25 type pred = PredTrue | PredFalse | PredUndef (*| PredNA*)
26 26 type aspect = Aspect of string | AspectUndef (*| AspectNA*)
27   -type case = Case of string | Str | Part | CaseAgr | CaseUndef (*| AllUAgr | CaseUAgr*) | GenAgr | NomAgr | AllAgr
  27 +type case = Case of string | Str | Part | CaseAgr | CaseUndef (*| AllUAgr | CaseUAgr*) | GenAgr | NomAgr | VocAgr | AllAgr
28 28 type comp = Comp of string | Zeby | Gdy | CompUndef
29 29 type comp_type = Int | Rel | CompTypeUndef (*| CompTypeAgr*)
30 30 type number = Number of string | NumberUndef | NumberAgr
... ... @@ -72,7 +72,7 @@ type phrase =
72 72 | ComparP of string * case
73 73 | CP of comp_type * comp
74 74 | NCP of case * comp_type * comp
75   - | PrepNCP of string * case * comp_type * comp
  75 + | PrepNCP of psem * string * case * comp_type * comp
76 76 | InfP of aspect
77 77 | AdvP of string
78 78 | ColonP
... ... @@ -108,34 +108,34 @@ type necessary = Req | Opt | Pro | ProNG | Multi
108 108  
109 109 type direction = Both_ | Forward_ | Backward_
110 110  
111   -type position = {psn_id: int; gf: gf; role: string; role_attr: string; sel_prefs: sel_prefs list;
  111 +type position = {psn_id: int; gf: gf; role: string; role_attr: string; node: string; sel_prefs: sel_prefs list; cat_prefs: string list;
112 112 mode: string list; cr: string list; ce: string list; morfs: phrase list;
113 113 dir: direction; is_necessary: necessary}
114 114  
115 115 let empty_position =
116   - {psn_id=(-1); gf=ARG; role=""; role_attr=""; mode=[]; sel_prefs=[]; cr=[]; ce=[]; dir=Both_; morfs=[]; is_necessary=Opt}
  116 + {psn_id=(-1); gf=ARG; role=""; role_attr=""; mode=[]; node="concept"; sel_prefs=[]; cat_prefs=["X"]; cr=[]; ce=[]; dir=Both_; morfs=[]; is_necessary=Opt}
117 117  
118   -type meaning = {mng_id: int;
  118 +type sense = {mng_id: int;
119 119 name: string;
120 120 variant: string;
121 121 plwnluid: int;
122 122 gloss: string}
123 123  
124   -let empty_meaning = {mng_id = (-1);
  124 +let empty_sense = {mng_id = (-1);
125 125 name = "";
126 126 variant = "";
127 127 plwnluid = (-1);
128 128 gloss = ""}
129 129  
130 130 (* type frame_atrs =
131   - EmptyAtrs of meaning list
132   - | DefaultAtrs of meaning list * refl * opinion * negation * pred * aspect
  131 + EmptyAtrs of sense list
  132 + | DefaultAtrs of sense list * refl * opinion * negation * pred * aspect
133 133 | ComprepAtrs of string
134   - | NounAtrs of meaning list * string * nsem (** string list*)
135   - | AdjAtrs of meaning list * case * string (** string * string list*)
136   - | PersAtrs of meaning list * string * negation * mood * tense * aux * aspect
137   - | GerAtrs of meaning list * string * negation * aspect
138   - | NonPersAtrs of meaning list * string * string * string * negation * aspect *)
  134 + | NounAtrs of sense list * string * nsem (** string list*)
  135 + | AdjAtrs of sense list * case * string (** string * string list*)
  136 + | PersAtrs of sense list * string * negation * mood * tense * aux * aspect
  137 + | GerAtrs of sense list * string * negation * aspect
  138 + | NonPersAtrs of sense list * string * string * string * negation * aspect *)
139 139  
140 140 (* type schema = {sch_id: int; opinion: opinion; reflexiveMark: refl; aspect: aspect;
141 141 negativity: negation; predicativity: pred; positions: position list; text_rep: string} *)
... ... @@ -175,6 +175,6 @@ let phrases_filename = resource_path ^ &quot;/Walenty/phrases.tab&quot;
175 175 let entries_filename = resource_path ^ "/Walenty/entries.tab"
176 176 let schemata_filename = resource_path ^ "/Walenty/schemata.tab"
177 177 let connected_filename = resource_path ^ "/Walenty/connected.tab"
178   -let meanings_filename = resource_path ^ "/Walenty/meanings.tab"
  178 +let senses_filename = resource_path ^ "/Walenty/meanings.tab"
179 179  
180 180 let user_valence_filename = data_path ^ "/valence.dic"
... ...
morphology/resources/alt_supplement.tab
... ... @@ -4,4 +4,12 @@ sobie siebie siebie:dat.loc
4 4 sobą siebie siebie:inst
5 5 to to pred
6 6 yay yay interj
  7 +świetnie świetnie interj
  8 +doskonale doskonale interj
  9 +idealnie idealnie interj
  10 +zdecydowanie zdecydowanie interj
  11 +ok ok interj
  12 +super super interj
  13 +dobrze dobrze interj
  14 +dzięki dzięki interj
7 15  
... ...
semantics/ENIAMsemGraph.ml
... ... @@ -25,7 +25,7 @@ let empty_concept =
25 25 {c_sense=Dot;c_name=Dot;(* c_variable: string; c_visible_var: bool;*) c_quant=Dot; c_local_quant=true; (*c_modalities: (string * type_term) list;
26 26 c_left_input_pos: int; c_right_input_pos: int;*) c_relations=Dot; c_variable="",""; c_pos=(-1); c_cat=Dot; c_label=""; c_def_label=""}
27 27  
28   -let empty_context = {cx_sense=Dot; cx_contents=Dot; cx_relations=Dot; cx_variable="",""; cx_pos=(-1); cx_cat=Dot}
  28 +let empty_context = {cx_sense=Dot; cx_contents=Dot; cx_relations=Dot; cx_variable="",""; cx_pos=(-1); cx_cat=Dot; cx_label=""; cx_def_label=""}
29 29  
30 30 let rec make_args_list = function
31 31 Tuple l -> List.flatten (Xlist.map l make_args_list)
... ... @@ -34,7 +34,7 @@ let rec make_args_list = function
34 34 let symbols = StringSet.of_list [
35 35 "symbol"; "date"; "date-interval"; "hour-minute"; "hour"; "hour-minute-interval"; "hour-interval";
36 36 "year"; "year-interval"; "day"; "day-interval"; "day-month"; "day-month-interval"; "month-interval"; "roman"; "roman-interval";
37   - "match-result"; "url"; "email"; "obj-id"; "building-number";
  37 + "match-result"; "url"; "email"; "phone-number"; "obj-id"; "building-number";
38 38 "month-lex"; "day-lex"]
39 39  
40 40 let rec get_person = function
... ... @@ -60,27 +60,27 @@ let make_relation t c =
60 60 | "adjunct" -> MakeTripleRelation(t.arole,t.arole_attr,c)
61 61 | s -> failwith ("make_make_triple_relation: " ^ s)*)
62 62  
63   -(* let add_proj proj c =
64   - if proj = Dot then Concept c else
65   - Concept{empty_concept with c_cat=proj; c_relations=Tuple[Relation("Has","",Concept{c with c_relations=Dot});c.c_relations]} *)
66   -let add_proj proj c =
67   - if proj = Dot then Concept c else
68   - Concept{empty_concept with c_cat=proj; c_relations=Relation("Has","",Concept c)}
69   -(* let add_proj proj c =
70   - if proj = Dot then Concept c else
71   - let proj_rels,c_rels = split_relations c.c_relations in
72   - Concept{empty_concept with c_cat=proj; c_relations=Tuple[Relation("Has","",Concept{c with c_relations=c_rels});proj_rels]} *)
73   -let add_proj2 proj c =
74   - if proj = Dot then c else
75   - Concept{empty_concept with c_cat=proj; c_relations=Relation("Has","",c)}
76   -
77   -
78   -let create_normal_concept tokens lex_sems t cat proj =
  63 +(* let add_coerced coerced c =
  64 + if coerced = Dot then Concept c else
  65 + Concept{empty_concept with c_cat=coerced; c_relations=Tuple[Relation("Has","",Concept{c with c_relations=Dot});c.c_relations]} *)
  66 +let add_coerced coerced c =
  67 + if coerced = Dot then Concept c else
  68 + Concept{empty_concept with c_cat=coerced; c_relations=Relation("Has","",Concept c)} (* FIXME: trzeba dodać concept do tokenów *)
  69 +(* let add_coerced coerced c =
  70 + if coerced = Dot then Concept c else
  71 + let coerced_rels,c_rels = split_relations c.c_relations in
  72 + Concept{empty_concept with c_cat=coerced; c_relations=Tuple[Relation("Has","",Concept{c with c_relations=c_rels});coerced_rels]} *)
  73 +let add_coerced2 coerced c =
  74 + if coerced = Dot then c else
  75 + Concept{empty_concept with c_cat=coerced; c_relations=Relation("Has","",c)} (* FIXME: trzeba dodać concept do tokenów *)
  76 +
  77 +
  78 +let create_normal_concept tokens lex_sems t cat coerced =
79 79 (*if t.agf = ENIAMwalTypes.NOSEM then t.args else*)
80   - let cat,proj = if !user_ontology_flag then cat,proj else Dot,Dot in
81   - let proj = if proj = cat then Dot else proj in
  80 + let cat,coerced = if !user_ontology_flag then cat,coerced else Dot,Dot in
  81 + let coerced = if coerced = cat then Dot else coerced in
82 82 let c = {empty_concept with
83   - c_sense = if !user_ontology_flag then Val t.lemma else (*if t.lemma = "<root>" then Dot else*) t.meaning;
  83 + c_sense = (*if !user_ontology_flag then Val t.lemma else*) (*if t.lemma = "<root>" then Dot else*) t.sense;
84 84 c_relations=t.args;
85 85 c_quant=if t.label = "" then t.sem_args else Dot; (* FIXME: zakładam że t.label <> "" występuje tylko dla pro *)
86 86 c_variable=string_of_int t.id,"";
... ... @@ -93,7 +93,7 @@ let create_normal_concept tokens lex_sems t cat proj =
93 93 let c = {c with c_local_quant=false} in
94 94 let c,measure,cx_flag = Xlist.fold t.attrs (c,false,false) (fun (c,measure,cx_flag) -> function
95 95 "NSYN",Val "common" -> c,measure,cx_flag
96   - | "NSYN",Val "proper" -> {c with c_name=Val t.lemma; c_sense=Dot(*t.meaning*)(*c_sense=if Val t.pred=c.c_sense then Dot else c.c_sense*)},measure,cx_flag; (* FIXME: zaślepka na potrzeby gramatyk semantycznych *) (* Rozpoznawanie propoer names nieznanego typu - ryzykowne ale proste *)
  96 + | "NSYN",Val "proper" -> {c with c_name=Val t.lemma; c_sense=Dot(*t.sense*)(*c_sense=if Val t.pred=c.c_sense then Dot else c.c_sense*)},measure,cx_flag; (* FIXME: zaślepka na potrzeby gramatyk semantycznych *) (* Rozpoznawanie propoer names nieznanego typu - ryzykowne ale proste *)
97 97 | "NSYN",Val "pronoun" -> c(*{c with c_quant=Tuple[c.c_quant;Val "indexical"]}*),measure,cx_flag
98 98 | "NSEM",Val "count" -> c(*{c with c_quant=Tuple[c.c_quant;Val "count"]}*),measure,cx_flag
99 99 | "NSEM",Val "mass" -> {c with c_quant=Tuple[c.c_quant;Val "mass"]},measure,cx_flag
... ... @@ -120,13 +120,13 @@ let create_normal_concept tokens lex_sems t cat proj =
120 120 if cx_flag then
121 121 let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in
122 122 let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in
123   - make_relation t (Context{empty_context with cx_contents=add_proj proj c; cx_variable=string_of_int id,""; cx_pos=c.c_pos})
  123 + make_relation t (Context{empty_context with cx_contents=add_coerced coerced c; cx_variable=string_of_int id,""; cx_pos=c.c_pos})
124 124 else
125   - make_relation t (add_proj proj c) else
  125 + make_relation t (add_coerced coerced c) else
126 126 if t.pos = "fin" || t.pos = "bedzie" || t.pos = "praet" || t.pos = "winien" || t.pos = "impt" || t.pos = "imps" || t.pos = "pred" || t.lemma = "pro-komunikować" then
127 127 let c = {c with c_local_quant=false} in
128 128 let c = Xlist.fold t.attrs c (fun c -> function
129   -(* "MEANING",t -> {c with c_sense=Tuple[c.c_sense;t]} *)
  129 +(* "SENSE",t -> {c with c_sense=Tuple[c.c_sense;t]} *)
130 130 | "NUM",t -> c
131 131 | "GEND",_ -> c
132 132 | "PERS",_ -> c
... ... @@ -142,7 +142,7 @@ let create_normal_concept tokens lex_sems t cat proj =
142 142 let c = if t.lemma = "pro-komunikować" then {c with c_relations=Relation("Theme","",c.c_relations)} else c in (* FIXME: to by trzeba przesunąć na wcześniej *)
143 143 let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in
144 144 let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in
145   - let cx = {empty_context with cx_contents=add_proj proj c; cx_variable=string_of_int id,""; cx_pos=c.c_pos; cx_cat=Val "Situation"} in
  145 + let cx = {empty_context with cx_contents=add_coerced coerced c; cx_variable=string_of_int id,""; cx_pos=c.c_pos; cx_cat=Val "Situation"} in
146 146 (* if t.role <> "" || t.role_attr <> "" then failwith "create_normal_concept: verb" else *)
147 147 make_relation t (Context cx) else
148 148 if t.pos = "inf" then
... ... @@ -154,13 +154,13 @@ let create_normal_concept tokens lex_sems t cat proj =
154 154 | "NEGATION",Val "neg" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]}
155 155 | e,t -> failwith ("create_normal_concept verb: " ^ e)) in
156 156 let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in
157   - let _ = ExtArray.add lex_sems in
158   - let cx = {empty_context with cx_contents=add_proj proj c; cx_variable=string_of_int id,""; cx_pos=c.c_pos; cx_cat=Val "Situation"} in
  157 + let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in
  158 + let cx = {empty_context with cx_contents=add_coerced coerced c; cx_variable=string_of_int id,""; cx_pos=c.c_pos; cx_cat=Val "Situation"} in
159 159 make_relation t (Context cx) else
160 160 if t.pos = "adj" || t.pos = "adjc" || t.pos = "adjp" || t.pos = "adja" || t.pos = "pact" || t.pos = "ppas" || t.pos = "apron" || t.pos = "ordnum" || t.pos = "roman-adj" then
161 161 let c = if t.pos = "pact" || t.pos = "ppas" then {c with c_local_quant=false} else c in
162 162 let c = Xlist.fold t.attrs c (fun c -> function
163   -(* "MEANING",t -> {c with c_sense=Tuple[c.c_sense;t]} *)
  163 +(* "SENSE",t -> {c with c_sense=Tuple[c.c_sense;t]} *)
164 164 | "SYN",Val "common" -> c
165 165 | "SYN",Val "pronoun" -> c(*{c with c_quant=Tuple[c.c_quant;Val "indexical"]}*)
166 166 | "SYN",Val "proper" -> if t.pos = "roman-adj" then c else failwith "create_normal_concept adj: SYN=proper"
... ... @@ -172,6 +172,7 @@ let create_normal_concept tokens lex_sems t cat proj =
172 172 | "GRAD",Val "com" -> {c with c_relations=Tuple[c.c_relations;SingleRelation (Val "com")]}
173 173 | "GRAD",Val "sup" -> {c with c_relations=Tuple[c.c_relations;SingleRelation (Val "sup")]}
174 174 | "ASPECT",_ -> c
  175 + | "CTYPE",_ -> c (* FIXME1: trzeba zaznaczyć pytajność w grafie, CTYPE pojawia się w dwu węzłach *)
175 176 (* | "TYPE",Val "int" -> {c with c_quant=Tuple[c.c_quant;Val "interrogative"]} *)
176 177 | "TYPE",_ -> c (* FIXME *)
177 178 | "PERS",_ -> c
... ... @@ -183,7 +184,7 @@ let create_normal_concept tokens lex_sems t cat proj =
183 184 if t.pos = "adv" || t.pos = "pcon" || t.pos = "pant" then
184 185 let c = if t.pos = "pcon" || t.pos = "pant" then {c with c_local_quant=false} else c in
185 186 let c = Xlist.fold t.attrs c (fun c -> function
186   -(* "MEANING",t -> {c with c_sense=Tuple[c.c_sense;t]} *)
  187 +(* "SENSE",t -> {c with c_sense=Tuple[c.c_sense;t]} *)
187 188 | "GRAD",Val "pos" -> c
188 189 | "GRAD",Val "com" -> {c with c_relations=Tuple[c.c_relations;SingleRelation (Val "com")]}
189 190 | "GRAD",Val "sup" -> {c with c_relations=Tuple[c.c_relations;SingleRelation (Val "sup")]}
... ... @@ -194,7 +195,7 @@ let create_normal_concept tokens lex_sems t cat proj =
194 195 | "NEGATION",Val "aff" -> c
195 196 | "NEGATION",Val "neg" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]}
196 197 | e,t -> failwith ("create_normal_concept adv: " ^ e)) in
197   - make_relation t (add_proj proj c) else
  198 + make_relation t (add_coerced coerced c) else
198 199 if t.pos = "prep" then
199 200 (* if t.arole = "NOSEM" then make_relation t (t.args) else *)
200 201 let c,is_sem = Xlist.fold t.attrs (c,false) (fun (c,is_sem) -> function
... ... @@ -203,9 +204,9 @@ let create_normal_concept tokens lex_sems t cat proj =
203 204 | "PSEM",Val "nosem" -> c,false
204 205 | e,t -> failwith ("create_normal_concept prep: " ^ e)) in
205 206 (* make_make_triple_relation t (Concept c) else *)
206   - if is_sem then make_relation t (add_proj2 proj (CreateContext({empty_context with cx_sense=c.c_sense; cx_variable=c.c_variable; cx_pos=c.c_pos; cx_cat=c.c_cat},c.c_relations)))
  207 + if is_sem then make_relation t (add_coerced2 coerced (CreateContext({empty_context with cx_sense=c.c_sense; cx_variable=c.c_variable; cx_pos=c.c_pos; cx_cat=c.c_cat},c.c_relations)))
207 208 else make_relation t (RemoveRelation("CORE","",c.c_relations)) else
208   - if proj <> Dot then failwith ("create_normal_concept proj: " ^ t.lemma) else
  209 + if coerced <> Dot then failwith ("create_normal_concept coerced: " ^ t.lemma) else
209 210 if t.pos = "pro" || t.pos = "ppron12" || t.pos = "ppron3" || t.pos = "siebie" then (* FIXME: indexicalność *)
210 211 let c = {c with c_local_quant=false} in
211 212 let c = Xlist.fold t.attrs c (fun c -> function
... ... @@ -225,7 +226,7 @@ let create_normal_concept tokens lex_sems t cat proj =
225 226 make_relation t (Concept c) else
226 227 if t.pos = "num" || t.pos = "intnum" || t.pos = "realnum" || t.pos = "intnum-interval" || t.pos = "realnum-interval" then
227 228 let c = Xlist.fold t.attrs c (fun c -> function
228   -(* "MEANING",t -> {c with c_sense=Tuple[c.c_sense;t]} *)
  229 +(* "SENSE",t -> {c with c_sense=Tuple[c.c_sense;t]} *)
229 230 | "ACM",_ -> c
230 231 | "NUM",_ -> c
231 232 | "CASE",_ -> c
... ... @@ -249,12 +250,15 @@ let create_normal_concept tokens lex_sems t cat proj =
249 250 if t.pos = "comp" then
250 251 make_relation t (SetContextName(c.c_sense,RemoveRelation("CORE","",c.c_relations))) else
251 252 if t.pos = "conj" then
252   - let c = {empty_context with cx_sense=t.meaning; cx_contents=t.args; cx_variable=c.c_variable; cx_pos=c.c_pos} in
  253 + let c = {empty_context with cx_sense=t.sense; cx_contents=t.args; cx_variable=c.c_variable; cx_pos=c.c_pos; cx_cat=c.c_cat; cx_def_label=c.c_def_label; cx_label=c.c_label} in
253 254 let c = Xlist.fold t.attrs c (fun c -> function
254 255 | "NUM",_ -> c
255 256 | "CASE",_ -> c
256 257 | "GEND",_ -> c
257 258 | "PERS",_ -> c
  259 + | "ASPECT",_ -> c
  260 + | "controller",_ -> c
  261 + | "controllee",_ -> c
258 262 | e,t -> failwith ("create_normal_concept conj: " ^ e)) in
259 263 ManageCoordination({t with attrs=[]; args=Dot},Context c) else
260 264 (* if t.pos = "interj" then
... ... @@ -264,16 +268,25 @@ let create_normal_concept tokens lex_sems t cat proj =
264 268 if t.pos = "sinterj" || t.pos = "interj" then
265 269 let c = Xlist.fold t.attrs c (fun c -> function
266 270 | e,t -> failwith ("create_normal_concept sinterj: " ^ e)) in
267   - make_relation t (Concept c) else
  271 + let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in
  272 + let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in
  273 + let cx = {empty_context with cx_contents=add_coerced coerced c; cx_variable=string_of_int id,""; cx_pos=c.c_pos; cx_cat=Val "Situation"} in
  274 + make_relation t (Context cx) else
268 275 if t.lemma = "<root>" then t.args else
269 276 if t.lemma = "<merge>" then RemoveRelation("null","",t.args) else
270   - if t.pos = "interp" && t.lemma = "?" && t.args = Dot then SingleRelation(Val "int") else
  277 + (* if t.pos = "interp" && t.lemma = "?" && t.args = Dot then SingleRelation(Val "int") else *)
  278 + if t.pos = "interp" && t.lemma = "?" then
  279 + make_relation t (AddSingleRelation(Val "int",RemoveRelation("CORE","",t.args))) else (* FIXME1: to powinno tworzyć kontekst i zaznaczać ze jest interrogative *)
271 280 if t.pos = "interp" && t.lemma = ":" then
272 281 make_relation t (RemoveRelation("CORE","",t.args)) else
273 282 if t.pos = "interp" && t.lemma = "</sentence>" then
274 283 let l = (*List.rev*) (make_args_list t.args) in
275 284 Xlist.fold (List.tl l) (RemoveRelation("null","",List.hd l)) (fun t s -> AddRelation(t,"Next","Clause",RemoveRelation("null","",s))) else
276 285 if t.pos = "interp" && t.lemma = "<sentence>" then t.args else
  286 + if t.pos = "interp" && t.lemma = "</query>" then
  287 + let l = (*List.rev*) (make_args_list t.args) in
  288 + Xlist.fold (List.tl l) (List.hd l) (fun t s -> AddRelation(t,"Next","Sentence",s)) else
  289 + if t.pos = "interp" && t.lemma = "<query>" then t.args else
277 290 (* if t.pos = "interp" && t.lemma = "”s" then
278 291 let l = List.rev (make_args_list t.args) in
279 292 let x = Xlist.fold (List.tl l) (List.hd l) (fun t s -> AddRelation(RemoveRelation t,"Next","Sentence",RemoveRelation s)) in
... ... @@ -310,54 +323,55 @@ let rec translate_node tokens lex_sems t =
310 323 orth=t.ENIAM_LCGtypes.orth; lemma=t.ENIAM_LCGtypes.lemma; pos=t.ENIAM_LCGtypes.pos; weight=t.ENIAM_LCGtypes.weight;
311 324 id=t.ENIAM_LCGtypes.id; symbol=create_concepts tokens lex_sems t.ENIAM_LCGtypes.symbol; arg_symbol=create_concepts tokens lex_sems t.ENIAM_LCGtypes.arg_symbol;
312 325 arg_dir=t.ENIAM_LCGtypes.arg_dir;
313   - attrs=[]; label=""; def_label="";
  326 + attrs=[]; label=""; def_label=""; snode="";
314 327 args=create_concepts tokens lex_sems t.ENIAM_LCGtypes.args;
315   - gf=""; role=""; role_attr=""; selprefs=Dot; meaning=Dot; arole=""; arole_attr=""; arev=false; sem_args=Dot} in
316   - let t,attrs,cat,proj = Xlist.fold attrs (t,[],Dot,Dot) (fun (t,attrs,cat,proj) -> function
317   - "gf",Val s -> {t with gf=s},attrs,cat,proj
318   - | "role",Val s -> {t with role=s},attrs,cat,proj
319   - | "role-attr",Val s -> {t with role_attr=s},attrs,cat,proj
320   - | "selprefs",s -> {t with selprefs=s},attrs,cat,proj
321   - | "meaning",s -> {t with meaning=s},attrs,cat,proj
322   - | "hipero",_ -> t,attrs,cat,proj
323   - | "arole",Val s -> {t with arole=s},attrs,cat,proj
324   - | "arole-attr",Val s -> {t with arole_attr=s},attrs,cat,proj
325   - | "arev",Val "-" -> {t with arev=false},attrs,cat,proj
326   - | "arev",Val "+" -> {t with arev=true},attrs,cat,proj
327   - | "agf",Val s -> t,attrs,cat,proj
328   - | "sem-args",s -> {t with sem_args=s},attrs,cat,proj
329   - | "rev-hipero",_ -> t,attrs,cat,proj
330   - | "fopinion",_ -> t,attrs,cat,proj
331   - | "sopinion",_ -> t,attrs,cat,proj
332   - | "ACM",s -> t,("ACM",s) :: attrs,cat,proj
333   - | "ASPECT",s -> t,("ASPECT",s) :: attrs,cat,proj
334   - | "NEGATION",s -> t,("NEGATION",s) :: attrs,cat,proj
335   - | "MOOD",s -> t,("MOOD",s) :: attrs,cat,proj
336   - | "TENSE",s -> t,("TENSE",s) :: attrs,cat,proj
337   - | "controller",s -> t,("controller",s) :: attrs,cat,proj
338   - | "controllee",s -> t,("controllee",s) :: attrs,cat,proj
339   - | "coref",s -> t,attrs,cat,proj
340   - | "label",Val s -> {t with label=s},attrs,cat,proj
341   - | "def-label",Val s -> {t with def_label=s},attrs,cat,proj
342   - | "CAT",s -> t,attrs,s,proj
343   - | "PROJ",s -> t,attrs,cat,s
344   - | "NUM",s -> t,("NUM",s) :: attrs,cat,proj
345   - | "CASE",s -> t,("CASE",s) :: attrs,cat,proj
346   - | "GEND",s -> t,("GEND",s) :: attrs,cat,proj
347   - | "PERS",s -> t,("PERS",s) :: attrs,cat,proj
348   - | "NSYN",s -> t,("NSYN",s) :: attrs,cat,proj
349   - | "NSEM",s -> t,("NSEM",s) :: attrs,cat,proj
350   - | "MODE",s -> t,("MODE",s) :: attrs,cat,proj
351   - | "GRAD",s -> t,("GRAD",s) :: attrs,cat,proj
352   - | "PSEM",s -> t,("PSEM",s) :: attrs,cat,proj
353   - (* | k,v -> printf "translate_node: %s %s\n%!" k (ENIAMsemStringOf.linear_term 0 v); t, (k,v) :: attrs,cat,proj) in *)
  328 + gf=""; role=""; role_attr=""; selprefs=Dot; sense=Dot; arole=""; arole_attr=""; arev=false; sem_args=Dot} in
  329 + let t,attrs,cat,coerced = Xlist.fold attrs (t,[],Dot,Dot) (fun (t,attrs,cat,coerced) -> function
  330 + "gf",Val s -> {t with gf=s},attrs,cat,coerced
  331 + | "role",Val s -> {t with role=s},attrs,cat,coerced
  332 + | "role-attr",Val s -> {t with role_attr=s},attrs,cat,coerced
  333 + | "selprefs",s -> {t with selprefs=s},attrs,cat,coerced
  334 + | "sense",s -> {t with sense=s},attrs,cat,coerced
  335 + | "hipero",_ -> t,attrs,cat,coerced
  336 + | "arole",Val s -> {t with arole=s},attrs,cat,coerced
  337 + | "arole-attr",Val s -> {t with arole_attr=s},attrs,cat,coerced
  338 + | "arev",Val "-" -> {t with arev=false},attrs,cat,coerced
  339 + | "arev",Val "+" -> {t with arev=true},attrs,cat,coerced
  340 + | "agf",Val s -> t,attrs,cat,coerced
  341 + | "sem-args",s -> {t with sem_args=s},attrs,cat,coerced
  342 + | "rev-hipero",_ -> t,attrs,cat,coerced
  343 + | "fopinion",_ -> t,attrs,cat,coerced
  344 + | "sopinion",_ -> t,attrs,cat,coerced
  345 + | "ACM",s -> t,("ACM",s) :: attrs,cat,coerced
  346 + | "ASPECT",s -> t,("ASPECT",s) :: attrs,cat,coerced
  347 + | "NEGATION",s -> t,("NEGATION",s) :: attrs,cat,coerced
  348 + | "MOOD",s -> t,("MOOD",s) :: attrs,cat,coerced
  349 + | "TENSE",s -> t,("TENSE",s) :: attrs,cat,coerced
  350 + | "CTYPE",s -> t,("CTYPE",s) :: attrs,cat,coerced
  351 + | "controller",s -> t,("controller",s) :: attrs,cat,coerced
  352 + | "controllee",s -> t,("controllee",s) :: attrs,cat,coerced
  353 + | "coref",s -> t,attrs,cat,coerced
  354 + | "label",Val s -> {t with label=s},attrs,cat,coerced
  355 + | "def-label",Val s -> {t with def_label=s},attrs,cat,coerced
  356 + | "CAT",s -> t,attrs,s,coerced
  357 + | "COERCED",s -> t,attrs,cat,s
  358 + | "NUM",s -> t,("NUM",s) :: attrs,cat,coerced
  359 + | "CASE",s -> t,("CASE",s) :: attrs,cat,coerced
  360 + | "GEND",s -> t,("GEND",s) :: attrs,cat,coerced
  361 + | "PERS",s -> t,("PERS",s) :: attrs,cat,coerced
  362 + | "NSYN",s -> t,("NSYN",s) :: attrs,cat,coerced
  363 + | "NSEM",s -> t,("NSEM",s) :: attrs,cat,coerced
  364 + | "MODE",s -> t,("MODE",s) :: attrs,cat,coerced
  365 + | "GRAD",s -> t,("GRAD",s) :: attrs,cat,coerced
  366 + | "PSEM",s -> t,("PSEM",s) :: attrs,cat,coerced
  367 + (* | k,v -> printf "translate_node: %s %s\n%!" k (ENIAMsemStringOf.linear_term 0 v); t, (k,v) :: attrs,cat,coerced) in *)
354 368 | k,v -> failwith (sprintf "translate_node: %s %s\n%!" k (ENIAMsemStringOf.linear_term 0 v))) in
355   - {t with attrs=attrs},cat,proj
  369 + {t with attrs=attrs},cat,coerced
356 370  
357 371 and create_concepts tokens lex_sems = function
358 372 ENIAM_LCGtypes.Node t ->
359   - let t,cat,proj = translate_node tokens lex_sems t in
360   - create_normal_concept tokens lex_sems t cat proj
  373 + let t,cat,coerced = translate_node tokens lex_sems t in
  374 + create_normal_concept tokens lex_sems t cat coerced
361 375 | ENIAM_LCGtypes.Tuple l -> Tuple(Xlist.map l (create_concepts tokens lex_sems))
362 376 | ENIAM_LCGtypes.Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, create_concepts tokens lex_sems t))
363 377 | ENIAM_LCGtypes.Dot -> Dot
... ... @@ -382,6 +396,7 @@ let rec make_tree_rec references = function
382 396 | SingleRelation r -> SingleRelation r
383 397 (* | TripleRelation(r,a,s,t) -> TripleRelation(r,a,make_tree_rec references s,make_tree_rec references t) *)
384 398 | AddRelation(t,r,a,s) -> AddRelation(make_tree_rec references t,r,a,make_tree_rec references s)
  399 + | AddSingleRelation(r,s) -> AddSingleRelation(r,make_tree_rec references s)
385 400 | RemoveRelation(r,a,t) -> RemoveRelation(r,a,make_tree_rec references t)
386 401 | SetContextName(s,t) -> SetContextName(s,make_tree_rec references t)
387 402 | CreateContext(s,t) -> CreateContext(s,make_tree_rec references t)
... ... @@ -408,13 +423,16 @@ let rec validate_translation r = function
408 423 | SingleRelation _ -> ()
409 424 (* | TripleRelation(_,_,s,t) -> validate_translation r s; validate_translation r t *)
410 425 | AddRelation(t,_,_,s) -> validate_translation r t; validate_translation r s
  426 + | AddSingleRelation(_,s) -> validate_translation r s
411 427 | RemoveRelation(_,_,t) -> validate_translation r t
412 428 | SetContextName(s,t) -> validate_translation r t
413 429 | CreateContext(s,t) -> validate_translation r t
414 430 (* | MakeTripleRelation(_,_,t) -> validate_translation r t *)
415 431 | ManageCoordination(_,t) -> validate_translation r t
416 432 | Tuple l -> Xlist.iter l (validate_translation r)
417   - | Variant(e,l) -> Xlist.iter l (fun (i,t) -> validate_translation r t)
  433 + | Variant(e,l) ->
  434 + if e = "" then r := "validate_translation: empty variant label" :: !r;
  435 + Xlist.iter l (fun (i,t) -> validate_translation r t)
418 436 | Dot -> ()
419 437 | t -> failwith ("validate_translation: " ^ ENIAMsemStringOf.linear_term 0 t)
420 438  
... ... @@ -500,6 +518,14 @@ let rec reduce_tree = function
500 518 (* | TripleRelation(r,a,s,t) -> TripleRelation(r,a,reduce_tree s,reduce_tree t) *)
501 519 (* | AddRelation(Concept c,r,a,s) -> reduce_tree (Concept{c with c_relations=Tuple[Relation(Val r,Val a,s);c.c_relations]})
502 520 | AddRelation(Context c,r,a,s) -> reduce_tree (Context{c with cx_relations=Tuple[Relation(Val r,Val a,s);c.cx_relations]})*)
  521 + | AddSingleRelation(r,t) ->
  522 + (match reduce_tree t with
  523 + Concept t -> Concept{t with c_relations=Tuple[t.c_relations;SingleRelation r]}
  524 + | Context({cx_sense=Val "czy"} as t) -> Context t
  525 + | Context({cx_sense=Val "jaki"} as t) -> Context t
  526 + | Context({cx_sense=Dot} as t) -> Context{t with cx_sense=Val "czy"}
  527 + | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, reduce_tree (AddSingleRelation(r,t))))
  528 + | t -> AddSingleRelation(r,t))
503 529 | AddRelation(t,r,a,s) -> simplify_tree_add_relation r a (reduce_tree s) (reduce_tree t)
504 530 (* let t = reduce_tree t in
505 531 let s = reduce_tree s in
... ... @@ -511,7 +537,7 @@ let rec reduce_tree = function
511 537 (match reduce_tree t with
512 538 Relation(r,a,t) ->
513 539 if (r = r0 && a = a0) || r0 = "" then t else
514   - Context{empty_context with cx_contents=
  540 + Context{empty_context with cx_cat=Val "Situation"; cx_contents=
515 541 Concept{empty_concept with c_relations=Relation(r,a,t)}; (*cx_variable=string_of_int id,""; cx_pos=c.c_pos*)}
516 542 (* | TripleRelation(r,a,s,t) ->
517 543 Context{empty_context with cx_contents=
... ... @@ -544,7 +570,8 @@ let rec reduce_tree = function
544 570 (match reduce_tree c with
545 571 Context c ->
546 572 let t,args = extract_aroles {t with arole=""} c.cx_contents in
547   - make_relation t (Context {c with cx_contents=args})
  573 + (*make_relation t (Context {c with cx_contents=args})*) (* FIXME: to trzeba poprawić tak by działało w obu wersjach parserów *)
  574 + Relation(t.role,"",Context {c with cx_contents=args})
548 575 | Variant(e,l) -> reduce_tree (Variant(e,Xlist.map l (fun (i,c) -> i,ManageCoordination(t,c))))
549 576 | c -> ManageCoordination(t,c))
550 577 | Tuple l -> Tuple(List.rev (Xlist.rev_map l reduce_tree))
... ... @@ -561,7 +588,9 @@ let rec validate_reduction r = function
561 588 | SingleRelation _ -> ()
562 589 (* | TripleRelation(_,_,s,t) -> validate_reduction r s; validate_reduction r t *)
563 590 | Tuple l -> Xlist.iter l (validate_reduction r)
564   - | Variant(e,l) -> Xlist.iter l (fun (i,t) -> validate_reduction r t)
  591 + | Variant(e,l) ->
  592 + if e = "" then r := "validate_reduction: empty variant label" :: !r;
  593 + Xlist.iter l (fun (i,t) -> validate_reduction r t)
565 594 | Dot -> ()
566 595 | t -> r := ("validate_reduction: " ^ ENIAMsemStringOf.linear_term 0 t) :: !r
567 596  
... ... @@ -605,6 +634,38 @@ let rec remove_variant_labels map = function
605 634 | Val s -> Val s
606 635 | t -> failwith ("remove_variant_labels: " ^ ENIAMsemStringOf.linear_term 0 t)
607 636  
  637 +let rec set_variant_labels map = function
  638 + Concept c -> Concept{c with
  639 + c_sense=set_variant_labels map c.c_sense;
  640 + c_name=set_variant_labels map c.c_name;
  641 + c_quant=set_variant_labels map c.c_quant;
  642 + c_cat=set_variant_labels map c.c_cat;
  643 + c_relations=set_variant_labels map c.c_relations}
  644 + | Context c -> Context{c with
  645 + cx_sense=set_variant_labels map c.cx_sense;
  646 + cx_contents=set_variant_labels map c.cx_contents;
  647 + cx_cat=set_variant_labels map c.cx_cat;
  648 + cx_relations=set_variant_labels map c.cx_relations}
  649 + | Relation(r,a,t) -> Relation(r,a,set_variant_labels map t)
  650 + | RevRelation(r,a,t) -> RevRelation(r,a,set_variant_labels map t)
  651 + | SingleRelation r -> SingleRelation r
  652 + | Tuple l -> Tuple(List.rev (Xlist.rev_map l (set_variant_labels map)))
  653 + | Variant(e,l) ->
  654 + let e = try StringMap.find map e with Not_found -> ENIAM_LCGreductions.get_variant_label () in
  655 + let l = Xlist.rev_map l (fun (i,t) -> i, set_variant_labels map t) in
  656 + Variant(e,List.rev l)
  657 + | Dot -> Dot
  658 + | Val s -> Val s
  659 + | t -> failwith ("set_variant_labels: " ^ ENIAMsemStringOf.linear_term 0 t)
  660 +
  661 +let manage_variant_labels t =
  662 + ENIAM_LCGreductions.reset_variant_label ();
  663 + let qmap = count_variant_labels StringQMap.empty t in
  664 + let map = StringQMap.fold qmap StringMap.empty (fun map k _ ->
  665 + if k = "" then map else
  666 + StringMap.add map k (ENIAM_LCGreductions.get_variant_label ())) in
  667 + set_variant_labels map t
  668 +
608 669 let rec simplify_tree = function
609 670 Concept c -> Concept{c with
610 671 c_sense=simplify_tree c.c_sense;
... ... @@ -625,6 +686,7 @@ let rec simplify_tree = function
625 686 let l = Xlist.fold l [] (fun l t ->
626 687 match simplify_tree t with
627 688 Dot -> l
  689 + | Tuple l2 -> l2 @ l
628 690 | t -> t :: l) in
629 691 (match l with
630 692 [] -> Dot
... ... @@ -655,7 +717,8 @@ let rec simplify_tree = function
655 717 c_cat = simplify_tree (Variant(e,lt3))}
656 718 | Context c ->
657 719 let lt1,lt2,lt3 = Xlist.fold l ([],[],[]) (fun (lt1,lt2,lt3) -> function
658   - i,Context c2 -> if c.cx_sense = c2.cx_sense then (i,c2.cx_contents) :: lt1, (i,c2.cx_relations) :: lt2, (i,c2.cx_cat) :: lt3 else raise Not_found
  720 + i,Context c2 -> if c.cx_sense = c2.cx_sense && c.cx_label = c2.cx_label &&
  721 + c.cx_def_label = c2.cx_def_label then (i,c2.cx_contents) :: lt1, (i,c2.cx_relations) :: lt2, (i,c2.cx_cat) :: lt3 else raise Not_found
659 722 | _ -> raise Not_found) in
660 723 let e = if e = "" then ENIAM_LCGreductions.get_variant_label () else e in
661 724 Context{c with
... ...
semantics/ENIAMsemGraphOf.ml
... ... @@ -32,7 +32,7 @@ let string_of_node t =
32 32 let l = [
33 33 "ORTH",Val t.orth;"LEMMA",Val t.lemma;"POS",Val t.pos;"ID",Val (string_of_int t.id);"LABEL",Val t.label;"DEF-LABEL",Val t.def_label;"WEIGHT",Val (string_of_float t.weight);
34 34 "SYMBOL",t.symbol;"ARG_SYMBOL",t.arg_symbol;"ARG_DIR",Val t.arg_dir;
35   - "GF",Val t.gf;"ROLE",Val t.role;"ROLE_ATTR",Val t.role_attr;"SELPREFS",t.selprefs;"MEANING",t.meaning;
  35 + "GF",Val t.gf;"ROLE",Val t.role;"ROLE_ATTR",Val t.role_attr;"SELPREFS",t.selprefs;"SENSE",t.sense;
36 36 "AROLE",Val t.arole;"AROLE_ATTR",Val t.role_attr;"AREV",Val (string_of_bool t.arev);"SEM_ARGS",t.sem_args] @ t.attrs in
37 37 "{ " ^ String.concat " | " (Xlist.map l (fun (e,t) -> "{ " ^ e ^ " | " ^ escape_string (ENIAMsemStringOf.linear_term 0 t) ^ " }")) ^ " }"
38 38  
... ... @@ -225,7 +225,9 @@ let rec print_graph2_rec file edge_rev edge_label edge_style edge_head upper = f
225 225 | Context t ->
226 226 let id = !id_counter in
227 227 incr id_counter;
228   - fprintf file " subgraph cluster%d {\nlabel=\"%s%s\"\n" id
  228 + fprintf file " subgraph cluster%d {\nlabel=\"%s%s%s%s\"\n" id
  229 + (if t.cx_label="" then "" else "?" ^ t.cx_label ^ " ")
  230 + (if t.cx_def_label="" then "" else "*" ^ t.cx_def_label ^ " ")
229 231 (if t.cx_cat=Dot then "" else escape_string (ENIAMsemStringOf.linear_term 0 t.cx_cat ^ " "))
230 232 (if t.cx_sense = Dot then "" else escape_string (ENIAMsemStringOf.linear_term 0 t.cx_sense));
231 233 let iid = print_graph2_rec file false "" "" "" 0 t.cx_contents in
... ... @@ -268,6 +270,13 @@ let rec print_graph2_rec file edge_rev edge_label edge_style edge_head upper = f
268 270 let _ = print_graph2_rec file false "" "" "" id t in
269 271 let _ = print_graph2_rec file false "" "" "" id s in
270 272 id
  273 + | AddSingleRelation(role,t) ->
  274 + let id = !id_counter in
  275 + incr id_counter;
  276 + fprintf file " %d [shape=circle,label=\"AddSingleRelation\\n%s\"]\n" id (ENIAMsemStringOf.linear_term 0 role);
  277 + print_edge2 file edge_rev edge_label edge_style edge_head "" upper id;
  278 + let _ = print_graph2_rec file false "" "" "" id t in
  279 + id
271 280 | RemoveRelation(role,role_attr,t) ->
272 281 let id = !id_counter in
273 282 incr id_counter;
... ...
semantics/ENIAMsemLatexOf.ml
... ... @@ -50,7 +50,7 @@ let rec linear_term c = function
50 50 "WEIGHT",Val (string_of_float t.weight);"SYMBOL",t.symbol;
51 51 "ARG_SYMBOL",t.arg_symbol;"ARG_DIR",Val t.arg_dir;
52 52 "GF",Val t.gf; "ROLE", Val t.role; "ROLE-ATTR", Val t.role_attr;
53   - "SELPREFS",t.selprefs; "MEANING",t.meaning;
  53 + "SELPREFS",t.selprefs; "SENSE",t.sense;
54 54 "AROLE", Val t.role; "AROLE-ATTR", Val t.role_attr; "AREV", Val (if t.arev then "+" else "-");
55 55 "SEM-ARGS",t.sem_args; "ARGS",t.args] @ t.attrs) (fun (e,t) ->
56 56 "\\text{" ^ (Xlatex.escape_string e) ^ "} & " ^ (linear_term 0 t)))) ^ "\\end{array}\\right]}"
... ... @@ -66,7 +66,7 @@ let rec linear_term c = function
66 66 | Context c ->
67 67 "{\\left[\\begin{array}{ll}" ^
68 68 (String.concat "\\\\ " (Xlist.map ([
69   - "SENSE",c.cx_sense;"CAT",c.cx_cat;
  69 + "SENSE",c.cx_sense;"CAT",c.cx_cat;"LABEL",Val c.cx_label;"DEF-LABEL",Val c.cx_def_label;
70 70 "VARIABLE",Val (fst c.cx_variable ^ "_" ^ snd c.cx_variable);"POS",Val (string_of_int c.cx_pos);
71 71 "RELATIONS",c.cx_relations;"CONTENTS",c.cx_contents]) (fun (e,t) ->
72 72 "\\text{" ^ (Xlatex.escape_string e) ^ "} & " ^ (linear_term 0 t)))) ^ "\\end{array}\\right]}"
... ... @@ -75,6 +75,7 @@ let rec linear_term c = function
75 75 | SingleRelation r -> "{\\bf singlerelation}(" ^ linear_term 0 r ^ ")"
76 76 (* | TripleRelation(r,a,c,t) -> "{\\bf triplerelation}(" ^ (*linear_term 0*) r ^ "," ^ (*linear_term 0*) a ^ "," ^ linear_term 0 c ^ "," ^ linear_term 0 t ^ ")" *)
77 77 | AddRelation(t,r,a,s) -> "{\\bf addrelation}(" ^ linear_term 0 t ^ "," ^ r ^ "," ^ a ^ "," ^ linear_term 0 s ^ ")"
  78 + | AddSingleRelation(r,s) -> "{\\bf addrelation}(" ^ linear_term 0 r ^ "," ^ linear_term 0 s ^ ")"
78 79 | RemoveRelation(r,a,t) -> "{\\bf removerelation}(" ^ r ^ "," ^ a ^ "," ^ linear_term 0 t ^ ")"
79 80 | SetContextName(s,t) -> "{\\bf setcontextname}(" ^ linear_term 0 s ^ "," ^ linear_term 0 t ^ ")"
80 81 | CreateContext(s,t) -> "{\\bf createcontext}(" ^ linear_term 0 (Context s) ^ "," ^ linear_term 0 t ^ ")"
... ...
semantics/ENIAMsemLexicon.ml
... ... @@ -45,7 +45,7 @@ let parse_multi p = function
45 45 | tokens -> tokens,p
46 46  
47 47 let parse_morf p = function
48   - [T "1"] -> {p with is_necessary=Opt}
  48 + [T "1"] -> if p.is_necessary=Multi then p else {p with is_necessary=Opt}
49 49 | tokens ->
50 50 let l = Xlist.map (try Lexer.split_symbol (T "*") [] tokens with _ -> failwith "parse_morf: split_symbol *") (function
51 51 [T s] -> Atom s
... ... @@ -75,6 +75,7 @@ let parse_role p = function
75 75 | "Has" -> {p with role="Has"; sel_prefs=[SynsetName "ALL"]}
76 76 | "PHas" -> {p with role="PHas"; sel_prefs=[SynsetName "ALL"]}
77 77 | "PApoz" -> {p with role="PApoz"; sel_prefs=[SynsetName "ALL"]}
  78 + | "Merge" -> {p with role="Merge"; sel_prefs=[SynsetName "ALL"]}
78 79 | s -> failwith ("parse_role: " ^ s)
79 80  
80 81 let parse_entry = function
... ...
semantics/ENIAMsemStringOf.ml
... ... @@ -43,7 +43,7 @@ let rec linear_term c = function
43 43 | Context c ->
44 44 "[" ^
45 45 (String.concat "; " (Xlist.map ([
46   - "SENSE",c.cx_sense;"CAT",c.cx_cat;
  46 + "SENSE",c.cx_sense;"CAT",c.cx_cat;"LABEL",Val c.cx_label;"DEF-LABEL",Val c.cx_def_label;
47 47 "VARIABLE",Val (fst c.cx_variable ^ "_" ^ snd c.cx_variable);"POS",Val (string_of_int c.cx_pos);
48 48 "RELATIONS",c.cx_relations;"CONTENTS",c.cx_contents]) (fun (e,t) ->
49 49 e ^ ": " ^ (linear_term 0 t)))) ^ "]"
... ... @@ -52,6 +52,7 @@ let rec linear_term c = function
52 52 | SingleRelation r -> "singlerelation(" ^ linear_term 0 r ^ ")"
53 53 (* | TripleRelation(r,a,c,t) -> "triplerelation(" ^ r ^ "," ^ a ^ "," ^ linear_term 0 c ^ "," ^ linear_term 0 t ^ ")" *)
54 54 | AddRelation(t,r,a,s) -> "addrelation(" ^ linear_term 0 t ^ "," ^ r ^ "," ^ a ^ "," ^ linear_term 0 s ^ ")"
  55 + | AddSingleRelation(r,s) -> "addsinglerelation(" ^ linear_term 0 r ^ "," ^ linear_term 0 s ^ ")"
55 56 | RemoveRelation(r,a,t) -> "removerelation(" ^ r ^ "," ^ a ^ "," ^ linear_term 0 t ^ ")"
56 57 | SetContextName(s,t) -> "setcontextname(" ^ linear_term 0 s ^ "," ^ linear_term 0 t ^ ")"
57 58 | CreateContext(s,t) -> "createcontext(" ^ linear_term 0 (Context s) ^ "," ^ linear_term 0 t ^ ")"
... ...
semantics/ENIAMsemTypes.ml
... ... @@ -30,9 +30,9 @@ type node = {
30 30 amorf: ENIAMwalTypes.morf;
31 31 arole: string;
32 32 arole_attr: string;
33   - meaning: string;
  33 + sense: string;
34 34 hipero: StringSet.t;
35   - meaning_weight: float;
  35 + sense_weight: float;
36 36 position: ENIAMwalTypes.schema_field;*)
37 37 attrs: (string * linear_term) list;
38 38 args: linear_term;
... ... @@ -40,13 +40,14 @@ type node = {
40 40 role: string;
41 41 role_attr: string;
42 42 selprefs: linear_term;
43   - meaning: linear_term;
  43 + sense: linear_term;
44 44 arole: string;
45 45 arole_attr: string;
46 46 arev: bool;
47 47 sem_args: linear_term;
48 48 label: string;
49 49 def_label: string;
  50 + snode: string;
50 51 }
51 52  
52 53 and concept =
... ... @@ -57,7 +58,7 @@ and concept =
57 58 c_pos: int; c_cat: linear_term}
58 59  
59 60 and context =
60   - {cx_sense: linear_term; cx_contents: linear_term;
  61 + {cx_sense: linear_term; cx_contents: linear_term; cx_label: string; cx_def_label: string;
61 62 cx_relations: linear_term; cx_variable: (string * string); cx_pos: int; cx_cat: linear_term}
62 63  
63 64 and linear_term =
... ... @@ -72,9 +73,10 @@ and linear_term =
72 73 | RevRelation of string * string * linear_term (* role * role_attr * concept *)
73 74 | SingleRelation of linear_term
74 75 (* | TripleRelation of string * string * linear_term * linear_term (* role * role_attr * concept *) *)
75   - | AddRelation of linear_term * string * string * linear_term (* nadrządnik * role * role_attr * podrzędnik *)
  76 + | AddRelation of linear_term * string * string * linear_term (* nadrzędnik * role * role_attr * podrzędnik *)
  77 + | AddSingleRelation of linear_term * linear_term (* role * podrzędnik *)
76 78 | RemoveRelation of string * string * linear_term
77   - | SetContextName of linear_term * linear_term (* meaning * concept *)
  79 + | SetContextName of linear_term * linear_term (* sense * concept *)
78 80 | CreateContext of context * linear_term (* context * args *)
79 81 (* | MakeTripleRelation of string * string * linear_term (* role * role_attr * concept *) *)
80 82 | ManageCoordination of node * linear_term
... ...
semantics/ENIAMsemValence.ml
... ... @@ -22,7 +22,7 @@ open ENIAM_LCGlexiconTypes
22 22 open ENIAMlexSemanticsTypes
23 23 open Xstd
24 24  
25   -type pos = {role: linear_term; role_attr: linear_term; selprefs: linear_term; gf: ENIAMwalTypes.gf;
  25 +type pos = {role: linear_term; role_attr: linear_term; selprefs: linear_term; catprefs: string list; gf: ENIAMwalTypes.gf;
26 26 cr: string list; ce: string list;
27 27 is_necessary: bool; is_pro: bool; is_prong: bool; is_multi: bool; dir: string; morfs: StringSet.t}
28 28  
... ... @@ -148,6 +148,8 @@ let rec apply_selector v2 chosen_map = function
148 148 | Nsem,("NSEM",v) :: l -> match_value v2 chosen_map v
149 149 | Case,("CASE",v) :: l -> match_value v2 chosen_map v
150 150 | Mode,("MODE",v) :: l -> match_value v2 chosen_map v
  151 + | Acm,("ACM",v) :: l -> match_value v2 chosen_map v
  152 + | Cat,("CAT",v) :: l -> match_value v2 chosen_map v
151 153 | sel,(attr,v) :: l -> (*print_endline ("apply_selector: " ^ ENIAMcategoriesPL.string_of_selector sel ^ " " ^ attr);*) apply_selector v2 chosen_map (sel,l)
152 154  
153 155 let rec apply_neg_selector vals chosen_map = function
... ... @@ -218,7 +220,22 @@ let string_of_arg arg =
218 220 String.concat ", " (Xlist.map arg (fun ((arg_symbol,dir),t) -> (string_of_argdir dir) ^ arg_symbol ^ ":" ^ ENIAM_LCGstringOf.linear_term 0 t))
219 221  
220 222 let string_of_position p =
221   - (string_of_argdir p.dir) ^ String.concat "+" (StringSet.to_list p.morfs)
  223 + (string_of_argdir p.dir) ^
  224 + (if p.is_multi then "?" else "") ^
  225 + String.concat "+" (StringSet.to_list p.morfs)
  226 +
  227 +let manage_arg p t =
  228 + let t = SetAttr("gf",Val (ENIAMwalStringOf.gf p.gf),t) in
  229 + let t =
  230 + if p.gf = ENIAMwalTypes.SUBJ || p.gf = ENIAMwalTypes.OBJ || p.gf = ENIAMwalTypes.ARG then
  231 + SetAttr("role",p.role,SetAttr("role-attr",p.role_attr,SetAttr("selprefs",p.selprefs,t)))
  232 + else if p.gf = ENIAMwalTypes.CORE then SetAttr("selprefs",p.selprefs,t)
  233 + else if p.gf = ENIAMwalTypes.ADJUNCT || p.gf = ENIAMwalTypes.NOSEM || p.gf = ENIAMwalTypes.CORE then t
  234 + else failwith "manage_arg: ni 2" in
  235 + let t = Xlist.fold p.cr t (fun t cr -> SetAttr("controller",Val cr,t)) in
  236 + let t = Xlist.fold p.ce t (fun t ce -> SetAttr("controllee",Val ce,t)) in
  237 + let t = if p.gf = ENIAMwalTypes.NOSEM then Dot else t in
  238 + t
222 239  
223 240 let rec match_arg_positions lemma arg rev = function
224 241 p :: positions ->
... ... @@ -228,16 +245,13 @@ let rec match_arg_positions lemma arg rev = function
228 245 (match l with
229 246 [] -> (*print_endline "match_arg_positions: not matched";*) match_arg_positions lemma arg (p :: rev) positions
230 247 | [t] ->
231   - let t = SetAttr("gf",Val (ENIAMwalStringOf.gf p.gf),t) in
232   - let t =
233   - if p.gf = ENIAMwalTypes.SUBJ || p.gf = ENIAMwalTypes.OBJ || p.gf = ENIAMwalTypes.ARG then
234   - SetAttr("role",p.role,SetAttr("role-attr",p.role_attr,SetAttr("selprefs",p.selprefs,t)))
235   - else if p.gf = ENIAMwalTypes.CORE then SetAttr("selprefs",p.selprefs,t)
236   - else if p.gf = ENIAMwalTypes.ADJUNCT || p.gf = ENIAMwalTypes.NOSEM || p.gf = ENIAMwalTypes.CORE then t
237   - else failwith "match_arg_positions: ni 2" in
238   - let t = Xlist.fold p.cr t (fun t cr -> SetAttr("controller",Val cr,t)) in
239   - let t = Xlist.fold p.ce t (fun t ce -> SetAttr("controllee",Val ce,t)) in
240   - let t = if p.gf = ENIAMwalTypes.NOSEM then Dot else t in
  248 + let t = manage_arg p t in
  249 + if p.is_multi then (t, rev @ (p :: positions)) :: (match_arg_positions lemma arg (p :: rev) positions)
  250 + else (t, rev @ positions) :: (match_arg_positions lemma arg (p :: rev) positions)
  251 + | [t1;t2] -> (* FIXME: przydałoby się to uogólnić na listę dowolnej długości *)
  252 + let t1 = manage_arg p t1 in
  253 + let t2 = manage_arg p t2 in
  254 + let t = Variant("",["1",t1;"2",t2]) in
241 255 if p.is_multi then (t, rev @ (p :: positions)) :: (match_arg_positions lemma arg (p :: rev) positions)
242 256 else (t, rev @ positions) :: (match_arg_positions lemma arg (p :: rev) positions)
243 257 | _ -> failwith ("match_arg_positions: lemma=" ^ lemma ^ " arg=" ^ string_of_arg arg ^ " position=" ^ string_of_position p))
... ... @@ -257,16 +271,17 @@ let rec match_args_positions_rec lemma prong_attrs positions = function
257 271 [Xlist.fold positions [] (fun found p ->
258 272 if not p.is_pro then found else
259 273 let attrs = if p.is_prong then prong_attrs else [] in
260   - let cats = p.selprefs(*ENIAM_LCGrules.make_variant (ENIAMwalRenderer.extract_sel_prefs p.sel_prefs)*) in
  274 + let cats = p.catprefs(*ENIAM_LCGrules.make_variant (ENIAMwalRenderer.extract_sel_prefs p.sel_prefs)*) in
261 275 let lemma = get_pro_lemma attrs in
262   - let attrs = ["CAT",cats;"PROJ",cats] @ attrs in
263 276 let sem_args = try StringMap.find ENIAMlexSemanticsData.pron_sem_args lemma with Not_found -> failwith "match_args_positions_rec" in
264   - let attrs = ["meaning",Val lemma;"hipero",Tuple[Val "ALL"; Val "0"];"role",p.role;
  277 + let attrs = ["sense",Val lemma;"hipero",Tuple[Val "ALL"; Val "0"];"role",p.role;
265 278 "role-attr",p.role_attr; "selprefs",p.selprefs; "gf",Val (ENIAMwalStringOf.gf p.gf);
266 279 "agf",Val ""; "sem-args",make_sem_args sem_args; "rev-hipero",Val "+"] @ attrs in
267 280 let attrs = Xlist.fold p.cr attrs (fun attrs cr -> ("controller",Val cr) :: attrs) in
268 281 let attrs = Xlist.fold p.ce attrs (fun attrs ce -> ("controllee",Val ce) :: attrs) in
269   - Node{ENIAM_LCGrenderer.empty_node with lemma=lemma; pos="pro"; attrs=attrs} :: found)]
  282 + Xlist.fold cats found (fun found cat ->
  283 + let attrs = ["CAT",Val cat;"COERCED",Val cat] @ attrs in
  284 + Node{ENIAM_LCGrenderer.empty_node with lemma=lemma; pos="pro"; attrs=attrs} :: found))]
270 285  
271 286 (* FIXME: opcjonalność podrzędników argumentów zleksykalizowanych *)
272 287  
... ... @@ -314,6 +329,7 @@ let translate_position id p =
314 329 [] -> Dot
315 330 | [s] -> Val s
316 331 | l -> Tuple(Xlist.rev_map l (fun s -> Val s)));
  332 + catprefs = p.ENIAMwalTypes.cat_prefs;
317 333 gf=p.ENIAMwalTypes.gf;
318 334 cr=Xlist.map p.ENIAMwalTypes.cr (fun cr -> id ^ "-" ^ cr);
319 335 ce=Xlist.map p.ENIAMwalTypes.ce (fun ce -> id ^ "-" ^ ce);
... ... @@ -369,17 +385,19 @@ let rec assign_frames_rec tokens lex_sems tree arg_symbols visited = function
369 385 let frame = ENIAMsemLexicon.extend_frame phsymbol frame in
370 386 (* print_endline "passed"; *)
371 387 (attrs,symbol,frame,Xlist.rev_map frame.positions (translate_position (string_of_int t.id))) :: frames
372   - with Not_found -> (*print_endline "rejected";*) frames) in
373   - if frames = [] then failwith "assign_frames_rec: no frame" else
  388 + with Not_found ->
  389 + (* print_endline "rejected"; *)
  390 + frames) in
  391 + if frames = [] then failwith ("assign_frames_rec: no frame phsymbol='" ^ phsymbol ^ "' node='" ^ t.lemma ^ "'") else
374 392 let prong_attrs = get_prong_attrs t.attrs in
375 393 let e = ENIAM_LCGreductions.get_variant_label () in
376 394 let l,_ = Xlist.fold frames ([],1) (fun (l,n) (attrs,symbol,frame,positions) ->
377 395 (* Printf.printf "assign_frames_rec 3: lemma=%s args=[%s] positions=[%s]\n%!" t.lemma (String.concat "; " (Xlist.map args string_of_arg)) (String.concat "; " (Xlist.map positions string_of_position)); *)
378   - if frame.meanings = [] then failwith ("assign_frames_rec: no meanings '" ^ t.lemma ^ "'") else
  396 + if frame.senses = [] then failwith ("assign_frames_rec: no senses '" ^ t.lemma ^ "'") else
379 397 Xlist.fold (match_args_positions t.lemma prong_attrs args positions) (l,n) (fun (l,n) args ->
380   - Xlist.fold frame.meanings (l,n) (fun (l,n) (meaning,hipero,weight) ->
  398 + Xlist.fold frame.senses (l,n) (fun (l,n) (sense,hipero,weight) ->
381 399 (string_of_int n, Node{t with attrs=
382   - ("meaning",Val meaning) ::
  400 + ("sense",Val sense) ::
383 401 ("hipero",ENIAM_LCGrules.make_variant (Xlist.map hipero (fun (h,n) -> Tuple[Val h;Val(string_of_int n)]))) ::
384 402 ("arole",Val frame.arole) ::
385 403 ("arole-attr",Val frame.arole_attr) ::
... ... @@ -390,7 +408,9 @@ let rec assign_frames_rec tokens lex_sems tree arg_symbols visited = function
390 408 ("fopinion",Val (ENIAMwalStringOf.opinion frame.fopinion)) ::
391 409 ("sopinion",Val (ENIAMwalStringOf.opinion frame.sopinion)) :: attrs; args=args; symbol=symbol}) ::
392 410 l,n+1))) in
393   - if l = [] then ((*print_endline ("assign_frames_rec 4: no frame assingment found for " ^ t.lemma ^ " " ^ ENIAM_LCGstringOf.linear_term 0 t.symbol);*)raise (NoFrame(t.lemma,ENIAM_LCGstringOf.linear_term 0 t.symbol,visited))) else
  411 + if l = [] then (
  412 + (* print_endline ("assign_frames_rec 4: no frame assingment found for " ^ t.lemma ^ " " ^ ENIAM_LCGstringOf.linear_term 0 t.symbol); *)
  413 + raise (NoFrame(t.lemma,ENIAM_LCGstringOf.linear_term 0 t.symbol,visited))) else
394 414 Variant(e,l),visited
395 415 | Variant(e,l) ->
396 416 let a = ref "" in
... ...
semantics/ENIAMsemXMLof.ml
... ... @@ -36,7 +36,7 @@ let rec linear_term = function
36 36 Xml.Element("attrs",[],Xlist.map t.attrs (fun (k,v) -> Xml.Element("attr",["name",k],[linear_term v])));
37 37 Xml.Element("args",[],[linear_term t.args]);
38 38 Xml.Element("selprefs",[],[linear_term t.selprefs]);
39   - Xml.Element("meaning",[],[linear_term t.meaning]);
  39 + Xml.Element("sense",[],[linear_term t.sense]);
40 40 Xml.Element("sem_args",[],[linear_term t.sem_args])])
41 41 | Ref i -> Xml.Element("Ref",["index",string_of_int i],[])
42 42 | Concept c ->
... ... @@ -49,8 +49,8 @@ let rec linear_term = function
49 49 Xml.Element("relations",[],[linear_term c.c_relations]);
50 50 Xml.Element("cat",[],[linear_term c.c_cat])])
51 51 | Context c ->
52   - Xml.Element("Context",
53   - ["variable",fst c.cx_variable ^ "_" ^ snd c.cx_variable;"pos",string_of_int c.cx_pos],
  52 + Xml.Element("Context",["label",c.cx_label;"def_label",c.cx_def_label;
  53 + "variable",fst c.cx_variable ^ "_" ^ snd c.cx_variable;"pos",string_of_int c.cx_pos],
54 54 [Xml.Element("sense",[],[linear_term c.cx_sense]);
55 55 Xml.Element("contents",[],[linear_term c.cx_contents]);
56 56 Xml.Element("relations",[],[linear_term c.cx_relations]);
... ... @@ -61,6 +61,9 @@ let rec linear_term = function
61 61 | AddRelation(t,r,a,s) ->
62 62 Xml.Element("AddRelation",["role",r;"role_attribute",a],
63 63 [Xml.Element("",[],[linear_term t]);Xml.Element("",[],[linear_term s])])
  64 + | AddSingleRelation(r,s) ->
  65 + Xml.Element("AddSingleRelation",[],
  66 + [Xml.Element("",[],[linear_term r]);Xml.Element("",[],[linear_term s])])
64 67 | RemoveRelation(r,a,t) -> Xml.Element("RemoveRelation",["role",r;"role_attribute",a],[linear_term t])
65 68 | SetContextName(s,t) ->
66 69 Xml.Element("SetContextName",[],[linear_term s;linear_term t])
... ...
semsources/dzieła/HT/spójniki_wspol.xlsx 0 → 100644
No preview for this file type
semsources/dzieła/HT/synsety i antonimy (1).xlsx 0 → 100644
No preview for this file type
semsources/dzieła/HT/synsety i antonimy .xlsx 0 → 100644
No preview for this file type
semsources/dzieła/HT/synsety.zip 0 → 100644
No preview for this file type
semsources/dzieła/HT/synsety_opis.pdf 0 → 100644
No preview for this file type
semsources/dzieła/HT/synsety_spojniki_podrz_wszystkie.xlsx 0 → 100644
No preview for this file type
semsources/dzieła/JP/Anotacja jednostek leksykalnych - ENIAM.xlsx 0 → 100644
No preview for this file type
semsources/dzieła/JP/Anotacja semantyczna kublików.xlsx 0 → 100644
No preview for this file type
semsources/dzieła/JP/Opis do dzieła- instrumentalny.docx 0 → 100644
No preview for this file type
semsources/dzieła/JP/raport z anotacji semantycznej kublików.docx 0 → 100644
No preview for this file type