diff --git a/LCGlexicon/ENIAM_LCGlexicon.ml b/LCGlexicon/ENIAM_LCGlexicon.ml index 89930b4..ad202d9 100644 --- a/LCGlexicon/ENIAM_LCGlexicon.ml +++ b/LCGlexicon/ENIAM_LCGlexicon.ml @@ -235,6 +235,8 @@ let make_node id orth lemma pos syntax weight cat_list is_raised = | Pos2 -> attrs | Cat -> ("CAT",SubstVar "cat") :: attrs | Coerced -> ("COERCED",SubstVar "coerced") :: attrs + | Role -> ("ROLE",SubstVar "role") :: attrs + | SNode -> ("NODE",SubstVar "node") :: attrs | Number -> ("NUM",SubstVar "number") :: attrs | Case -> ("CASE",SubstVar "case") :: attrs | Gender -> ("GEND",SubstVar "gender") :: attrs diff --git a/LCGlexicon/ENIAM_LCGlexiconTypes.ml b/LCGlexicon/ENIAM_LCGlexiconTypes.ml index 4f3dcd7..5990f8f 100644 --- a/LCGlexicon/ENIAM_LCGlexiconTypes.ml +++ b/LCGlexicon/ENIAM_LCGlexiconTypes.ml @@ -17,7 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. *) -type categories = {lemma: string; pos: string; pos2: string; cat: string; coerced: string list; +type categories = {lemma: string; pos: string; pos2: string; cat: string; coerced: string list; roles: string list; snode: string list; numbers: string list; cases: string list; genders: string list; persons: string list; grads: string list; praeps: string list; acms: string list; aspects: string list; negations: string list; moods: string list; tenses: string list; @@ -25,7 +25,8 @@ type categories = {lemma: string; pos: string; pos2: string; cat: string; coerce } type selector = - Lemma | (*NewLemma |*) Pos | Pos2 | Cat | Coerced | Number | Case | Gender | Person | Grad | Praep | + Lemma | (*NewLemma |*) Pos | Pos2 | Cat | Coerced | Role | SNode | + Number | Case | Gender | Person | Grad | Praep | Acm | Aspect | Negation | Mood | Tense | Nsyn | Nsem | Ctype | Mode | Psem | Icat | Inumber | Igender | Iperson | Nperson | Ncat | Plemma | Unumber | Ucase | Ugender | Uperson | Amode @@ -73,7 +74,7 @@ type selector_relation = Eq | Neq (*| StrictEq*) *) -let empty_cats = {lemma=""; 
pos=""; pos2=""; cat="X"; coerced=[]; +let empty_cats = {lemma=""; pos=""; pos2=""; cat="X"; coerced=[]; roles=[]; snode=[]; numbers=[]; cases=[]; genders=[]; persons=[]; grads=[]; praeps=[]; acms=[]; aspects=[]; negations=[]; moods=[]; tenses=[]; nsyn=[]; nsem=[]; modes=[]; psem=[]; diff --git a/LCGlexicon/ENIAM_LCGlexiconTypes_old.ml b/LCGlexicon/ENIAM_LCGlexiconTypes_old.ml new file mode 100644 index 0000000..4f3dcd7 --- /dev/null +++ b/LCGlexicon/ENIAM_LCGlexiconTypes_old.ml @@ -0,0 +1,108 @@ +(* + * ENIAM_LCGlexicon is a library that provides LCG lexicon for Polish + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences + * + * This library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ *) + +type categories = {lemma: string; pos: string; pos2: string; cat: string; coerced: string list; + numbers: string list; cases: string list; genders: string list; persons: string list; + grads: string list; praeps: string list; acms: string list; + aspects: string list; negations: string list; moods: string list; tenses: string list; + nsyn: string list; nsem: string list; modes: string list; psem: string list; + } + +type selector = + Lemma | (*NewLemma |*) Pos | Pos2 | Cat | Coerced | Number | Case | Gender | Person | Grad | Praep | + Acm | Aspect | Negation | Mood | Tense | Nsyn | Nsem | Ctype | Mode | Psem | + Icat | Inumber | Igender | Iperson | Nperson | Ncat | Plemma | + Unumber | Ucase | Ugender | Uperson | Amode + +module OrderedSelector = struct + type t = selector + let compare = compare +end + +module SelectorMap=Xmap.Make(OrderedSelector) +module SelectorSet=Xset.Make(OrderedSelector) + +type rule = + Bracket + | Quant of (selector * ENIAM_LCGtypes.internal_grammar_symbol) list + | Raised of selector list + | Syntax of ENIAM_LCGtypes.grammar_symbol + | Sem of string + +type rule_sem = + BasicSem of selector list + | RaisedSem of selector list * selector list + | TermSem of selector list * string + | QuotSem of selector list + | InclusionSem of selector list + | ConjSem of selector list + +type selector_relation = Eq | Neq (*| StrictEq*) + +(* x="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jedną z wartości atrybutu x, reguła zostanie wykonana dla x z usuniętymi pozostałymi wartościami *) +(* x!="s" oznacza, że żeby reguła została użyta token musi mieć jako jedną z wartości atrybutu x symbol inny od "s", reguła zostanie wykonana dla x z usuniętą wartością "s" *) +(* x=="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jedyną wartość atrybutu x *) + +(* wzajemne zależności między kategoriami (np między case i person w subst) są rozstrzygane w ENIAMcategories *) + +(* Basic oznacza że kwantyfikacja i term są 
generowane zgodnie ze standardowymi regułami: + - kwantyfikacja przebiega po wszystkich zdefiniowanych kategoriach i wartościach wziętych z cats + - typ jest zadany bezpośrednio + - term tworzy wierzchołek w strukturze zależnościowej etykietowany wszystkimi zdefiniowanymi kategoriami + + Quant oznacza że typ i term są generowane zgodnie ze standardowymi regułami: + - kwantyfikacja jest zadana bezpośrednio + - typ jest zadany bezpośrednio + - term tworzy wierzchołek w strukturze zależnościowej etykietowany wszystkimi zdefiniowanymi kategoriami + +*) + +let empty_cats = {lemma=""; pos=""; pos2=""; cat="X"; coerced=[]; + numbers=[]; cases=[]; genders=[]; persons=[]; + grads=[]; praeps=[]; acms=[]; aspects=[]; negations=[]; moods=[]; tenses=[]; + nsyn=[]; nsem=[]; modes=[]; psem=[]; + } + +let default_category_flag = ref true + +let resource_path = + try Sys.getenv "ENIAM_RESOURCE_PATH" + with Not_found -> + if Sys.file_exists "/usr/share/eniam" then "/usr/share/eniam" else + if Sys.file_exists "/usr/local/share/eniam" then "/usr/local/share/eniam" else + if Sys.file_exists "resources" then "resources" else + failwith "resource directory does not exists" + +let data_path = + try Sys.getenv "ENIAM_USER_DATA_PATH" + with Not_found -> "data" + +let rules_filename = resource_path ^ "/LCGlexicon/lexicon-pl.dic" +let user_lexicon_filename = data_path ^ "/lexicon.dic" +let user_cats_filename = data_path ^ "/senses.tab" +let user_coerced_filename = data_path ^ "/coercions.tab" + +let subst_uncountable_lexemes_filename = resource_path ^ "/LCGlexicon/subst_uncountable.dat" +let subst_uncountable_lexemes_filename2 = resource_path ^ "/LCGlexicon/subst_uncountable_stare.dat" +let subst_container_lexemes_filename = resource_path ^ "/LCGlexicon/subst_container.dat" +let subst_numeral_lexemes_filename = resource_path ^ "/LCGlexicon/subst_numeral.dat" +let subst_time_lexemes_filename = resource_path ^ "/LCGlexicon/subst_time.dat" + +let adv_modes_filename = resource_path ^ 
"/Walenty/adv_modes.tab" +let num_nsems_filename = resource_path ^ "/LCGlexicon/num.tab" diff --git a/LCGlexicon/ENIAM_LCGlexicon_old.ml b/LCGlexicon/ENIAM_LCGlexicon_old.ml new file mode 100644 index 0000000..89930b4 --- /dev/null +++ b/LCGlexicon/ENIAM_LCGlexicon_old.ml @@ -0,0 +1,329 @@ +(* + * ENIAM_LCGlexicon is a library that provides LCG lexicon for Polish + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences + * + * This library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ *) + +open Xstd +open ENIAM_LCGtypes +open ENIAM_LCGlexiconTypes +open ENIAMcategoriesPL + +let rec find_selector s = function + (t,Eq,x :: _) :: l -> if t = s then x else find_selector s l + | (t,_,_) :: l -> if t = s then failwith "find_selector 1" else find_selector s l + | [] -> failwith "find_selector 2" + +let rec get_syntax rev = function + Syntax syntax :: rule -> syntax, (List.rev rev) @ rule + | t :: rule -> get_syntax (t :: rev) rule + | [] -> failwith "get_syntax" + +let rec get_quant rev = function + Quant quant :: rule -> quant, (List.rev rev) @ rule + | t :: rule -> get_quant (t :: rev) rule + | [] -> [], List.rev rev + +let rec get_bracket rev = function + Bracket :: rule -> true, (List.rev rev) @ rule + | t :: rule -> get_bracket (t :: rev) rule + | [] -> false, List.rev rev + +let rec get_raised rev = function + Raised raised :: rule -> raised, (List.rev rev) @ rule + | t :: rule -> get_raised (t :: rev) rule + | [] -> raise Not_found + +let rec get_sem_term rev = function + Sem sem_term :: rule -> sem_term, (List.rev rev) @ rule + | t :: rule -> get_sem_term (t :: rev) rule + | [] -> raise Not_found + +let merge_quant pos_quants quants = + let map = Xlist.fold quants SelectorMap.empty (fun map (k,v) -> SelectorMap.add map k v) in + let l,map = Xlist.fold pos_quants ([],map) (fun (l,map) (cat,v) -> + if SelectorMap.mem map cat then (cat,SelectorMap.find map cat) :: l, SelectorMap.remove map cat + else (cat,v) :: l, map) in + List.rev (SelectorMap.fold map l (fun l cat v -> (cat,v) :: l)) + +let assign_quantifiers (selectors,rule,weight) = + let pos = find_selector Pos selectors in + let categories = + try StringMap.find pos_categories pos + with Not_found -> failwith ("assign_quantifiers: unknown part of speech " ^ pos) in + let categories = Xlist.map categories (fun s -> s,Top) in + let syntax,rule = get_syntax [] rule in + let quant,rule = get_quant [] rule in + let bracket,rule = get_bracket [] rule in + let quant = merge_quant categories 
quant in + selectors, (bracket,quant,syntax),(rule,weight) + +let rec check_quantifiers_int_rec (selectors,syntax) quants = function + Atom x -> () + | AVar "schema" -> () + | AVar x -> + if not (SelectorSet.mem quants (selector_of_string x)) + then failwith ("Variable '" ^ x ^ "' is not quantified in rule " ^ string_of_selectors selectors ^ ": " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax) + | With l -> Xlist.iter l (check_quantifiers_int_rec (selectors,syntax) quants) + | Zero -> () + | Top -> () + +let rec check_quantifiers_rec rule quants = function + Tensor l -> Xlist.iter l (check_quantifiers_int_rec rule quants) + | Plus l -> Xlist.iter l (check_quantifiers_rec rule quants) + | Imp(s,d,t) -> check_quantifiers_rec rule quants s; check_quantifiers_rec rule quants t + | One -> () + | ImpSet(s,l) -> check_quantifiers_rec rule quants s; Xlist.iter l (fun (_,t) -> check_quantifiers_rec rule quants t) + | Star s -> check_quantifiers_rec rule quants s + | Maybe s -> check_quantifiers_rec rule quants s + | _ -> failwith "check_quantifiers_rec" + +let check_quantifiers (selectors,(bracket,quant,syntax),_) = + let quants = Xlist.fold quant SelectorSet.empty (fun quants (q,_) -> SelectorSet.add quants q) in + check_quantifiers_rec (selectors,syntax) quants syntax + +let assign_semantics (selectors,(bracket,quant,syntax),(rule,weight)) = + let semantics = try + let raised,rule = get_raised [] rule in + if rule <> [] then failwith "assign_semantics 1" else + RaisedSem(Xlist.map quant fst, raised) + with Not_found -> (try + let term,rule = get_sem_term [] rule in + if rule <> [] then failwith "assign_semantics 2" else + TermSem(Xlist.map quant fst,term) + with Not_found -> BasicSem(Xlist.map quant fst)) in + selectors,(bracket,quant,syntax),(semantics,weight) + +let rec add_x_args_rec = function + Imp(s,d,t) -> Imp(add_x_args_rec s,d,t) + | ImpSet(s,l) -> ImpSet(add_x_args_rec s,l) + | Tensor[Atom "<conll_root>"] -> Tensor[Atom "<conll_root>"] + | Tensor l -> ImpSet(Tensor 
l,[Backward,Maybe(Tensor[Atom "X"]);Forward,Maybe(Tensor[Atom "X"])]) + | t -> failwith ("add_x_args_rec: " ^ ENIAM_LCGstringOf.grammar_symbol 0 t) + +let is_raised_semantics = function + RaisedSem _ -> true + | _ -> false + +let rec is_raised_arg = function + Imp _ -> true + | Tensor _ -> false + | Plus l -> Xlist.fold l false (fun b t -> is_raised_arg t || b) + | Maybe t -> is_raised_arg t + | One -> false + | t -> failwith ("is_raised_arg: " ^ ENIAM_LCGstringOf.grammar_symbol 0 t) + +let rec is_raised_syntax = function + Imp(s,d,t) -> is_raised_syntax s || is_raised_arg t + | ImpSet(s,l) -> is_raised_syntax s || Xlist.fold l false (fun b (_,t) -> is_raised_arg t || b) + | Tensor _ -> false + | t -> failwith ("is_raised_syntax: " ^ ENIAM_LCGstringOf.grammar_symbol 0 t) + + +let add_x_args (selectors,(bracket,quant,syntax),(semantics,weight)) = + if is_raised_syntax syntax then (selectors,(bracket,quant,syntax),(semantics,weight)) + else (selectors,(bracket,quant,add_x_args_rec syntax),(semantics,weight)) + +let rec extract_category pat rev = function + (cat,rel,v) :: l -> if cat = pat then rel,v,(List.rev rev @ l) else extract_category pat ((cat,rel,v) :: rev) l + | [] -> raise Not_found + +let dict_of_grammar grammar = + (* print_endline "dict_of_grammar"; *) + Xlist.fold grammar StringMap.empty (fun dict (selectors,(bracket,quant,syntax),semantics) -> + let pos_rel,poss,selectors = try extract_category Pos [] selectors with Not_found -> failwith "dict_of_grammar 1" in + let lemma_rel,lemmas,selectors = try extract_category Lemma [] selectors with Not_found -> Eq,[],selectors in + if pos_rel <> Eq || lemma_rel <> Eq then failwith "dict_of_grammar 2" else + let rule = selectors,(bracket,quant,syntax),semantics in + Xlist.fold poss dict (fun dict pos -> + let dict2,l = try StringMap.find dict pos with Not_found -> StringMap.empty,[] in + let dict2,l = + if lemmas = [] then dict2,rule :: l else + Xlist.fold lemmas dict2 (fun dict2 lemma -> + StringMap.add_inc dict2 
lemma [rule] (fun l -> rule :: l)),l in + StringMap.add dict pos (dict2,l))) + +let make_rules x_flag filename = + let lexicon = ENIAM_LCGlexiconParser.load_lexicon filename in + let lexicon = List.rev (Xlist.rev_map lexicon assign_quantifiers) in + Xlist.iter lexicon check_quantifiers; + let lexicon = List.rev (Xlist.rev_map lexicon assign_semantics) in + let lexicon = if x_flag then List.rev (Xlist.rev_map lexicon add_x_args) else lexicon in + dict_of_grammar lexicon + +let find_rules rules cats = + let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith ("find_rules: unable to find rules for category '" ^ cats.pos ^ "' lemma='" ^ cats.lemma ^ "'") in + (* Printf.printf "find_rules: %s %s |rules|=%d\n" cats.lemma cats.pos (Xlist.size rules); *) + let rules = try StringMap.find lex_rules cats.lemma @ rules with Not_found -> rules in + Xlist.fold rules [] (fun rules (selectors,syntax,semantics) -> + try + let cats = apply_selectors cats selectors in + (cats,syntax,semantics) :: rules + with Not_found -> rules) + +let prepare_lex_entries rules lex_entries cats = + Xlist.fold lex_entries rules (fun rules (selectors,rule) -> + let selectors = (Pos,Eq,[cats.pos]) :: selectors in + let selectors,(bracket,quant,syntax),(rule,weight) = assign_quantifiers (selectors,[Syntax rule],0.) 
in + let selectors,(bracket,quant,syntax),(semantics,weight) = assign_semantics (selectors,(bracket,quant,syntax),(rule,weight)) in + try + let cats = apply_selectors cats selectors in + (cats,(bracket,quant,syntax),(semantics,weight)) :: rules + with Not_found -> rules) + +let assign_valence valence rules = + Xlist.fold rules [] (fun l (cats,(bracket,quant,syntax),semantics) -> + (* Printf.printf "%s %s |valence|=%d\n" cats.lemma cats.pos (Xlist.size valence); *) + if ENIAM_LCGrenderer.count_avar "schema" syntax > 0 then + Xlist.fold valence l (fun l (selectors,schema) -> + try + let cats = apply_selectors cats selectors in + (cats,(bracket,quant,ENIAM_LCGrenderer.substitute_schema "schema" schema syntax),semantics) :: l + with Not_found -> l) + else (cats,(bracket,quant,syntax),semantics) :: l) + +type labels = { + number: string; + case: string; + gender: string; + person: string; + aspect: string; +} + +let get_label e = function + Number -> e.number + | Case -> e.case + | Gender -> e.gender + | Person -> e.person + | Aspect -> e.aspect + | _ -> ENIAM_LCGreductions.get_variant_label () + +let get_labels () = { + number=ENIAM_LCGreductions.get_variant_label (); + case=ENIAM_LCGreductions.get_variant_label (); + gender=ENIAM_LCGreductions.get_variant_label (); + person=ENIAM_LCGreductions.get_variant_label (); + aspect=ENIAM_LCGreductions.get_variant_label (); +} + +let make_quantification e rules = + Xlist.map rules (fun (cats,(bracket,quant,syntax),semantics) -> + let syntax = Xlist.fold (List.rev quant) syntax (fun syntax (cat,t) -> + let t = if t = Top then ENIAM_LCGrenderer.make_quant_restriction (match_selector cats cat) else t in + let category = string_of_selector cat in + WithVar(category,t,get_label e cat,syntax)) in + let syntax = if bracket then ENIAM_LCGtypes.Bracket(true,true,syntax) else ENIAM_LCGtypes.Bracket(false,false,syntax) in + cats,syntax,semantics) + +let make_node id orth lemma pos syntax weight cat_list is_raised = + let attrs = 
Xlist.fold cat_list [] (fun attrs -> function + | Lemma -> attrs + | Pos -> attrs + | Pos2 -> attrs + | Cat -> ("CAT",SubstVar "cat") :: attrs + | Coerced -> ("COERCED",SubstVar "coerced") :: attrs + | Number -> ("NUM",SubstVar "number") :: attrs + | Case -> ("CASE",SubstVar "case") :: attrs + | Gender -> ("GEND",SubstVar "gender") :: attrs + | Person -> ("PERS",SubstVar "person") :: attrs + | Grad -> ("GRAD",SubstVar "grad") :: attrs + | Praep -> attrs + | Acm -> ("ACM",SubstVar "acm") :: attrs + | Aspect -> ("ASPECT", SubstVar "aspect") :: attrs + | Negation -> ("NEGATION",SubstVar "negation") :: attrs + | Mood -> ("MOOD", SubstVar "mood") :: attrs + | Tense -> ("TENSE", SubstVar "tense") :: attrs + | Nsyn -> ("NSYN", SubstVar "nsyn") :: attrs + | Nsem -> ("NSEM", SubstVar "nsem") :: attrs + | Ctype -> ("CTYPE", SubstVar "ctype") :: attrs + | Mode -> ("MODE", SubstVar "mode") :: attrs + | Psem -> ("PSEM", SubstVar "psem") :: attrs + | Icat -> attrs + | Inumber -> attrs + | Igender -> attrs + | Iperson -> attrs + | Nperson -> attrs + | Ncat -> attrs + | Plemma -> attrs + | Unumber -> attrs + | Ucase -> attrs + | Ugender -> attrs + | Uperson -> attrs + | Amode -> attrs) in + (* | s -> (string_of_selector s, Dot) :: attrs) in *) + (* | "lex" -> ("LEX",Val "+") :: attrs *) + (* | s -> failwith ("make_node: " ^ (string_of_selector s))) in *) + let symbol = if is_raised then + ENIAM_LCGrenderer.make_raised_symbol syntax + else ENIAM_LCGrenderer.make_symbol syntax in + {ENIAM_LCGrenderer.empty_node with + orth=orth; lemma=lemma; pos=pos; symbol=symbol; + weight=weight; id=id; attrs=List.rev attrs; args=Dot} + +let or_frame node = + (*Imp(Imp(Imp(Tensor[Atom "<root>"],Forward, + Tensor[Atom "</speaker>"]),Forward, + Imp(Tensor[Atom "ip"; Top; Top; Top],Forward,Tensor[Atom "or"])),Forward, + Tensor[Atom "or2"]),*) + (* Lambda("x",Lambda("y",Lambda("z",Node{node with gs=make_gs [] ["<root>"]; args=Tuple[ + Cut(SetAttr("AROLE",Val "Clause",SetAttr("GF",Gf CLAUSE,App(Var 
"y",Var "x"))))]}))) *) + VariantVar("lemma",Lambda("x",Lambda("y",Lambda("z",Node{node with args=Tuple[ + Cut(SetAttr("ARG_SYMBOL",Tuple[Val "TODO"],App(Var "y",Var "x")))]})))) + +let make_term id orth rules = + Xlist.map rules (fun (cats,syntax,(semantics,weight)) -> + ENIAM_LCGrenderer.reset_variable_names (); + ENIAM_LCGrenderer.add_variable_numbers (); + (* print_endline ("make_term 0: " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax); *) + match semantics with + BasicSem cat_list -> + let node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list false in + (* print_endline ("make_term 1: " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax); *) + let semantics = ENIAM_LCGrenderer.make_term node syntax in + ENIAM_LCGrenderer.simplify (syntax,semantics) + | RaisedSem(cat_list,outer_cat_list) -> + (* FIXME: jakie atrybuty powinien mieć outer node (w szczególności jaką wagę?) *) + let node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list true in + let outer_node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) outer_cat_list false in + (* print_endline ("make_term 2: " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax); *) + let semantics = ENIAM_LCGrenderer.make_raised_term node outer_node syntax in + ENIAM_LCGrenderer.simplify (syntax,semantics) + | TermSem(cat_list,"λxλyλz.NODE(yx,z)") -> + let node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list false in + (* print_endline ("make_term 3: " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax); *) + let semantics = or_frame node in + ENIAM_LCGrenderer.simplify (syntax,semantics) + | _ -> failwith "make_term: ni") + +let create_entries rules id orth cats valence lex_entries = + Xlist.fold cats [] (fun l cats -> + (* Printf.printf "create_entries: orth=%s lemma=%s pos=%s\n" orth cats.lemma cats.pos; *) + (* variable_name_ref := []; *) + if 
cats.pos="interp" && cats.lemma="<clause>" then (BracketSet(Forward),Dot) :: l else + if cats.pos="interp" && cats.lemma="</clause>" then (BracketSet(Backward),Dot) :: l else + if (cats.pos2="noun" || cats.pos2="verb" || cats.pos2="adj" || cats.pos2="adv") && cats.cat="X" && not !default_category_flag && cats.pos <> "aglt" then l else + let e = get_labels () in + (* print_endline "create_entries 1"; *) + let rules = find_rules rules cats in + let rules = prepare_lex_entries rules lex_entries cats in + (* Printf.printf "create_entries 2: %s %s |rules|=%d\n" cats.lemma cats.pos (Xlist.size rules); *) + let rules = assign_valence valence rules in + (* print_endline "create_entries 3"; *) + let rules = make_quantification e rules in + (* print_endline "create_entries 4"; *) + let rules = make_term id orth rules in + (* print_endline "create_entries 5"; *) + rules @ l) diff --git a/LCGlexicon/ENIAMcategoriesPL.ml b/LCGlexicon/ENIAMcategoriesPL.ml index de8275f..b0965cd 100644 --- a/LCGlexicon/ENIAMcategoriesPL.ml +++ b/LCGlexicon/ENIAMcategoriesPL.ml @@ -41,6 +41,8 @@ let selector_values = Xlist.fold [ Pos2, []; Cat, []; Coerced, []; + Role, []; + SNode, ["concept";"context";"dot";"relations"]; Number, all_numbers; Case, "postp" :: "pred" :: all_cases; Gender, all_genders; @@ -150,7 +152,7 @@ let num_nsem lemma = let part_set = StringSet.of_list ["się"; "nie"; "by"; "niech"; "niechaj"; "niechże"; "niechajże"; "czy"; "gdyby"] -let clarify_categories proper cat coerced = function +let clarify_categories proper cat coerced snode = function lemma,"subst",[numbers;cases;genders] -> let numbers = expand_numbers numbers in let cases = expand_cases cases in @@ -158,9 +160,9 @@ let clarify_categories proper cat coerced = function let cases,voc = split_voc cases in let nsyn,nsem = noun_type proper lemma "subst" in (if cases = [] then [] else - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; 
persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ (if voc = [] then [] else - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | lemma,"subst",[numbers;cases;genders;_] -> let numbers = expand_numbers numbers in let cases = expand_cases cases in @@ -168,9 +170,9 @@ let clarify_categories proper cat coerced = function let cases,voc = split_voc cases in let nsyn,nsem = noun_type proper lemma "subst" in (if cases = [] then [] else - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ (if voc = [] then [] else - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | lemma,"depr",[numbers;cases;genders] -> let numbers = expand_numbers numbers in let cases = expand_cases cases in @@ -178,9 +180,9 @@ let clarify_categories proper cat coerced = function let cases,voc = split_voc cases in let nsyn,nsem = noun_type proper lemma "depr" in (if cases = [] then [] else - [{empty_cats with lemma=lemma; 
pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ (if voc = [] then [] else - [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) | lemma,"ppron12",[numbers;cases;genders;persons] -> let numbers = expand_numbers numbers in let cases = expand_cases cases in @@ -326,82 +328,82 @@ let clarify_categories proper cat coerced = function let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in let genders = expand_genders genders in let pos,pos2 = if StringSet.mem adj_pronoun_lexemes lemma then "apron","pron" else "adj","adj" in - [{empty_cats with lemma=lemma; pos=pos; pos2=pos2; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; grads=grads}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *) + [{empty_cats with lemma=lemma; pos=pos; pos2=pos2; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; grads=grads}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *) | lemma,"adjc",[] -> - [{empty_cats with lemma=lemma; pos="adjc"; pos2="adj"; cat=cat; coerced=coerced; numbers=["sg"]; cases=["pred"]; genders=["m1";"m2";"m3"]; grads=["pos"]}] + [{empty_cats with lemma=lemma; pos="adjc"; pos2="adj"; cat=cat; coerced=coerced; snode=snode; numbers=["sg"]; cases=["pred"]; genders=["m1";"m2";"m3"]; grads=["pos"]}] | lemma,"adjp",[] -> - [{empty_cats with lemma=lemma; pos="adjp"; pos2="adj"; cat=cat; coerced=coerced; 
numbers=all_numbers; cases=["postp"]; genders=all_genders; grads=["pos"]}] - | lemma,"adja",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; pos="adja"; pos2="adja"}] - | lemma,"adv",[grads] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; pos="adv"; pos2="adv"; grads=grads; modes=adv_mode lemma}] - | lemma,"adv",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; pos="adv"; pos2="adv"; grads=["pos"]; modes=adv_mode lemma}] + [{empty_cats with lemma=lemma; pos="adjp"; pos2="adj"; cat=cat; coerced=coerced; snode=snode; numbers=all_numbers; cases=["postp"]; genders=all_genders; grads=["pos"]}] + | lemma,"adja",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; snode=snode; pos="adja"; pos2="adja"}] + | lemma,"adv",[grads] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; snode=snode; pos="adv"; pos2="adv"; grads=grads; modes=adv_mode lemma}] + | lemma,"adv",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; snode=snode; pos="adv"; pos2="adv"; grads=["pos"]; modes=adv_mode lemma}] | lemma,"ger",[numbers;cases;genders;aspects;negations] -> let numbers = expand_numbers numbers in let cases = expand_cases cases in let genders = expand_genders genders in - [{empty_cats with lemma=lemma; pos="ger"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; aspects=aspects; negations=negations}] (* FIXME: kwestia osoby przy voc *) + [{empty_cats with lemma=lemma; pos="ger"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; aspects=aspects; negations=negations}] (* FIXME: kwestia osoby przy voc *) | lemma,"pact",[numbers;cases;genders;aspects;negations] -> let numbers = expand_numbers numbers in let cases = expand_cases cases in let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in let genders = expand_genders genders in - [{empty_cats with lemma=lemma; pos="pact"; pos2="verb"; 
cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}] + [{empty_cats with lemma=lemma; pos="pact"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}] | lemma,"ppas",[numbers;cases;genders;aspects;negations] -> let numbers = expand_numbers numbers in let cases = expand_cases cases in let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in let genders = expand_genders genders in - [{empty_cats with lemma=lemma; pos="ppas"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}] + [{empty_cats with lemma=lemma; pos="ppas"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}] | lemma,"fin",[numbers;persons;aspects] -> (* FIXME: genders bez przymnogich *) let numbers = expand_numbers numbers in let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in - let cats = {empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; negations=["aff"; "neg"]; moods=["indicative"]} in + let cats = {empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; negations=["aff"; "neg"]; moods=["indicative"]} in (Xlist.map aspects (function "imperf" -> {cats with aspects=["imperf"]; tenses=["pres"]} | "perf" -> {cats with aspects=["perf"]; tenses=["fut"]} | _ -> failwith "clarify_categories")) @ (if persons2 = [] then [] else - [{empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]) + [{empty_cats with lemma=lemma; pos="fin"; 
pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]) | lemma,"bedzie",[numbers;persons;aspects] -> let numbers = expand_numbers numbers in let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in - [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] @ + [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] @ (if persons2 = [] then [] else - [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]) + [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]) | lemma,"praet",[numbers;genders;aspects;nagl] -> let numbers = expand_numbers numbers in let genders = expand_genders genders in - [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @ + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @ (if Xlist.mem aspects "imperf" then - [{empty_cats with 
lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] else []) | lemma,"praet",[numbers;genders;aspects] -> let numbers = expand_numbers numbers in let genders = expand_genders genders in - [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @ + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @ (if Xlist.mem aspects "imperf" then - [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] else []) | lemma,"winien",[numbers;genders;aspects] -> let numbers = expand_numbers numbers in let genders = expand_genders genders in - [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["pres"]}; - {empty_cats with lemma=lemma; pos="winien"; pos2="verb"; 
cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] @ + [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["pres"]}; + {empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] @ (if Xlist.mem aspects "imperf" then - [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] + [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] else []) | lemma,"impt",[numbers;persons;aspects] -> let numbers = expand_numbers numbers in - [{empty_cats with lemma=lemma; pos="impt"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}] + [{empty_cats with lemma=lemma; pos="impt"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}] | lemma,"imps",[aspects] -> - [{empty_cats with lemma=lemma; pos="imps"; pos2="verb"; cat=cat; coerced=coerced; numbers=all_numbers; genders=all_genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] + [{empty_cats with lemma=lemma; 
pos="imps"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=all_numbers; genders=all_genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] | lemma,"pred",[] -> (* FIXME: czy predykatyw zawsze jest niedokonany? *) - [{empty_cats with lemma=lemma; pos="pred"; pos2="verb"; cat=cat; coerced=coerced; numbers=["sg"]; genders=[(*"n2"*)"n"]; persons=["ter"]; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["pres";"past";"fut"]}] + [{empty_cats with lemma=lemma; pos="pred"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; numbers=["sg"]; genders=[(*"n2"*)"n"]; persons=["ter"]; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["pres";"past";"fut"]}] | lemma,"aglt",[numbers;persons;aspects;wok] -> let numbers = expand_numbers numbers in [{empty_cats with lemma=lemma; pos="aglt"; pos2="verb"; numbers=numbers; persons=persons; aspects=aspects}] - | lemma,"inf",[aspects] -> [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; cat=cat; coerced=coerced; aspects=aspects; negations=["aff"; "neg"]}] - | lemma,"pcon",[aspects] -> [{empty_cats with lemma=lemma; pos="pcon"; pos2="verb"; cat=cat; coerced=coerced; aspects=aspects; negations=["aff"; "neg"]}] - | lemma,"pant",[aspects] -> [{empty_cats with lemma=lemma; pos="pant"; pos2="verb"; cat=cat; coerced=coerced; aspects=aspects; negations=["aff"; "neg"]}] + | lemma,"inf",[aspects] -> [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] + | lemma,"pcon",[aspects] -> [{empty_cats with lemma=lemma; pos="pcon"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] + | lemma,"pant",[aspects] -> [{empty_cats with lemma=lemma; pos="pant"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] | lemma,"qub",[] -> if StringSet.mem part_set lemma then 
[{empty_cats with lemma=lemma; pos="part"; pos2="qub"}] else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"; cat=cat}] @@ -429,7 +431,7 @@ let clarify_categories proper cat coerced = function | _ -> [] *) let selector_names = StringSet.of_list [ - "lemma";"pos";"pos2";"cat";"coerced";"number";"case";"gender";"person";"grad"; + "lemma";"pos";"pos2";"cat";"coerced";"role";"node";"number";"case";"gender";"person";"grad"; "praep";"acm";"aspect";"negation";"mood";"tense";"nsyn";"nsem";"ctype";"mode";"psem"; "icat";"inumber";"igender";"iperson";"nperson";"ncat";"plemma"; "unumber";"ucase";"ugender";"uperson";"amode"] @@ -442,6 +444,8 @@ let string_of_selector = function | Pos2 -> "pos2" | Cat -> "cat" | Coerced -> "coerced" + | Role -> "role" + | SNode -> "node" | Number -> "number" | Case -> "case" | Gender -> "gender" @@ -483,6 +487,8 @@ let selector_of_string = function | "pos2" -> Pos2 | "cat" -> Cat | "coerced" -> Coerced + | "role" -> Role + | "node" -> SNode | "number" -> Number | "case" -> Case | "gender" -> Gender @@ -519,6 +525,8 @@ let match_selector cats = function | Pos -> [cats.pos] | Cat -> [cats.cat] | Coerced -> cats.coerced + | Role -> cats.roles + | SNode -> cats.snode | Number -> cats.numbers | Case -> cats.cases | Gender -> cats.genders @@ -556,6 +564,8 @@ let set_selector cats vals = function | Pos -> (match vals with [v] -> {cats with pos=v} | _ -> failwith "set_selector: Pos") | Cat -> (match vals with [v] -> {cats with cat=v} | _ -> failwith "set_selector: Cat") | Coerced -> {cats with coerced=vals} + | Role -> {cats with roles=vals} + | SNode -> {cats with snode=vals} | c -> failwith ("set_selector: " ^ string_of_selector c) let rec apply_selectors cats = function @@ -570,75 +580,75 @@ let rec apply_selectors cats = function apply_selectors (set_selector cats (StringSet.to_list vals) sel) l let pos_categories = Xlist.fold [ - "subst",[Lemma;Cat;Coerced;Number;Case;Gender;Person;Nsyn;Nsem;]; - 
"depr",[Lemma;Cat;Coerced;Number;Case;Gender;Person;Nsyn;Nsem;]; - "ppron12",[Lemma;Number;Case;Gender;Person;]; - "ppron3",[Lemma;Number;Case;Gender;Person;Praep;]; - "siebie",[Lemma;Number;Case;Gender;Person;]; - "prep",[Lemma;Cat;Coerced;Psem;Case;]; - "compar",[Lemma;Cat;Coerced;Case;]; - "num",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; - "numcomp",[Lemma]; - "intnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; - "realnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; - "intnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; - "realnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; - "symbol",[Lemma;Number;Case;Gender;Person;]; - "ordnum",[Lemma;Number;Case;Gender;Grad;]; - "date",[Lemma;Nsyn;Nsem;]; - "date-interval",[Lemma;Nsyn;Nsem;]; - "hour-minute",[Lemma;Nsyn;Nsem;]; - "hour",[Lemma;Nsyn;Nsem;]; - "hour-minute-interval",[Lemma;Nsyn;Nsem;]; - "hour-interval",[Lemma;Nsyn;Nsem;]; - "year",[Lemma;Nsyn;Nsem;]; - "year-interval",[Lemma;Nsyn;Nsem;]; - "day",[Lemma;Nsyn;Nsem;]; - "day-interval",[Lemma;Nsyn;Nsem;]; - "day-month",[Lemma;Nsyn;Nsem;]; - "day-month-interval",[Lemma;Nsyn;Nsem;]; - "month-interval",[Lemma;Nsyn;Nsem;]; - "roman-ordnum",[Lemma;Number;Case;Gender;Grad;]; - "roman",[Lemma;Nsyn;Nsem;]; - "roman-interval",[Lemma;Nsyn;Nsem;]; - "match-result",[Lemma;Nsyn;Nsem;]; - "url",[Lemma;Nsyn;Nsem;]; - "email",[Lemma;Nsyn;Nsem;]; - "phone-number",[Lemma;Nsyn;Nsem;]; - "postal-code",[Lemma;Nsyn;Nsem;]; - "obj-id",[Lemma;Nsyn;Nsem;]; - "building-number",[Lemma;Nsyn;Nsem;]; - "fixed",[Lemma;]; - "adj",[Lemma;Cat;Coerced;Number;Case;Gender;Grad;]; - "adjc",[Lemma;Cat;Coerced;Number;Case;Gender;Grad;]; - "adjp",[Lemma;Cat;Coerced;Number;Case;Gender;Grad;]; - "apron",[Lemma;Number;Case;Gender;Grad;]; - "adja",[Lemma;Cat;Coerced;]; - "adv",[Lemma;Cat;Coerced;Grad;Mode];(* ctype *) - "ger",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Case;Gender;Person;Aspect;Negation;]; - "pact",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Case;Gender;Aspect;Negation;]; - 
"ppas",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Case;Gender;Aspect;Negation;]; - "fin",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; - "bedzie",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; - "praet",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; - "winien",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; - "impt",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; - "imps",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; - "pred",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; - "aglt",[Lemma;Number;Person;Aspect;]; - "inf",[Lemma;(*NewLemma;*)Cat;Coerced;Aspect;Negation;]; - "pcon",[Lemma;(*NewLemma;*)Cat;Coerced;Aspect;Negation;]; - "pant",[Lemma;(*NewLemma;*)Cat;Coerced;Aspect;Negation;]; - "qub",[Lemma;Cat;]; - "part",[Lemma;]; - "comp",[Lemma;];(* ctype *) - "conj",[Lemma;];(* ctype *) - "interj",[Lemma;Cat;Coerced;]; - "sinterj",[Lemma;]; - "burk",[Lemma;]; - "interp",[Lemma;]; - "unk",[Lemma;Number;Case;Gender;Person;]; - "xxx",[Lemma;Number;Case;Gender;Person;]; - "html-tag",[Lemma;]; - "list-item",[Lemma;]; + "subst",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Person;Nsyn;Nsem;]; + "depr",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Person;Nsyn;Nsem;]; + "ppron12",[Lemma;SNode;Number;Case;Gender;Person;]; + "ppron3",[Lemma;SNode;Number;Case;Gender;Person;Praep;]; + "siebie",[Lemma;SNode;Number;Case;Gender;Person;]; + "prep",[Lemma;Cat;Coerced;Role;SNode;Psem;Case;]; + "compar",[Lemma;Cat;Coerced;Role;SNode;Case;]; + "num",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;]; + "numcomp",[Lemma;SNode]; + "intnum",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;]; + "realnum",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;]; + "intnum-interval",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;]; + 
"realnum-interval",[Lemma;SNode;Number;Case;Gender;Person;Acm;Nsem;]; + "symbol",[Lemma;SNode;Number;Case;Gender;Person;]; + "ordnum",[Lemma;SNode;Number;Case;Gender;Grad;]; + "date",[Lemma;SNode;Nsyn;Nsem;]; + "date-interval",[Lemma;SNode;Nsyn;Nsem;]; + "hour-minute",[Lemma;SNode;Nsyn;Nsem;]; + "hour",[Lemma;SNode;Nsyn;Nsem;]; + "hour-minute-interval",[Lemma;SNode;Nsyn;Nsem;]; + "hour-interval",[Lemma;SNode;Nsyn;Nsem;]; + "year",[Lemma;SNode;Nsyn;Nsem;]; + "year-interval",[Lemma;SNode;Nsyn;Nsem;]; + "day",[Lemma;SNode;Nsyn;Nsem;]; + "day-interval",[Lemma;SNode;Nsyn;Nsem;]; + "day-month",[Lemma;SNode;Nsyn;Nsem;]; + "day-month-interval",[Lemma;SNode;Nsyn;Nsem;]; + "month-interval",[Lemma;SNode;Nsyn;Nsem;]; + "roman-ordnum",[Lemma;SNode;Number;Case;Gender;Grad;]; + "roman",[Lemma;SNode;Nsyn;Nsem;]; + "roman-interval",[Lemma;SNode;Nsyn;Nsem;]; + "match-result",[Lemma;SNode;Nsyn;Nsem;]; + "url",[Lemma;SNode;Nsyn;Nsem;]; + "email",[Lemma;SNode;Nsyn;Nsem;]; + "phone-number",[Lemma;SNode;Nsyn;Nsem;]; + "postal-code",[Lemma;SNode;Nsyn;Nsem;]; + "obj-id",[Lemma;SNode;Nsyn;Nsem;]; + "building-number",[Lemma;SNode;Nsyn;Nsem;]; + "fixed",[Lemma;SNode;]; + "adj",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Grad;]; + "adjc",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Grad;]; + "adjp",[Lemma;Cat;Coerced;Role;SNode;Number;Case;Gender;Grad;]; + "apron",[Lemma;SNode;Number;Case;Gender;Grad;]; + "adja",[Lemma;Cat;Coerced;Role;SNode;]; + "adv",[Lemma;Cat;Coerced;Role;SNode;Grad;Mode];(* ctype *) + "ger",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Case;Gender;Person;Aspect;Negation;]; + "pact",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Case;Gender;Aspect;Negation;]; + "ppas",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Case;Gender;Aspect;Negation;]; + "fin",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; + "bedzie",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; + 
"praet",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; + "winien",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; + "impt",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; + "imps",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; + "pred",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; + "aglt",[Lemma;SNode;Number;Person;Aspect;]; + "inf",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; + "pcon",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; + "pant",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; + "qub",[Lemma;Cat;SNode;]; + "part",[Lemma;SNode]; + "comp",[Lemma;SNode;];(* ctype *) + "conj",[Lemma;SNode;];(* ctype *) + "interj",[Lemma;Cat;Coerced;Role;SNode;]; + "sinterj",[Lemma;SNode;]; + "burk",[Lemma;SNode;]; + "interp",[Lemma;SNode;]; + "unk",[Lemma;SNode;Number;Case;Gender;Person;]; + "xxx",[Lemma;SNode;Number;Case;Gender;Person;]; + "html-tag",[Lemma;SNode;]; + "list-item",[Lemma;SNode;]; ] StringMap.empty (fun map (k,l) -> StringMap.add map k l) diff --git a/LCGlexicon/ENIAMcategoriesPL_old.ml b/LCGlexicon/ENIAMcategoriesPL_old.ml new file mode 100644 index 0000000..de8275f --- /dev/null +++ b/LCGlexicon/ENIAMcategoriesPL_old.ml @@ -0,0 +1,644 @@ +(* + * ENIAM_LCGlexicon is a library that provides LCG lexicon form Polish + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences + * + * This library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + *) + +open ENIAM_LCGlexiconTypes +open Xstd + +let all_numbers = ["sg";"pl"] +let all_cases = ["nom";"gen";"dat";"acc";"inst";"loc";"voc"] +(* let all_genders = ["m1";"m2";"m3";"f";"n1";"n2";"p1";"p2";"p3"] *) +let all_genders = ["m1";"m2";"m3";"f";"n"] +let all_persons = ["pri";"sec";"ter"] +(* FIXME: zamiast wszystkich możliwych wartości można używać Zero gdy nie ma uzgodnienia *) + +let selector_values = Xlist.fold [ + Lemma, []; + Pos, ["subst";"depr";"ppron12";"ppron3";"siebie";"prep";"fixed";"num";"numcomp";"intnum"; + "realnum";"intnum-interval";"realnum-interval";"symbol";"ordnum"; + "date";"date-interval";"hour-minute";"hour";"hour-minute-interval"; + "hour-interval";"year";"year-interval";"day";"day-interval";"day-month"; + "day-month-interval";"month-interval";"roman";"roman-interval";"roman-ordnum"; + "match-result";"url";"email";"phone-number";"postal-code";"obj-id";"building-number";"list-item";"adj";"adjc";"adjp";"adja"; + "adv";"ger";"pact";"ppas";"fin";"bedzie";"praet";"winien";"impt"; + "imps";"pred";"aglt";"inf";"pcon";"pant";"qub";"part";"comp";"conj";"interj"; + "sinterj";"burk";"interp";"xxx";"unk";"html-tag";"apron";"compar"]; + Pos2, []; + Cat, []; + Coerced, []; + Number, all_numbers; + Case, "postp" :: "pred" :: all_cases; + Gender, all_genders; + Person, all_persons; + Grad, ["pos";"com";"sup"]; + Praep, ["praep";"npraep";"praep-npraep"]; + Acm, ["congr";"rec"]; + Ctype, ["int";"rel";"sub";"coord"]; + Mode, ["abl";"adl";"locat";"perl";"dur";"temp";"mod"]; + Aspect, ["perf";"imperf"]; + Negation, ["neg";"aff"]; + Mood, 
["indicative";"imperative";"conditional"]; + Tense, ["past";"pres";"fut"]; + Nsyn, ["proper";"pronoun";"common"]; + Nsem, ["count";"time";"mass";"measure"]; + Psem, ["sem";"nosem"]; + Ucase, all_cases; +] SelectorMap.empty (fun map (selector,vals) -> SelectorMap.add map selector vals) + + +let expand_numbers numbers = + if Xlist.mem numbers "_" then all_numbers else numbers + +let expand_genders genders = + if Xlist.mem genders "_" then all_genders else genders + +let expand_cases cases = + if Xlist.mem cases "_" || Xlist.mem cases "$C" then all_cases else cases + +let expand_akcs akcs = + if Xlist.mem akcs "_" then ["akc";"nakc"] else akcs + +let split_voc cases = + Xlist.fold cases ([],[]) (fun (cases,voc) -> function + "voc" -> cases, "voc" :: voc + | s -> s :: cases, voc) + +let load_subst_data filename _ = + StringSet.of_list (File.load_lines filename) + +let subst_uncountable_lexemes = ref StringSet.empty +let subst_uncountable_lexemes2 = ref StringSet.empty +let subst_container_lexemes = ref StringSet.empty +let subst_numeral_lexemes = ref StringSet.empty +let subst_time_lexemes = ref StringSet.empty + +let subst_pronoun_lexemes = StringSet.of_list ["co"; "kto"; "cokolwiek"; "ktokolwiek"; "nic"; "nikt"; "coś"; "ktoś"; "to"] +let adj_pronoun_lexemes = StringSet.of_list ["czyj"; "jaki"; "który"; "jakiś"; "ten"; "taki"] +let compar_lexemes = StringSet.of_list ["jak"; "jako"; "niż"; "niczym"; "niby"; "co"; "zamiast"] + +(* let adj_quant_lexemes = StringSet.of_list ["każdy"; "wszelki"; "wszystek"; "żaden"; "jakiś"; "pewien"; "niektóry"; "jedyny"; "sam"] *) + +let load_adv_modes filename adv_modes = + File.fold_tab filename adv_modes (fun adv_modes -> function + [adv;mode] -> StringMap.add_inc adv_modes adv [mode] (fun l -> mode :: l) + | _ -> failwith "load_adv_modes") + +let load_num_nsems filename num_nsems = + File.fold_tab filename num_nsems (fun num_nsems -> function + lemma :: _ :: nsems :: _ -> + Xlist.fold (Xstring.split "," nsems) num_nsems (fun 
num_nsems nsem -> + StringMap.add_inc num_nsems lemma [nsem] (fun l -> nsem :: l)) + | _ -> failwith "load_num_nsems") + +let adv_modes = ref (StringMap.empty : string list StringMap.t) +let num_nsems = ref (StringMap.empty : string list StringMap.t) + +let initialize () = + subst_uncountable_lexemes := File.catch_no_file (load_subst_data subst_uncountable_lexemes_filename) StringSet.empty; + subst_uncountable_lexemes2 := File.catch_no_file (load_subst_data subst_uncountable_lexemes_filename2) StringSet.empty; + subst_container_lexemes := File.catch_no_file (load_subst_data subst_container_lexemes_filename) StringSet.empty; + subst_numeral_lexemes := File.catch_no_file (load_subst_data subst_numeral_lexemes_filename) StringSet.empty; + subst_time_lexemes := File.catch_no_file (load_subst_data subst_time_lexemes_filename) StringSet.empty; + adv_modes := File.catch_no_file (load_adv_modes adv_modes_filename) StringMap.empty; + num_nsems := File.catch_no_file (load_num_nsems num_nsems_filename) StringMap.empty; + () + +let noun_type proper lemma pos = + let nsyn = + if proper then "proper" else + if pos = "ppron12" || pos = "ppron3" || pos = "siebie" then "pronoun" else + if pos = "symbol" || pos = "date" || pos = "date-interval" || pos = "hour" || pos = "hour-minute" || pos = "hour-interval" || pos = "hour-minute-interval" || + pos = "year" || pos = "year-interval" || pos = "day" || pos = "day-interval" || pos = "day-month" || pos = "day-month-interval" || + pos = "match-result" || pos = "month-interval" || pos = "roman" || pos = "roman-interval" || pos = "url" || pos = "email" || pos = "phone-number" || pos = "postal-code" || pos = "obj-id" || pos = "building-number" || pos = "date" then "proper" else + if StringSet.mem subst_pronoun_lexemes lemma then "pronoun" else + "common" in + let nsem = + if pos = "ppron12" || pos = "ppron3" || pos = "siebie" then ["count"] else + if StringSet.mem !subst_time_lexemes lemma then ["time"] else + let l = ["count"] in + let l = 
if StringSet.mem !subst_uncountable_lexemes lemma || StringSet.mem !subst_uncountable_lexemes2 lemma then "mass" :: l else l in + if StringSet.mem !subst_container_lexemes lemma then "measure" :: l else l in + [nsyn],nsem + +let adv_mode lemma = + try + StringMap.find !adv_modes lemma + with Not_found -> ["mod"] + +let num_nsem lemma = + try + StringMap.find !num_nsems lemma + with Not_found -> (*try + StringMap.find !num_nsems (String.lowercase lemma) + with Not_found ->*) failwith ("num_nsem: " ^ lemma) + + +let part_set = StringSet.of_list ["się"; "nie"; "by"; "niech"; "niechaj"; "niechże"; "niechajże"; "czy"; "gdyby"] + +let clarify_categories proper cat coerced = function + lemma,"subst",[numbers;cases;genders] -> + let numbers = expand_numbers numbers in + let cases = expand_cases cases in + let genders = expand_genders genders in + let cases,voc = split_voc cases in + let nsyn,nsem = noun_type proper lemma "subst" in + (if cases = [] then [] else + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ + (if voc = [] then [] else + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) + | lemma,"subst",[numbers;cases;genders;_] -> + let numbers = expand_numbers numbers in + let cases = expand_cases cases in + let genders = expand_genders genders in + let cases,voc = split_voc cases in + let nsyn,nsem = noun_type proper lemma "subst" in + (if cases = [] then [] else + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ + (if voc = [] then [] else + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; 
nsem=nsem}]) + | lemma,"depr",[numbers;cases;genders] -> + let numbers = expand_numbers numbers in + let cases = expand_cases cases in + let genders = expand_genders genders in + let cases,voc = split_voc cases in + let nsyn,nsem = noun_type proper lemma "depr" in + (if cases = [] then [] else + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; nsyn=nsyn; nsem=nsem}]) @ + (if voc = [] then [] else + [{empty_cats with lemma=lemma; pos="subst"; pos2="noun"; cat=cat; coerced=coerced; numbers=numbers; cases=voc; genders=genders; persons=["sec"]; nsyn=nsyn; nsem=nsem}]) + | lemma,"ppron12",[numbers;cases;genders;persons] -> + let numbers = expand_numbers numbers in + let cases = expand_cases cases in + let genders = expand_genders genders in + [{empty_cats with lemma=lemma; pos="ppron12"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons}] + | lemma,"ppron12",[numbers;cases;genders;persons;akcs] -> + let numbers = expand_numbers numbers in + let cases = expand_cases cases in + let genders = expand_genders genders in + [{empty_cats with lemma=lemma; pos="ppron12"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons}] + | lemma,"ppron3",[numbers;cases;genders;persons] -> + let numbers = expand_numbers numbers in + let cases = expand_cases cases in + let genders = expand_genders genders in + [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=["praep-npraep"]}] + | lemma,"ppron3",[numbers;cases;genders;persons;akcs] -> + let numbers = expand_numbers numbers in + let cases = expand_cases cases in + let genders = expand_genders genders in + [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=["praep-npraep"]}] + | lemma,"ppron3",[numbers;cases;genders;persons;akcs;praep] -> + let 
numbers = expand_numbers numbers in + let cases = expand_cases cases in + let genders = expand_genders genders in + let praep = match praep with + ["praep";"npraep"] -> ["praep-npraep"] + | ["npraep";"praep"] -> ["praep-npraep"] + | _ -> praep in + [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=praep}] + | lemma,"siebie",[cases] -> (* FIXME: czy tu określać numbers genders persons? *) + let cases = expand_cases cases in + [{empty_cats with lemma=lemma; pos="siebie"; pos2="pron"; numbers=all_numbers; cases=cases; genders=all_genders; persons=["ter"]}] + | lemma,"prep",[cases;woks] -> + if StringSet.mem compar_lexemes lemma then + [{empty_cats with lemma=lemma; pos="compar"; pos2="prep"}] else + let cases = expand_cases cases in + [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; cases=cases; psem=["sem";"nosem"]}] + | lemma,"prep",[cases] -> + if StringSet.mem compar_lexemes lemma then + [{empty_cats with lemma=lemma; pos="compar"; pos2="prep"}] else + let cases = expand_cases cases in + [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; cases=cases; psem=["sem";"nosem"]}] + | lemma,"num",[numbers;cases;genders;acms] -> + let numbers = expand_numbers numbers in + let cases = expand_cases cases in + let genders = expand_genders genders in + let nsem = num_nsem lemma in + [{empty_cats with lemma=lemma; pos="num"; pos2="num"; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms; nsem=nsem}] + | lemma,"num",[numbers;cases;genders;acms;_] -> + let numbers = expand_numbers numbers in + let cases = expand_cases cases in + let genders = expand_genders genders in + let nsem = num_nsem lemma in + [{empty_cats with lemma=lemma; pos="num"; pos2="num"; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms; nsem=nsem}] + | lemma,"numcomp",[] -> [{empty_cats with lemma=lemma; pos="numcomp"; pos2="numcomp"}] + | lemma,"intnum",[] -> + let 
numbers,acms = + if lemma = "1" || lemma = "-1" then ["sg"],["congr"] else + let s = String.get lemma (String.length lemma - 1) in + ["pl"],if s = '2' || s = '3' || s = '4' then ["rec";"congr"] else ["rec"] in + [{empty_cats with lemma=lemma; pos="intnum"; pos2="num"; numbers=numbers; cases=all_cases; genders=all_genders; persons=["ter"]; acms=acms; nsem=["count"]}] + | lemma,"realnum",[] -> + [{empty_cats with lemma=lemma; pos="realnum"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}] + | lemma,"intnum-interval",[] -> + [{empty_cats with lemma=lemma; pos="intnum-interval"; pos2="num"; numbers=["pl"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec";"congr"]; nsem=["count"]}] + | lemma,"realnum-interval",[] -> + [{empty_cats with lemma=lemma; pos="realnum-interval"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}] + | lemma,"symbol",[] -> + [{empty_cats with lemma=lemma; pos="symbol"; pos2="noun"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]}] + | lemma,"ordnum",[] -> + [{empty_cats with lemma=lemma; pos="ordnum"; pos2="adj"; numbers=all_numbers; cases=all_cases; genders=all_genders; grads=["pos"]}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *) + | lemma,"date",[] -> + let nsyn,nsem = noun_type proper lemma "date" in + [{empty_cats with lemma=lemma; pos="date"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"date-interval",[] -> + let nsyn,nsem = noun_type proper lemma "date-interval" in + [{empty_cats with lemma=lemma; pos="date-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"hour-minute",[] -> + let nsyn,nsem = noun_type proper lemma "hour-minute" in + [{empty_cats with lemma=lemma; pos="hour-minute"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"hour",[] -> + let nsyn,nsem = noun_type proper lemma "hour" in + [{empty_cats with lemma=lemma; pos="hour"; 
pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"hour-minute-interval",[] -> + let nsyn,nsem = noun_type proper lemma "hour-minute-interval" in + [{empty_cats with lemma=lemma; pos="hour-minute-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"hour-interval",[] -> + let nsyn,nsem = noun_type proper lemma "hour-interval" in + [{empty_cats with lemma=lemma; pos="hour-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"year",[] -> + let nsyn,nsem = noun_type proper lemma "year" in + [{empty_cats with lemma=lemma; pos="year"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"year-interval",[] -> + let nsyn,nsem = noun_type proper lemma "year-interval" in + [{empty_cats with lemma=lemma; pos="year-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"day",[] -> + let nsyn,nsem = noun_type proper lemma "day" in + [{empty_cats with lemma=lemma; pos="day"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"day-interval",[] -> + let nsyn,nsem = noun_type proper lemma "day-interval" in + [{empty_cats with lemma=lemma; pos="day-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"day-month",[] -> + let nsyn,nsem = noun_type proper lemma "day-month" in + [{empty_cats with lemma=lemma; pos="day-month"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"day-month-interval",[] -> + let nsyn,nsem = noun_type proper lemma "day-month-interval" in + [{empty_cats with lemma=lemma; pos="day-month-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"month-interval",[] -> + let nsyn,nsem = noun_type proper lemma "month-interval" in + [{empty_cats with lemma=lemma; pos="month-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"roman",[] -> + let nsyn,nsem = noun_type proper lemma "roman" in + [{empty_cats with lemma=lemma; pos="roman-ordnum"; pos2="adj"; numbers=all_numbers; cases=all_cases; genders=all_genders; grads=["pos"]}; + {empty_cats with lemma=lemma; pos="roman"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"roman-interval",[] -> + let 
nsyn,nsem = noun_type proper lemma "roman-interval" in + [{empty_cats with lemma=lemma; pos="roman-interval"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"match-result",[] -> + let nsyn,nsem = noun_type proper lemma "match-result" in + [{empty_cats with lemma=lemma; pos="match-result"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"url",[] -> + let nsyn,nsem = noun_type proper lemma "url" in + [{empty_cats with lemma=lemma; pos="url"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"email",[] -> + let nsyn,nsem = noun_type proper lemma "email" in + [{empty_cats with lemma=lemma; pos="email"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"phone-number",[] -> + let nsyn,nsem = noun_type proper lemma "phone-number" in + [{empty_cats with lemma=lemma; pos="phone-number"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"postal-code",[] -> + let nsyn,nsem = noun_type proper lemma "postal-code" in + [{empty_cats with lemma=lemma; pos="postal-code"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"obj-id",[] -> + let nsyn,nsem = noun_type proper lemma "obj-id" in + [{empty_cats with lemma=lemma; pos="obj-id"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"building-number",[] -> + let nsyn,nsem = noun_type proper lemma "building-number" in + [{empty_cats with lemma=lemma; pos="building-number"; pos2="symbol"; nsyn=nsyn; nsem=nsem}] + | lemma,"fixed",[] -> [{empty_cats with lemma=lemma; pos="fixed"; pos2="fixed"}] + | lemma,"adj",[numbers;cases;genders;grads] -> (* FIXME: adjsyn *) + let numbers = expand_numbers numbers in + let cases = expand_cases cases in + let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in + let genders = expand_genders genders in + let pos,pos2 = if StringSet.mem adj_pronoun_lexemes lemma then "apron","pron" else "adj","adj" in + [{empty_cats with lemma=lemma; pos=pos; pos2=pos2; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; grads=grads}] (* FIXME: czy dać możliwość więcej niż jednego stopnia *) 
+ | lemma,"adjc",[] -> + [{empty_cats with lemma=lemma; pos="adjc"; pos2="adj"; cat=cat; coerced=coerced; numbers=["sg"]; cases=["pred"]; genders=["m1";"m2";"m3"]; grads=["pos"]}] + | lemma,"adjp",[] -> + [{empty_cats with lemma=lemma; pos="adjp"; pos2="adj"; cat=cat; coerced=coerced; numbers=all_numbers; cases=["postp"]; genders=all_genders; grads=["pos"]}] + | lemma,"adja",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; pos="adja"; pos2="adja"}] + | lemma,"adv",[grads] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; pos="adv"; pos2="adv"; grads=grads; modes=adv_mode lemma}] + | lemma,"adv",[] -> [{empty_cats with lemma=lemma; cat=cat; coerced=coerced; pos="adv"; pos2="adv"; grads=["pos"]; modes=adv_mode lemma}] + | lemma,"ger",[numbers;cases;genders;aspects;negations] -> + let numbers = expand_numbers numbers in + let cases = expand_cases cases in + let genders = expand_genders genders in + [{empty_cats with lemma=lemma; pos="ger"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; aspects=aspects; negations=negations}] (* FIXME: kwestia osoby przy voc *) + | lemma,"pact",[numbers;cases;genders;aspects;negations] -> + let numbers = expand_numbers numbers in + let cases = expand_cases cases in + let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in + let genders = expand_genders genders in + [{empty_cats with lemma=lemma; pos="pact"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}] + | lemma,"ppas",[numbers;cases;genders;aspects;negations] -> + let numbers = expand_numbers numbers in + let cases = expand_cases cases in + let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in + let genders = expand_genders genders in + [{empty_cats with lemma=lemma; pos="ppas"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; cases=cases; genders=genders; aspects=aspects; 
negations=negations}] + | lemma,"fin",[numbers;persons;aspects] -> (* FIXME: genders bez przymnogich *) + let numbers = expand_numbers numbers in + let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in + let cats = {empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; negations=["aff"; "neg"]; moods=["indicative"]} in + (Xlist.map aspects (function + "imperf" -> {cats with aspects=["imperf"]; tenses=["pres"]} + | "perf" -> {cats with aspects=["perf"]; tenses=["fut"]} + | _ -> failwith "clarify_categories")) @ + (if persons2 = [] then [] else + [{empty_cats with lemma=lemma; pos="fin"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]) + | lemma,"bedzie",[numbers;persons;aspects] -> + let numbers = expand_numbers numbers in + let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in + [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] @ + (if persons2 = [] then [] else + [{empty_cats with lemma=lemma; pos="bedzie"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]) + | lemma,"praet",[numbers;genders;aspects;nagl] -> + let numbers = expand_numbers numbers in + let genders = expand_genders genders in + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @ + (if Xlist.mem aspects "imperf" then + [{empty_cats with lemma=lemma; pos="praet"; 
pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] + else []) + | lemma,"praet",[numbers;genders;aspects] -> + let numbers = expand_numbers numbers in + let genders = expand_genders genders in + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]}] @ + (if Xlist.mem aspects "imperf" then + [{empty_cats with lemma=lemma; pos="praet"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] + else []) + | lemma,"winien",[numbers;genders;aspects] -> + let numbers = expand_numbers numbers in + let genders = expand_genders genders in + [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["pres"]}; + {empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] @ + (if Xlist.mem aspects "imperf" then + [{empty_cats with lemma=lemma; pos="winien"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=genders; persons=all_persons; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]}] + else []) + | lemma,"impt",[numbers;persons;aspects] -> + let numbers = expand_numbers numbers in + [{empty_cats with lemma=lemma; pos="impt"; pos2="verb"; cat=cat; coerced=coerced; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; 
tenses=["fut"]}] + | lemma,"imps",[aspects] -> + [{empty_cats with lemma=lemma; pos="imps"; pos2="verb"; cat=cat; coerced=coerced; numbers=all_numbers; genders=all_genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}] + | lemma,"pred",[] -> (* FIXME: czy predykatyw zawsze jest niedokonany? *) + [{empty_cats with lemma=lemma; pos="pred"; pos2="verb"; cat=cat; coerced=coerced; numbers=["sg"]; genders=[(*"n2"*)"n"]; persons=["ter"]; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["pres";"past";"fut"]}] + | lemma,"aglt",[numbers;persons;aspects;wok] -> + let numbers = expand_numbers numbers in + [{empty_cats with lemma=lemma; pos="aglt"; pos2="verb"; numbers=numbers; persons=persons; aspects=aspects}] + | lemma,"inf",[aspects] -> [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; cat=cat; coerced=coerced; aspects=aspects; negations=["aff"; "neg"]}] + | lemma,"pcon",[aspects] -> [{empty_cats with lemma=lemma; pos="pcon"; pos2="verb"; cat=cat; coerced=coerced; aspects=aspects; negations=["aff"; "neg"]}] + | lemma,"pant",[aspects] -> [{empty_cats with lemma=lemma; pos="pant"; pos2="verb"; cat=cat; coerced=coerced; aspects=aspects; negations=["aff"; "neg"]}] + | lemma,"qub",[] -> + if StringSet.mem part_set lemma then [{empty_cats with lemma=lemma; pos="part"; pos2="qub"}] + else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"; cat=cat}] + | lemma,"comp",[] -> [{empty_cats with lemma=lemma; pos="comp"; pos2="comp"}] + | lemma,"conj",[] -> [{empty_cats with lemma=lemma; pos="conj"; pos2="conj"}] + | lemma,"interj",[] -> [{empty_cats with lemma=lemma; pos="interj"; pos2="interj"; cat=cat; coerced=coerced}] + | lemma,"sinterj",[] -> [{empty_cats with lemma=lemma; pos="sinterj"; pos2="sinterj"; (*cat=cat; coerced=coerced*)}] + | lemma,"burk",[] -> [{empty_cats with lemma=lemma; pos="burk"; pos2="burk"}] + | ",","interp",[] -> [{empty_cats with lemma=","; pos="conj"; pos2="conj"}] + 
| lemma,"interp",[] -> [{empty_cats with lemma=lemma; pos="interp"; pos2="interp"}] + | lemma,"unk",[] -> + [{empty_cats with lemma=lemma; pos="unk"; pos2="noun"; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}] + | lemma,"xxx",[] -> + [{empty_cats with lemma=lemma; pos="xxx"; pos2="noun"; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}] + | lemma,"html-tag",[] -> [{empty_cats with lemma=lemma; pos="html-tag"; pos2="html-tag"}] + | lemma,"list-item",[] -> [{empty_cats with lemma=lemma; pos="list-item"; pos2="list-item"}] + | lemma,c,l -> failwith ("clarify_categories: " ^ lemma ^ ":" ^ c ^ ":" ^ (String.concat ":" (Xlist.map l (String.concat ".")))) + +(* FIXME: move this somewhere else *) +(* let assign token = + match token.ENIAMtokenizerTypes.token with + ENIAMtokenizerTypes.Lemma(lemma,pos,interp) -> List.flatten (Xlist.map interp (fun interp -> clarify_categories false (lemma,pos,interp))) + | ENIAMtokenizerTypes.Proper(lemma,pos,interp,_) -> List.flatten (Xlist.map interp (fun interp -> clarify_categories true (lemma,pos,interp))) + | ENIAMtokenizerTypes.Interp lemma -> clarify_categories false (lemma,"interp",[]) + | _ -> [] *) + +(* Set of the textual selector names; it holds exactly the strings that selector_of_string accepts and string_of_selector produces. *) let selector_names = StringSet.of_list [ + "lemma";"pos";"pos2";"cat";"coerced";"number";"case";"gender";"person";"grad"; + "praep";"acm";"aspect";"negation";"mood";"tense";"nsyn";"nsem";"ctype";"mode";"psem"; + "icat";"inumber";"igender";"iperson";"nperson";"ncat";"plemma"; + "unumber";"ucase";"ugender";"uperson";"amode"] + + +(* Returns the lower-case textual name of a selector constructor (Lemma gives lemma, Pos2 gives pos2, and so on). *) let string_of_selector = function + Lemma -> "lemma" + (* | NewLemma -> "newlemma" *) + | Pos -> "pos" + | Pos2 -> "pos2" + | Cat -> "cat" + | Coerced -> "coerced" + | Number -> "number" + | Case -> "case" + | Gender -> "gender" + | Person -> "person" + | Grad -> "grad" + | Praep -> "praep" + | Acm -> "acm" + | Aspect -> "aspect" + | Negation -> "negation" + | Mood -> "mood" + | Tense -> "tense" + | Nsyn -> "nsyn" + | Nsem -> "nsem" + | Ctype -> "ctype"
+ | Mode -> "mode" + | Psem -> "psem" + | Icat -> "icat" + | Inumber -> "inumber" + | Igender -> "igender" + | Iperson -> "iperson" + | Nperson -> "nperson" + | Ncat -> "ncat" + | Plemma -> "plemma" + | Unumber -> "unumber" + | Ucase -> "ucase" + | Ugender -> "ugender" + | Uperson -> "uperson" + | Amode -> "amode" + +(* Renders a list of (selector, relation, values) triples as text: each triple becomes the selector name, then = for Eq or != for any other relation, then the values joined with a vertical bar; triples are separated by a comma and a space. *) let string_of_selectors selectors = + String.concat ", " (Xlist.map selectors (fun (cat,rel,l) -> + let rel = if rel = Eq then "=" else "!=" in + string_of_selector cat ^ rel ^ (String.concat "|" l))) + +(* Inverse of string_of_selector: maps a textual name to its selector constructor. Any other string ends in failwith with the prefix selector_of_string followed by the offending input. *) let selector_of_string = function + "lemma" -> Lemma + (* | NewLemma -> "newlemma" *) + | "pos" -> Pos + | "pos2" -> Pos2 + | "cat" -> Cat + | "coerced" -> Coerced + | "number" -> Number + | "case" -> Case + | "gender" -> Gender + | "person" -> Person + | "grad" -> Grad + | "praep" -> Praep + | "acm" -> Acm + | "aspect" -> Aspect + | "negation" -> Negation + | "mood" -> Mood + | "tense" -> Tense + | "nsyn" -> Nsyn + | "nsem" -> Nsem + | "ctype" -> Ctype + | "mode" -> Mode + | "psem" -> Psem + | "icat" -> Icat + | "inumber" -> Inumber + | "igender" -> Igender + | "iperson" -> Iperson + | "nperson" -> Nperson + | "ncat" -> Ncat + | "plemma" -> Plemma + | "unumber" -> Unumber + | "ucase" -> Ucase + | "ugender" -> Ugender + | "uperson" -> Uperson + | "amode" -> Amode + | s -> failwith ("selector_of_string: " ^ s) + +(* Returns, as a list, the values the record cats holds for the given selector; the single-valued fields lemma, pos and cat are wrapped in a one-element list. Selectors without a branch here (Pos2 among them) fall through to failwith. *) let match_selector cats = function + Lemma -> [cats.lemma] +(* | NewLemma -> [] *) + | Pos -> [cats.pos] + | Cat -> [cats.cat] + | Coerced -> cats.coerced + | Number -> cats.numbers + | Case -> cats.cases + | Gender -> cats.genders + | Person -> cats.persons + | Grad -> cats.grads + | Praep -> cats.praeps + | Acm -> cats.acms + | Aspect -> cats.aspects + | Negation -> cats.negations + | Mood -> cats.moods + | Tense -> cats.tenses + | Nsyn -> cats.nsyn + | Nsem -> cats.nsem + | Mode -> cats.modes + | Psem -> cats.psem + | c -> failwith ("match_selector: " ^ string_of_selector c) + +(* Returns a copy of cats in which the field belonging to the given selector is replaced by vals; Lemma, Pos and Cat demand exactly one value, and selectors without a branch end in failwith. *) let set_selector cats vals = function + Number -> {cats with
numbers=vals} + | Case -> {cats with cases=vals} + | Gender -> {cats with genders=vals} + | Person -> {cats with persons=vals} + | Grad -> {cats with grads=vals} + | Praep -> {cats with praeps=vals} + | Acm -> {cats with acms=vals} + | Aspect -> {cats with aspects=vals} + | Negation -> {cats with negations=vals} + | Mood -> {cats with moods=vals} + | Tense -> {cats with tenses=vals} + | Nsyn -> {cats with nsyn=vals} + | Nsem -> {cats with nsem=vals} + | Mode -> {cats with modes=vals} + | Psem -> {cats with psem=vals} + | Lemma -> (match vals with [v] -> {cats with lemma=v} | _ -> failwith "set_selector: Lemma") + | Pos -> (match vals with [v] -> {cats with pos=v} | _ -> failwith "set_selector: Pos") + | Cat -> (match vals with [v] -> {cats with cat=v} | _ -> failwith "set_selector: Cat") + | Coerced -> {cats with coerced=vals} + | c -> failwith ("set_selector: " ^ string_of_selector c) + +(* Applies the selector constraints one after another: an Eq constraint keeps only the current values that also occur in the constraint, a Neq constraint removes the listed values. Raises Not_found as soon as a selector is left without any value; otherwise returns the narrowed record. *) let rec apply_selectors cats = function + [] -> cats + | (sel,Eq,vals) :: l -> + let vals = StringSet.intersection (StringSet.of_list (match_selector cats sel)) (StringSet.of_list vals) in + if StringSet.is_empty vals then raise Not_found else + apply_selectors (set_selector cats (StringSet.to_list vals) sel) l + | (sel,Neq,vals) :: l -> + let vals = StringSet.difference (StringSet.of_list (match_selector cats sel)) (StringSet.of_list vals) in + if StringSet.is_empty vals then raise Not_found else + apply_selectors (set_selector cats (StringSet.to_list vals) sel) l + +(* StringMap built from the table below: the key is a part-of-speech tag and the value is the selector list written next to it. *) let pos_categories = Xlist.fold [ + "subst",[Lemma;Cat;Coerced;Number;Case;Gender;Person;Nsyn;Nsem;]; + "depr",[Lemma;Cat;Coerced;Number;Case;Gender;Person;Nsyn;Nsem;]; + "ppron12",[Lemma;Number;Case;Gender;Person;]; + "ppron3",[Lemma;Number;Case;Gender;Person;Praep;]; + "siebie",[Lemma;Number;Case;Gender;Person;]; + "prep",[Lemma;Cat;Coerced;Psem;Case;]; + "compar",[Lemma;Cat;Coerced;Case;]; + "num",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; + "numcomp",[Lemma]; +
"intnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; + "realnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; + "intnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; + "realnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; + "symbol",[Lemma;Number;Case;Gender;Person;]; + "ordnum",[Lemma;Number;Case;Gender;Grad;]; + "date",[Lemma;Nsyn;Nsem;]; + "date-interval",[Lemma;Nsyn;Nsem;]; + "hour-minute",[Lemma;Nsyn;Nsem;]; + "hour",[Lemma;Nsyn;Nsem;]; + "hour-minute-interval",[Lemma;Nsyn;Nsem;]; + "hour-interval",[Lemma;Nsyn;Nsem;]; + "year",[Lemma;Nsyn;Nsem;]; + "year-interval",[Lemma;Nsyn;Nsem;]; + "day",[Lemma;Nsyn;Nsem;]; + "day-interval",[Lemma;Nsyn;Nsem;]; + "day-month",[Lemma;Nsyn;Nsem;]; + "day-month-interval",[Lemma;Nsyn;Nsem;]; + "month-interval",[Lemma;Nsyn;Nsem;]; + "roman-ordnum",[Lemma;Number;Case;Gender;Grad;]; + "roman",[Lemma;Nsyn;Nsem;]; + "roman-interval",[Lemma;Nsyn;Nsem;]; + "match-result",[Lemma;Nsyn;Nsem;]; + "url",[Lemma;Nsyn;Nsem;]; + "email",[Lemma;Nsyn;Nsem;]; + "phone-number",[Lemma;Nsyn;Nsem;]; + "postal-code",[Lemma;Nsyn;Nsem;]; + "obj-id",[Lemma;Nsyn;Nsem;]; + "building-number",[Lemma;Nsyn;Nsem;]; + "fixed",[Lemma;]; + "adj",[Lemma;Cat;Coerced;Number;Case;Gender;Grad;]; + "adjc",[Lemma;Cat;Coerced;Number;Case;Gender;Grad;]; + "adjp",[Lemma;Cat;Coerced;Number;Case;Gender;Grad;]; + "apron",[Lemma;Number;Case;Gender;Grad;]; + "adja",[Lemma;Cat;Coerced;]; + "adv",[Lemma;Cat;Coerced;Grad;Mode];(* ctype *) + "ger",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Case;Gender;Person;Aspect;Negation;]; + "pact",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Case;Gender;Aspect;Negation;]; + "ppas",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Case;Gender;Aspect;Negation;]; + "fin",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; + "bedzie",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; + "praet",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; +
"winien",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; + "impt",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; + "imps",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; + "pred",[Lemma;(*NewLemma;*)Cat;Coerced;Number;Gender;Person;Aspect;Negation;Mood;Tense;]; + "aglt",[Lemma;Number;Person;Aspect;]; + "inf",[Lemma;(*NewLemma;*)Cat;Coerced;Aspect;Negation;]; + "pcon",[Lemma;(*NewLemma;*)Cat;Coerced;Aspect;Negation;]; + "pant",[Lemma;(*NewLemma;*)Cat;Coerced;Aspect;Negation;]; + "qub",[Lemma;Cat;]; + "part",[Lemma;]; + "comp",[Lemma;];(* ctype *) + "conj",[Lemma;];(* ctype *) + "interj",[Lemma;Cat;Coerced;]; + "sinterj",[Lemma;]; + "burk",[Lemma;]; + "interp",[Lemma;]; + "unk",[Lemma;Number;Case;Gender;Person;]; + "xxx",[Lemma;Number;Case;Gender;Person;]; + "html-tag",[Lemma;]; + "list-item",[Lemma;]; + ] StringMap.empty (fun map (k,l) -> StringMap.add map k l) diff --git a/exec/ENIAMexec.ml b/exec/ENIAMexec.ml index c39b53c..692eab8 100644 --- a/exec/ENIAMexec.ml +++ b/exec/ENIAMexec.ml @@ -63,16 +63,19 @@ let rec translate_text = function | ENIAMsubsyntaxTypes.AltText l -> AltText(Xlist.map l (fun (mode,text) -> translate_mode mode, translate_text text)) -let clarify_categories cats token = +(* Expands a token into category records by calling ENIAMcategoriesPL.clarify_categories for every interpretation and every (cat,coerced) pair in cats. has_context selects the snode value handed on: false gives [concept], true gives [context]. Tokens other than Lemma, Proper and Interp yield the empty list. *) let clarify_categories cats has_context token = + let snode = match has_context with + false -> ["concept"] + | true -> ["context"] in match token.ENIAMtokenizerTypes.token with ENIAMtokenizerTypes.Lemma(lemma,pos,interp) -> List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,coerced) -> (* Printf.printf "lemma=%s pos=%s cat=%s coerced=%s\n%!"
lemma pos cat (String.concat "," coerced); *) - ENIAMcategoriesPL.clarify_categories false cat coerced (lemma,pos,interp))))) + ENIAMcategoriesPL.clarify_categories false cat coerced snode (lemma,pos,interp))))) | ENIAMtokenizerTypes.Proper(lemma,pos,interp,senses2) -> - List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,coerced) -> ENIAMcategoriesPL.clarify_categories true cat coerced (lemma,pos,interp))))) + List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,coerced) -> ENIAMcategoriesPL.clarify_categories true cat coerced snode (lemma,pos,interp))))) | ENIAMtokenizerTypes.Interp lemma -> - List.flatten (Xlist.map cats (fun (cat,coerced) -> ENIAMcategoriesPL.clarify_categories false cat coerced (lemma,"interp",[]))) + List.flatten (Xlist.map cats (fun (cat,coerced) -> ENIAMcategoriesPL.clarify_categories false cat coerced snode (lemma,"interp",[]))) | _ -> [] let create_chart rules tokens lex_sems paths last = @@ -84,8 +87,8 @@ let create_chart rules tokens lex_sems paths last = ENIAM_LCGrenderer.reset_variable_names (); ENIAM_LCGrenderer.add_variable_numbers (); (* if s.ENIAMlexSemanticsTypes.schemata = [] then failwith ("create_chart: no schema for token=" ^ t.ENIAMtokenizerTypes.orth ^ " lemma=" ^ ENIAMtokens.get_lemma t.ENIAMtokenizerTypes.token) else *) - Xlist.fold s.ENIAMlexSemanticsTypes.schemata chart (fun chart (selectors,cats,schema) -> - let cats = clarify_categories cats t in + Xlist.fold s.ENIAMlexSemanticsTypes.schemata chart (fun chart (selectors,cats,has_context,schema) -> + let cats = clarify_categories cats has_context t in (* let chart = ENIAM_LCGchart.add_inc_list chart lnode rnode s.ENIAMlexSemanticsTypes.lex_entries 0 in *) let l = ENIAM_LCGlexicon.create_entries rules id t.ENIAMtokenizerTypes.orth cats [selectors,schema] s.ENIAMlexSemanticsTypes.lex_entries in ENIAM_LCGchart.add_inc_list chart lnode rnode l 0)) in @@ -115,7 +118,7 @@ let create_dep_chart dep_rules tokens 
lex_sems paths = ENIAM_LCGrenderer.reset_variable_names (); ENIAM_LCGrenderer.add_variable_numbers (); - let cats = clarify_categories ["X",["X"]] t in - let schemata = Xlist.map s.ENIAMlexSemanticsTypes.schemata (fun (selectors,_,schema) -> selectors,schema) in + (* clarify_categories now takes has_context before the token; false selects the concept snode, the same default assign_valence attaches to its X schemata *) let cats = clarify_categories ["X",["X"]] false t in + let schemata = Xlist.map s.ENIAMlexSemanticsTypes.schemata (fun (selectors,_,_,schema) -> selectors,schema) in let l = ENIAM_LCGlexicon.create_entries dep_rules id t.ENIAMtokenizerTypes.orth cats schemata s.ENIAMlexSemanticsTypes.lex_entries in IntMap.add nodes i l) in (* print_endline "create_dep_chart 3"; *) diff --git a/lexSemantics/ENIAMadjuncts.ml b/lexSemantics/ENIAMadjuncts.ml index f3253a2..89b2ef5 100644 --- a/lexSemantics/ENIAMadjuncts.ml +++ b/lexSemantics/ENIAMadjuncts.ml @@ -253,19 +253,19 @@ let simplify_schemata lexemes pos pos2 lemma schemata = "{" ^ String.concat ";" (PhraseSet.fold morfs [] (fun l m -> ENIAMwalStringOf.phrase m :: l)) ^ "}")))); *) schemata -let add_adjuncts preps compreps compars pos2 (selectors,cat,schema) = +(* Extends one (selectors,cat,has_context,schema) entry with adjunct phrases chosen by pos2 and returns the resulting entries; has_context is carried through unchanged. verb, adj and adv append adjuncts to schema, noun produces three selector-specialised entries built from adjuncts only, and any other pos2 yields the empty list. *) let add_adjuncts preps compreps compars pos2 (selectors,cat,has_context,schema) = let compreps = Xlist.rev_map compreps ENIAMwalRenderer.render_comprep in let prepnps = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_prepnp prep cases) in let prepadjps = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_prepadjp prep cases) in let compars = Xlist.rev_map compars ENIAMwalRenderer.render_compar in match pos2 with - "verb" -> [selectors,cat,schema @ ENIAMwalRenderer.verb_adjuncts_simp @ prepnps @ prepadjps @ compreps @ compars] + "verb" -> [selectors,cat,has_context,schema @ ENIAMwalRenderer.verb_adjuncts_simp @ prepnps @ prepadjps @ compreps @ compars] | "noun" -> [ - [Nsyn,Eq,["proper"]] @ selectors,cat,ENIAMwalRenderer.proper_noun_adjuncts_simp @ prepnps @ compreps @ compars; - [Nsyn,Eq,["common"];Nsem,Eq,["measure"]] @ selectors,cat,ENIAMwalRenderer.measure_noun_adjuncts_simp @ prepnps @ compreps @ compars; -
[Nsyn,Eq,["common"];Nsem,Neq,["measure"]] @ selectors,cat,ENIAMwalRenderer.common_noun_adjuncts_simp @ prepnps @ compreps @ compars] - | "adj" -> [selectors,cat,schema @ ENIAMwalRenderer.adj_adjuncts_simp @ compars] - | "adv" -> [selectors,cat,schema @ ENIAMwalRenderer.adv_adjuncts_simp @ compars] + [Nsyn,Eq,["proper"]] @ selectors,cat,has_context,ENIAMwalRenderer.proper_noun_adjuncts_simp @ prepnps @ compreps @ compars; + [Nsyn,Eq,["common"];Nsem,Eq,["measure"]] @ selectors,cat,has_context,ENIAMwalRenderer.measure_noun_adjuncts_simp @ prepnps @ compreps @ compars; + [Nsyn,Eq,["common"];Nsem,Neq,["measure"]] @ selectors,cat,has_context,ENIAMwalRenderer.common_noun_adjuncts_simp @ prepnps @ compreps @ compars] + | "adj" -> [selectors,cat,has_context,schema @ ENIAMwalRenderer.adj_adjuncts_simp @ compars] + | "adv" -> [selectors,cat,has_context,schema @ ENIAMwalRenderer.adv_adjuncts_simp @ compars] | _ -> [] open ENIAMlexSemanticsTypes diff --git a/lexSemantics/ENIAMlexSemantics.ml b/lexSemantics/ENIAMlexSemantics.ml index 21aca57..03d9b1e 100644 --- a/lexSemantics/ENIAMlexSemantics.ml +++ b/lexSemantics/ENIAMlexSemantics.ml @@ -303,9 +303,9 @@ let assign_valence tokens lex_sems group = let schemata = ENIAMadjuncts.simplify_schemata lexemes pos pos2 lemma schemata1 in (* Printf.printf "C %s |schemata|=%d\n" lemma (Xlist.size schemata); *) let schemata = Xlist.rev_map schemata (fun (selectors,schema) -> - selectors,["X",["X"]],ENIAMwalRenderer.render_simple_schema schema) in + selectors,["X",["X"]],false,ENIAMwalRenderer.render_simple_schema schema) in let schemata = List.flatten (Xlist.rev_map schemata (ENIAMadjuncts.add_adjuncts preps compreps compars pos2)) in - let schemata = if schemata = [] then [[],["X",["X"]],[]] else schemata in + let schemata = if schemata = [] then [[],["X",["X"]],false,[]] else schemata in (* Printf.printf "D %s |schemata|=%d\n" lemma (Xlist.size schemata); *) let entries = List.flatten (Xlist.rev_map entries 
(ENIAMvalence.transform_lex_entry pos lemma)) in let entries = Xlist.map entries (fun (selectors,entry) -> diff --git a/lexSemantics/ENIAMlexSemanticsHTMLof.ml b/lexSemantics/ENIAMlexSemanticsHTMLof.ml index 8f15878..6aa902d 100644 --- a/lexSemantics/ENIAMlexSemanticsHTMLof.ml +++ b/lexSemantics/ENIAMlexSemanticsHTMLof.ml @@ -60,8 +60,9 @@ let html_of_lex_sems tokens lex_sems = let core = Printf.sprintf "%3d %s %s" id orth lemma in let lex_entries = Xlist.map t.lex_entries (fun (selectors,s) -> "  [" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] " ^ ENIAM_LCGstringOf.grammar_symbol 0 s) in - let schemata = Xlist.map t.schemata (fun (selectors,cat,l) -> + let schemata = Xlist.map t.schemata (fun (selectors,cat,has_context,l) -> "  [" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "]" ^ + (if has_context then "T" else "F") ^ String.concat "," (Xlist.map cat (fun (m,l) -> m ^ "[" ^ String.concat "," l ^ "]")) ^ " {" ^ String.concat ", " (Xlist.map l (fun (d,s) -> ENIAM_LCGstringOf.direction d ^ ENIAM_LCGstringOf.grammar_symbol 0 s)) ^ "}") in diff --git a/lexSemantics/ENIAMlexSemanticsStringOf.ml b/lexSemantics/ENIAMlexSemanticsStringOf.ml index 39caa25..8688c3e 100644 --- a/lexSemantics/ENIAMlexSemanticsStringOf.ml +++ b/lexSemantics/ENIAMlexSemanticsStringOf.ml @@ -40,8 +40,9 @@ let string_of_lex_sems tokens lex_sems = let core = Printf.sprintf "%3d %s %s" id orth lemma in let lex_entries = Xlist.map t.lex_entries (fun (selectors,s) -> "&[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] " ^ ENIAM_LCGstringOf.grammar_symbol 0 s) in - let schemata = Xlist.map t.schemata (fun (selectors,cat,l) -> + let schemata = Xlist.map t.schemata (fun (selectors,cat,has_context,l) -> "[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "]" ^ + (if has_context then "T" else "F") ^ String.concat "," (Xlist.map cat (fun (m,l) -> m ^ "[" ^ String.concat "," l ^ "]")) ^ " {" ^ String.concat "," (Xlist.map l (fun (d,s) -> ENIAM_LCGstringOf.direction d ^ 
ENIAM_LCGstringOf.grammar_symbol 0 s)) ^ "}") in diff --git a/lexSemantics/ENIAMlexSemanticsTypes.ml b/lexSemantics/ENIAMlexSemanticsTypes.ml index 23d76a9..171d02d 100644 --- a/lexSemantics/ENIAMlexSemanticsTypes.ml +++ b/lexSemantics/ENIAMlexSemanticsTypes.ml @@ -42,6 +42,7 @@ let empty_frame = {selectors=[]; senses=[]; cats=["X",["X"]]; positions=[]; arol type lex_sem = { schemata: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * (string * string list) list * (* sensy *) + bool * (* has_context *) (ENIAM_LCGtypes.direction * ENIAM_LCGtypes.grammar_symbol) list) list; lex_entries: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * ENIAM_LCGtypes.grammar_symbol) list; diff --git a/lexSemantics/ENIAMwalRenderer.ml b/lexSemantics/ENIAMwalRenderer.ml index a1a2650..e1fe8d6 100644 --- a/lexSemantics/ENIAMwalRenderer.ml +++ b/lexSemantics/ENIAMwalRenderer.ml @@ -177,60 +177,60 @@ let render_phrase = function | E (PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top] | phrase -> failwith ("render_phrase: " ^ ENIAMwalStringOf.phrase phrase) -let render_phrase_cat cat = function - NP(Case case) -> Tensor[Atom "np"; Atom cat; Top; Atom case; Top; Top] - | NP NomAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] - | NP VocAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"] -(* | NP GenAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"] - | NP AllAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]*) - | NP CaseAgr -> Tensor[Atom "np"; Atom cat; Top; AVar "case"; Top; Top] - | NP CaseUndef -> Tensor[Atom "np"; Atom cat; Top; Top; Top; Top] - | PrepNP(Psem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top] - | 
PrepNP(Psem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top] - | PrepNP(Psem,"_",Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Atom case] - | PrepNP(Psem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Top] - | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case] - | PrepNP(Pnosem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top] - | PrepNP(Pnosem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top] - | PrepNP(Pnosem,"_",Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Atom case] - | PrepNP(Pnosem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Top] - | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case] - | AdjP(Case case) -> Tensor[Atom "adjp"; Atom cat; Top; Atom case; Top] - | AdjP NomAgr -> Tensor[Atom "adjp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"] - | AdjP AllAgr -> Tensor[Atom "adjp"; Atom cat; AVar "number"; AVar "case"; AVar "gender"] -(* | AdjP CaseAgr -> Tensor[Atom "adjp"; Top; AVar "case"; Top] - | PrepAdjP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]*) - | PrepAdjP(prep,Case case) -> Tensor[Atom "prepadjp"; Atom cat; Atom prep; Atom case] +let render_phrase_cat cat role = function + NP(Case case) -> Tensor[Atom "np"; Top; Atom case; Top; Top; Atom cat; Atom role; Top] + | NP NomAgr -> Tensor[Atom "np"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Atom cat; Atom role; Top] + | NP VocAgr -> Tensor[Atom "np"; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; Atom cat; Atom role; Top] +(* | NP GenAgr -> Tensor[Atom "np"; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"; Atom cat; Atom role; Top] + | NP AllAgr -> Tensor[Atom "np"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"; Atom cat; Atom role; Top]*) + | NP CaseAgr -> Tensor[Atom "np"; Top; AVar "case"; 
Top; Top; Atom cat; Atom role; Top] + | NP CaseUndef -> Tensor[Atom "np"; Top; Top; Top; Top; Atom cat; Atom role; Top] + | PrepNP(Psem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom "sem"; Top; Top; Atom cat; Atom role; Top] + | PrepNP(Psem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom "sem"; Top; Top; Atom cat; Atom role; Top] + | PrepNP(Psem,"_",Case case) -> Tensor[Atom "prepnp"; Atom "sem"; Top; Atom case; Atom cat; Atom role; Top] + | PrepNP(Psem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Top; Atom cat; Atom role; Top] + | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Atom case; Atom cat; Atom role; Top] + | PrepNP(Pnosem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom "nosem"; Top; Top; Atom cat; Atom role; Top] + | PrepNP(Pnosem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom "nosem"; Top; Top; Atom cat; Atom role; Top] + | PrepNP(Pnosem,"_",Case case) -> Tensor[Atom "prepnp"; Atom "nosem"; Top; Atom case; Atom cat; Atom role; Top] + | PrepNP(Pnosem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Top; Atom cat; Atom role; Top] + | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case; Atom cat; Atom role; Top] + | AdjP(Case case) -> Tensor[Atom "adjp"; Top; Atom case; Top; Top; Atom cat; Atom role; Top] + | AdjP NomAgr -> Tensor[Atom "adjp"; AVar "number"; Atom "nom"; AVar "gender"; Top; Atom cat; Atom role; Top] + | AdjP AllAgr -> Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"; Top; Atom cat; Atom role; Top] +(* | AdjP CaseAgr -> Tensor[Atom "adjp"; Top; AVar "case"; Top; Top; Atom cat; Atom role; Top] + | PrepAdjP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top; Atom cat; Atom role; Top]*) + | PrepAdjP(prep,Case case) -> Tensor[Atom "prepadjp"; Atom prep; Atom case; Atom cat; Atom role; Top] (* | NumP(Case case) -> Tensor[Atom "nump"; Top; Atom case; Top; Top] | NumP NomAgr -> Tensor[Atom "nump"; AVar "number"; Atom "nom"; AVar "gender"; AVar 
"person"] | NumP CaseAgr -> Tensor[Atom "nump"; Top; AVar "case"; Top; Top] | NumP CaseUndef -> Tensor[Atom "nump"; Top; Top; Top; Top] | PrepNumP(_,"",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top] | PrepNumP(_,prep,Case case) -> Tensor[Atom "prepnump"; Atom prep; Atom case] *) -(* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top]*) - | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom cat; Atom prep] - | ComparP(prep,Case case) -> Tensor[Atom "compar"; Atom cat; Atom prep; Atom case] - (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep] *) - (* | IP -> Tensor[Atom "ip";Top;Top;Top] *) - | CP (ctype,Comp comp) -> Tensor[Atom "cp"; Atom cat; arg_of_ctype ctype; Atom comp] - (* | CP (ctype,CompUndef) -> Tensor[Atom "cp"; arg_of_ctype ctype; Top]*) - | NCP(Case case,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; arg_of_ctype ctype; Atom comp] - | NCP(Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; Top; Top] - | NCP(NomAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp] - | NCP(NomAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top] - | NCP(VocAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp] - | NCP(VocAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; Top; Top] - | PrepNCP(Psem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom cat; Atom "sem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp] - | PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom cat; Atom "sem"; Atom prep; Atom case; Top; Top] - | PrepNCP(Pnosem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom cat; Atom "nosem"; Atom prep; Atom case; 
arg_of_ctype ctype; Atom comp] - | PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom cat; Atom "nosem"; Atom prep; Atom case; Top; Top] - | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom cat; Atom aspect] - | InfP AspectUndef -> Tensor[Atom "infp"; Atom cat; Top] - (* | PadvP -> Tensor[Atom "padvp"] *) - | AdvP "misc" -> Tensor[Atom "advp"; Atom cat; Top] (* FIXME: a może Atom "mod" zamiast Top *) - | AdvP "" -> Tensor[Atom "advp"; Atom cat; Top] (* FIXME: a może Atom "mod" zamiast Top *) - | AdvP mode -> Tensor[Atom "advp"; Atom cat; Atom mode] - | ColonP -> Tensor[Atom "colonp"; Atom cat] +(* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top; Atom cat; Atom role; Top]*) + | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom prep; Atom cat; Atom role; Top] + | ComparP(prep,Case case) -> Tensor[Atom "compar"; Atom prep; Atom case; Atom cat; Atom role; Top] + (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep; Atom cat; Atom role; Top] *) + (* | IP -> Tensor[Atom "ip";Top;Top;Top; Atom cat; Atom role; Top] *) + | CP (ctype,Comp comp) -> Tensor[Atom "cp"; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Top] + (* | CP (ctype,CompUndef) -> Tensor[Atom "cp"; arg_of_ctype ctype; Top; Atom cat; Atom role; Top]*) + | NCP(Case case,ctype,Comp comp) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Top] + | NCP(Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top; Atom cat; Atom role; Top] + | NCP(NomAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Top] + | NCP(NomAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top; Atom cat; Atom role; Top] + | NCP(VocAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom 
comp; Atom cat; Atom role; Top] + | NCP(VocAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; Top; Top; Atom cat; Atom role; Top] + | PrepNCP(Psem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Top] + | PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top; Atom cat; Atom role; Top] + | PrepNCP(Pnosem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Top] + | PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top; Atom cat; Atom role; Top] + | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom aspect; Atom cat; Atom role; Top] + | InfP AspectUndef -> Tensor[Atom "infp"; Top; Atom cat; Atom role; Top] + (* | PadvP -> Tensor[Atom "padvp"; Atom cat; Atom role; Top] *) + | AdvP "misc" -> Tensor[Atom "advp"; (*Top;*) Atom cat; Atom role; Top] (* FIXME: or perhaps Atom "mod" instead of Top *) + | AdvP "" -> Tensor[Atom "advp"; (*Top;*) Atom cat; Atom role; Top] (* FIXME: or perhaps Atom "mod" instead of Top *) + | AdvP mode -> Tensor[Atom "advp"; (*Atom mode;*) Atom cat; Atom role; Top] + | ColonP -> Tensor[Atom "colonp"; Atom cat; Atom role; Top] (* no phrase-specific slots: head atom, then the same cat; role; Top trailer as the other arms *) (* | PrepP -> Tensor[Atom "prepp";Top] | Prep("",CaseAgr) -> Tensor[Atom "prep"; Top; AVar "case"] | Prep("",CaseUAgr) -> Tensor[Atom "prep"; Top; AVar "ucase"] @@ -245,15 +245,15 @@ let render_phrase_cat cat = function | AuxImp -> Tensor[Atom "aux-imp"] | Pro -> One | ProNG -> One *) - | E (CP(CompTypeUndef,CompUndef)) -> Tensor[Atom "cp"; Atom cat; Top; Top] - | E (NCP(NomAgr,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top] - | E (NP(NomAgr)) -> Tensor[Atom "np"; Atom cat;
AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] - | E (PrepNP(Psem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case] - | E (PrepNP(Pnosem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case] - | E (NP(Case case)) -> Tensor[Atom "np"; Atom cat; Top; Atom case; Top; Top] - | E (NCP(Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; Top; Top] - | E (PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom cat; Atom "sem"; Atom prep; Atom case; Top; Top] - | E (PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom cat; Atom "nosem"; Atom prep; Atom case; Top; Top] + | E (CP(CompTypeUndef,CompUndef)) -> Tensor[Atom "cp"; Top; Top; Atom cat; Atom role; Top] + | E (NCP(NomAgr,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top; Atom cat; Atom role; Top] + | E (NP(NomAgr)) -> Tensor[Atom "np"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Atom cat; Atom role; Top] + | E (PrepNP(Psem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Atom case; Atom cat; Atom role; Top] + | E (PrepNP(Pnosem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case; Atom cat; Atom role; Top] + | E (NP(Case case)) -> Tensor[Atom "np"; Top; Atom case; Top; Top; Atom cat; Atom role; Top] + | E (NCP(Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top; Atom cat; Atom role; Top] + | E (PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top; Atom cat; Atom role; Top] + | E (PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top; Atom cat; Atom role; Top] | phrase -> failwith ("render_phrase_cat: " ^ ENIAMwalStringOf.phrase phrase) let 
render_morf = function @@ -264,7 +264,7 @@ let render_morf = function | SimpleLexArg(lex,pos) -> Tensor([Atom "lex";Atom lex] @ render_pos pos) | phrase -> render_phrase phrase -let render_morf_cat cats = function +let render_morf_cat cats role = function | Null -> [One] | Pro -> [One] | ProNG -> [One] @@ -275,7 +275,7 @@ let render_morf_cat cats = function | Lex lex -> Tensor[Atom lex] *) | LexArg(id,lex,pos) -> [Tensor([Atom "lex";Atom (string_of_int id);Atom lex] @ render_pos pos)] | SimpleLexArg(lex,pos) -> [Tensor([Atom "lex";Atom lex] @ render_pos pos)] - | phrase -> Xlist.map cats (fun cat -> render_phrase_cat cat phrase) + | phrase -> Xlist.map cats (fun cat -> render_phrase_cat cat role phrase) (* let extract_sel_prefs sel_prefs = Xlist.map sel_prefs (function @@ -296,7 +296,7 @@ let translate_dir = function let render_schema_cat schema = Xlist.map schema (fun p -> - match List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs)) with + match List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs p.role)) with [] -> failwith "render_schema" | [s] -> translate_dir p.dir,s | l -> translate_dir p.dir,Plus l) @@ -312,7 +312,7 @@ let render_connected_schema schema = let render_connected_schema_cat schema = Xlist.map schema (fun p -> {p with - morfs=Xlist.map (List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs))) (fun morf -> LCG morf)}) + morfs=Xlist.map (List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs p.role))) (fun morf -> LCG morf)}) (* FIXME: tu trzeba by dodać zwykłe reguły dla czasowników dotyczące ich negacji, aglutynatu itp. *) let render_lex_entry = function diff --git a/lexSemantics/ENIAMwalRenderer_old.ml b/lexSemantics/ENIAMwalRenderer_old.ml new file mode 100644 index 0000000..a1a2650 --- /dev/null +++ b/lexSemantics/ENIAMwalRenderer_old.ml @@ -0,0 +1,493 @@ +(* + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. 
+ * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences + * + * This library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + *) + +open ENIAM_LCGtypes +open ENIAMwalTypes + +let arg_of_ctype = function + Int -> Atom "int" + | Rel -> Atom "rel" + (* | Sub -> LCGtypes.Atom "sub" + | Coord -> LCGtypes.Atom "coord" *) + | CompTypeUndef -> Top + (* | CompTypeAgr -> LCGtypes.AVar "ctype" *) + +let render_number = function + Number n -> Atom n + | NumberUndef -> Top + | NumberAgr -> Top + +let render_negation = function + Negation -> Atom "neg" + | Aff -> Atom "aff" + | NegationUndef -> Top + +let render_pos_entry = function + "subst" -> [Atom "subst"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"] + | "ppron12" -> [Atom "ppron12"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"] + | "ppron3" -> [Atom "ppron3"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"] + | "siebie" -> [Atom "siebie"; AVar "case"] + | "num" -> [Atom "num"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"] + | "intnum" -> [Atom "num"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"] + | "prep" -> [Atom "prep"; AVar "case"] + | "adj" -> [Atom "adj"; AVar "number"; AVar "case"; AVar "gender"; AVar "grad"] + | "adv" -> [Atom "adv"; AVar 
"grad"] + | "ger" -> [Atom "ger"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"; AVar "negation"] + | "pact" -> [Atom "pact"; AVar "number"; AVar "case"; AVar "gender"; AVar "negation"] + | "ppas" -> [Atom "ppas"; AVar "number"; AVar "case"; AVar "gender"; AVar "negation"] + | "inf" -> [Atom "inf"; AVar "aspect"; AVar "negation"] + | "qub" -> [Atom "qub"] + | "compar" -> [Atom "compar"; AVar "case"] + | "comp" -> [Atom "comp"; AVar "ctype"] + | "fin" -> [Atom "pers"; AVar "negation"] + | "praet" -> [Atom "pers"; AVar "negation"] + | "pred" -> [Atom "pers"; AVar "negation"] + | "winien" -> [Atom "pers"; AVar "negation"] + | "bedzie" -> [Atom "pers"; AVar "negation"] + | s -> failwith ("render_pos_entry: " ^ s) + +let render_pos = function (* wprowadzam uzgodnienia a nie wartości cech, bo wartości cech są wprowadzane przez leksem a uzgodnienia wiążą je z wartościami u nadrzędnika *) + | SUBST(number,Case case) -> [Atom "subst"; render_number number; Atom case; Top; Top] + | SUBST(_,NomAgr) -> [Atom "subst"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] + | SUBST(_,GenAgr) -> [Atom "subst"; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"] + | SUBST(_,AllAgr) -> [Atom "subst"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"] + | SUBST(number,CaseAgr) -> [Atom "subst"; render_number number; AVar "case"; Top; Top] + | SUBST(_,CaseUndef) -> [Atom "subst"; Top; Top; Top; Top] + | PPRON12(number,Case case) -> [Atom "ppron12"; render_number number; Atom case; Top; Top] + | PPRON3(number,Case case) -> [Atom "ppron3"; render_number number; Atom case; Top; Top] + | SIEBIE(Case case) -> [Atom "siebie"; Atom case] + | NUM(Case case,_) -> [Atom "num"; Top; Atom case; Top; Top] + | NUM(NomAgr,_) -> [Atom "num"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] +(* | NUM(CaseAgr,_) -> [Atom "num"; Top; AVar "case"; Top; Top] + | NUM(CaseUndef,_) -> [Atom "num"; Top; Top; Top; Top]*) + | PREP(Case case) -> [Atom "prep"; Atom case] + | 
ADJ(_,Case case,_,Grad grad) -> [Atom "adj"; Top; Atom case; Top; Atom grad] +(* | ADJ(_,NomAgr,_,_) -> [Atom "adj"; AVar "number"; Atom "nom"; AVar "gender"] + | ADJ(_,CaseAgr,_,_) -> [Atom "adj"; Top; AVar "case"; Top]*) + | ADJ(_,CaseUndef,_,Grad grad) -> [Atom "adj"; Top; Top; Top; Atom grad] + | ADJ(_,AllAgr,_,Grad grad) -> [Atom "adj"; AVar "number"; AVar "case"; AVar "gender"; Atom grad] + | ADJ(_,AllAgr,_,GradUndef) -> [Atom "adj"; AVar "number"; AVar "case"; AVar "gender"; Top] + | ADV (Grad grad) -> [Atom "adv"; Atom grad] + | ADV GradUndef -> [Atom "adv"; Top] + | GER(_,Case case,_,_,neg) -> [Atom "ger"; Top; Atom case; Top; Top; render_negation neg] +(* | GER(_,NomAgr,_,_,_) -> [Atom "ger"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] + | GER(_,CaseAgr,_,_,_) -> [Atom "ger"; Top; AVar "case"; Top; Top] + | GER(_,CaseUndef,_,_,_) -> [Atom "ger"; Top; Top; Top; Top] + | PACT(_,Case case,_,_,_) -> [Atom "pact"; Top; Atom case; Top] + | PACT(_,NomAgr,_,_,_) -> [Atom "pact"; AVar "number"; Atom "nom"; AVar "gender"]*) + | PACT(_,AllAgr,_,_,neg) -> [Atom "pact"; AVar "number"; AVar "case"; AVar "gender"; render_negation neg] +(* | PACT(_,CaseAgr,_,_,_) -> [Atom "pact"; Top; AVar "case"; Top]*) + | PPAS(_,Case case,_,_,neg) -> [Atom "ppas"; Top; Atom case; Top; render_negation neg] + | PPAS(_,CaseUndef,_,_,neg) -> [Atom "ppas"; Top; Top; Top; render_negation neg] + (* | PPAS(_,NomAgr,_,_,_) -> [Atom "ppas"; AVar "number"; Atom "nom"; AVar "gender"]*) + | PPAS(_,AllAgr,_,_,neg) -> [Atom "ppas"; AVar "number"; AVar "case"; AVar "gender"; render_negation neg] +(* | PPAS(_,CaseAgr,_,_,_) -> [Atom "ppas"; Top; AVar "case"; Top]*) + | INF(Aspect aspect,neg) -> [Atom "inf"; Atom aspect; render_negation neg] + | INF(AspectUndef,neg) -> [Atom "inf"; Top; render_negation neg] + | QUB -> [Atom "qub"] + | COMPAR (Case case) -> [Atom "compar"; Atom case] + | COMP ctype -> [Atom "comp"; arg_of_ctype ctype] + | PERS neg -> [Atom "pers"; render_negation neg] + | 
pos -> failwith ("render_pos: " ^ ENIAMwalStringOf.pos pos) + +let render_phrase = function + NP(Case case) -> Tensor[Atom "np"; Top; Atom case; Top; Top] + | NP NomAgr -> Tensor[Atom "np"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] +(* | NP GenAgr -> Tensor[Atom "np"; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"] + | NP AllAgr -> Tensor[Atom "np"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]*) + | NP CaseAgr -> Tensor[Atom "np"; Top; AVar "case"; Top; Top] +(* | NP CaseUndef -> Tensor[Atom "np"; Top; Top; Top; Top] + | PrepNP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]*) + | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Atom case] + | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case] + | AdjP(Case case) -> Tensor[Atom "adjp"; Top; Atom case; Top] + | AdjP NomAgr -> Tensor[Atom "adjp"; AVar "number"; Atom "nom"; AVar "gender"] + | AdjP AllAgr -> Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"] +(* | AdjP CaseAgr -> Tensor[Atom "adjp"; Top; AVar "case"; Top] + | PrepAdjP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]*) + | PrepAdjP(prep,Case case) -> Tensor[Atom "prepadjp"; Atom prep; Atom case] + (* | NumP(Case case) -> Tensor[Atom "nump"; Top; Atom case; Top; Top] + | NumP NomAgr -> Tensor[Atom "nump"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] + | NumP CaseAgr -> Tensor[Atom "nump"; Top; AVar "case"; Top; Top] + | NumP CaseUndef -> Tensor[Atom "nump"; Top; Top; Top; Top] + | PrepNumP(_,"",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top] + | PrepNumP(_,prep,Case case) -> Tensor[Atom "prepnump"; Atom prep; Atom case] *) +(* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top]*) + | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom prep] + | ComparP(prep,Case case) -> Tensor[Atom "compar"; Atom prep; Atom case] + (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep] *) + (* | IP -> Tensor[Atom "ip";Top;Top;Top] *) + | CP 
(ctype,Comp comp) -> Tensor[Atom "cp"; arg_of_ctype ctype; Atom comp] + (* | CP (ctype,CompUndef) -> Tensor[Atom "cp"; arg_of_ctype ctype; Top]*) + | NCP(Case case,ctype,Comp comp) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; arg_of_ctype ctype; Atom comp] + | NCP(Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top] + | NCP(NomAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp] + | NCP(NomAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top] + | PrepNCP(Psem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp] + | PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top] + | PrepNCP(Pnosem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp] + | PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top] + | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom aspect] + | InfP AspectUndef -> Tensor[Atom "infp"; Top] + (* | PadvP -> Tensor[Atom "padvp"] *) + | AdvP "misc" -> Tensor[Atom "advp"; Top] (* FIXME: a może Atom "mod" zamiast Top *) + | AdvP mode -> Tensor[Atom "advp"; Atom mode] + | ColonP -> Tensor[Atom "colonp"] + | FixedP lex -> Tensor[Atom "fixed"; Atom lex] + (* | PrepP -> Tensor[Atom "prepp";Top] + | Prep("",CaseAgr) -> Tensor[Atom "prep"; Top; AVar "case"] + | Prep("",CaseUAgr) -> Tensor[Atom "prep"; Top; AVar "ucase"] + | Num(AllAgr,Acm acm) -> Tensor[Atom "num"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"; Atom acm] + | Measure(AllUAgr) -> Tensor[Atom "measure"; AVar "unumber"; AVar "ucase"; AVar "ugender"; AVar "uperson"] *) + | Or -> Tensor[Atom "or"] + (* | Qub -> Tensor[Atom "qub"]*) 
+ (* | Inclusion -> Tensor[Atom "inclusion"] + | Adja -> Tensor[Atom "adja"] + | Aglt -> Tensor[Atom "aglt"; AVar "number"; AVar "person"] + | AuxPast -> Tensor[Atom "aux-past"; AVar "number"; AVar "gender"; AVar "person"] + | AuxFut -> Tensor[Atom "aux-fut"; AVar "number"; AVar "gender"; AVar "person"] + | AuxImp -> Tensor[Atom "aux-imp"] + | Pro -> One + | ProNG -> One *) + | E Or -> Tensor[Atom "or"] + | E (CP(CompTypeUndef,CompUndef)) -> Tensor[Atom "cp"; Top; Top] + | E (NCP(NomAgr,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top] + | E (NP(NomAgr)) -> Tensor[Atom "np"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] + | E (PrepNP(Psem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom "sem"; Atom prep; Atom case] + | E (PrepNP(Pnosem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom "nosem"; Atom prep; Atom case] + | E (NP(Case case)) -> Tensor[Atom "np"; Top; Atom case; Top; Top] + | E (NCP(Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top] + | E (PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "sem"; Atom prep; Atom case; Top; Top] + | E (PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom "nosem"; Atom prep; Atom case; Top; Top] + | phrase -> failwith ("render_phrase: " ^ ENIAMwalStringOf.phrase phrase) + +let render_phrase_cat cat = function + NP(Case case) -> Tensor[Atom "np"; Atom cat; Top; Atom case; Top; Top] + | NP NomAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] + | NP VocAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"] +(* | NP GenAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "gen"; AVar "gender"; AVar "person"] + | NP AllAgr -> Tensor[Atom "np"; Atom cat; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]*) + | NP CaseAgr -> Tensor[Atom "np"; Atom cat; Top; AVar 
"case"; Top; Top] + | NP CaseUndef -> Tensor[Atom "np"; Atom cat; Top; Top; Top; Top] + | PrepNP(Psem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top] + | PrepNP(Psem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top] + | PrepNP(Psem,"_",Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Atom case] + | PrepNP(Psem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Top] + | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case] + | PrepNP(Pnosem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top] + | PrepNP(Pnosem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top] + | PrepNP(Pnosem,"_",Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Atom case] + | PrepNP(Pnosem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Top] + | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case] + | AdjP(Case case) -> Tensor[Atom "adjp"; Atom cat; Top; Atom case; Top] + | AdjP NomAgr -> Tensor[Atom "adjp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"] + | AdjP AllAgr -> Tensor[Atom "adjp"; Atom cat; AVar "number"; AVar "case"; AVar "gender"] +(* | AdjP CaseAgr -> Tensor[Atom "adjp"; Top; AVar "case"; Top] + | PrepAdjP("",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top]*) + | PrepAdjP(prep,Case case) -> Tensor[Atom "prepadjp"; Atom cat; Atom prep; Atom case] + (* | NumP(Case case) -> Tensor[Atom "nump"; Top; Atom case; Top; Top] + | NumP NomAgr -> Tensor[Atom "nump"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] + | NumP CaseAgr -> Tensor[Atom "nump"; Top; AVar "case"; Top; Top] + | NumP CaseUndef -> Tensor[Atom "nump"; Top; Top; Top; Top] + | PrepNumP(_,"",CaseUndef) -> Tensor[Atom "prepnp"; Top; Top] + | PrepNumP(_,prep,Case case) -> Tensor[Atom "prepnump"; Atom prep; Atom case] *) +(* | ComprepNP("") -> Tensor[Atom 
"comprepnp"; Top]*) + | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom cat; Atom prep] + | ComparP(prep,Case case) -> Tensor[Atom "compar"; Atom cat; Atom prep; Atom case] + (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep] *) + (* | IP -> Tensor[Atom "ip";Top;Top;Top] *) + | CP (ctype,Comp comp) -> Tensor[Atom "cp"; Atom cat; arg_of_ctype ctype; Atom comp] + (* | CP (ctype,CompUndef) -> Tensor[Atom "cp"; arg_of_ctype ctype; Top]*) + | NCP(Case case,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; arg_of_ctype ctype; Atom comp] + | NCP(Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; Top; Top] + | NCP(NomAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp] + | NCP(NomAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top] + | NCP(VocAgr,ctype,Comp comp) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; arg_of_ctype ctype; Atom comp] + | NCP(VocAgr,CompTypeUndef,CompUndef) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "voc"; AVar "gender"; AVar "person"; Top; Top] + | PrepNCP(Psem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom cat; Atom "sem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp] + | PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom cat; Atom "sem"; Atom prep; Atom case; Top; Top] + | PrepNCP(Pnosem,prep,Case case,ctype,Comp comp) -> Tensor[Atom "prepncp"; Atom cat; Atom "nosem"; Atom prep; Atom case; arg_of_ctype ctype; Atom comp] + | PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef) -> Tensor[Atom "prepncp"; Atom cat; Atom "nosem"; Atom prep; Atom case; Top; Top] + | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom cat; Atom aspect] + | InfP AspectUndef -> Tensor[Atom "infp"; Atom cat; Top] + (* | PadvP -> 
Tensor[Atom "padvp"] *) + | AdvP "misc" -> Tensor[Atom "advp"; Atom cat; Top] (* FIXME: a może Atom "mod" zamiast Top *) + | AdvP "" -> Tensor[Atom "advp"; Atom cat; Top] (* FIXME: a może Atom "mod" zamiast Top *) + | AdvP mode -> Tensor[Atom "advp"; Atom cat; Atom mode] + | ColonP -> Tensor[Atom "colonp"; Atom cat] + (* | PrepP -> Tensor[Atom "prepp";Top] + | Prep("",CaseAgr) -> Tensor[Atom "prep"; Top; AVar "case"] + | Prep("",CaseUAgr) -> Tensor[Atom "prep"; Top; AVar "ucase"] + | Num(AllAgr,Acm acm) -> Tensor[Atom "num"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"; Atom acm] + | Measure(AllUAgr) -> Tensor[Atom "measure"; AVar "unumber"; AVar "ucase"; AVar "ugender"; AVar "uperson"] *) + (* | Qub -> Tensor[Atom "qub"]*) + (* | Inclusion -> Tensor[Atom "inclusion"] + | Adja -> Tensor[Atom "adja"] + | Aglt -> Tensor[Atom "aglt"; AVar "number"; AVar "person"] + | AuxPast -> Tensor[Atom "aux-past"; AVar "number"; AVar "gender"; AVar "person"] + | AuxFut -> Tensor[Atom "aux-fut"; AVar "number"; AVar "gender"; AVar "person"] + | AuxImp -> Tensor[Atom "aux-imp"] + | Pro -> One + | ProNG -> One *) + | E (CP(CompTypeUndef,CompUndef)) -> Tensor[Atom "cp"; Atom cat; Top; Top] + | E (NCP(NomAgr,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"; Top; Top] + | E (NP(NomAgr)) -> Tensor[Atom "np"; Atom cat; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] + | E (PrepNP(Psem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case] + | E (PrepNP(Pnosem,prep,Case case)) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case] + | E (NP(Case case)) -> Tensor[Atom "np"; Atom cat; Top; Atom case; Top; Top] + | E (NCP(Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "ncp"; Atom cat; Top; Atom case; Top; Top; Top; Top] + | E (PrepNCP(Psem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom cat; Atom "sem"; Atom prep; Atom case; Top; Top] 
+ | E (PrepNCP(Pnosem,prep,Case case,CompTypeUndef,CompUndef)) -> Tensor[Atom "prepncp"; Atom cat; Atom "nosem"; Atom prep; Atom case; Top; Top] + | phrase -> failwith ("render_phrase_cat: " ^ ENIAMwalStringOf.phrase phrase) + +let render_morf = function + | Null -> One + (* | X -> Tensor[Atom "X"] + | Lex lex -> Tensor[Atom lex] *) + | LexArg(id,lex,pos) -> Tensor([Atom "lex";Atom (string_of_int id);Atom lex] @ render_pos pos) + | SimpleLexArg(lex,pos) -> Tensor([Atom "lex";Atom lex] @ render_pos pos) + | phrase -> render_phrase phrase + +let render_morf_cat cats = function + | Null -> [One] + | Pro -> [One] + | ProNG -> [One] + | FixedP lex -> [Tensor[Atom "fixed"; Atom lex]] + | Or -> [Tensor[Atom "or"]] + | E Or -> [Tensor[Atom "or"]] + (* | X -> Tensor[Atom "X"] + | Lex lex -> Tensor[Atom lex] *) + | LexArg(id,lex,pos) -> [Tensor([Atom "lex";Atom (string_of_int id);Atom lex] @ render_pos pos)] + | SimpleLexArg(lex,pos) -> [Tensor([Atom "lex";Atom lex] @ render_pos pos)] + | phrase -> Xlist.map cats (fun cat -> render_phrase_cat cat phrase) + +(* let extract_sel_prefs sel_prefs = + Xlist.map sel_prefs (function + SynsetName s -> s + | _ -> failwith "extract_sel_prefs") *) + +let render_schema schema = + Xlist.map schema (fun p -> + match Xlist.map p.morfs render_morf with + [] -> failwith "render_schema" + | [s] -> Both,s + | l -> Both,Plus l) + +let translate_dir = function + Both_ -> Both + | Forward_ -> Forward + | Backward_ -> Backward + +let render_schema_cat schema = + Xlist.map schema (fun p -> + match List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs)) with + [] -> failwith "render_schema" + | [s] -> translate_dir p.dir,s + | l -> translate_dir p.dir,Plus l) + +let render_simple_schema schema = + Xlist.map schema (fun morfs -> + Both,Plus(One :: Xlist.map morfs render_morf)) + +let render_connected_schema schema = + Xlist.map schema (fun p -> + {p with morfs=Xlist.map p.morfs (fun morf -> LCG (render_morf morf))}) + +let 
render_connected_schema_cat schema = + Xlist.map schema (fun p -> + {p with + morfs=Xlist.map (List.flatten (Xlist.map p.morfs (render_morf_cat p.cat_prefs))) (fun morf -> LCG morf)}) + +(* FIXME: tu trzeba by dodać zwykłe reguły dla czasowników dotyczące ich negacji, aglutynatu itp. *) +let render_lex_entry = function + SimpleLexEntry(lemma,pos) -> Tensor([Atom "lex";Atom lemma] @ render_pos_entry pos) + | LexEntry(id,lemma,pos,NoRestr,schema) -> + ImpSet(Tensor([Atom "lex";Atom (string_of_int id);Atom lemma] @ render_pos_entry pos),render_schema schema) + (*Xlist.map (transform_entry pos lemma NegationUndef PredFalse AspectUndef schema) (fun (sel,schema) -> + sel,LexEntry(id,lemma,pos,NoRestr,schema))*) + | ComprepNPEntry(prep,NoRestr,schema) -> ImpSet(Tensor[Atom "comprepnp"; Atom prep],render_schema schema) + (*Xlist.map (transform_entry "comprep" s NegationUndef PredFalse AspectUndef schema) (fun (sel,schema) -> + sel,ComprepNPEntry(s,NoRestr,schema))*) + | LexEntry(id,lemma,pos,_,[]) (*as entry*) -> + ImpSet(Tensor([Atom "lex";Atom (string_of_int id);Atom lemma] @ render_pos_entry pos),[Both,Tensor[AVar "schema"]]) + | entry -> failwith ("render_entry:" ^ ENIAMwalStringOf.lex_entry entry) + +(* let schemata,entries = ENIAMvalence.prepare_all_valence ENIAMwalParser.phrases ENIAMwalParser.schemata ENIAMwalParser.entries *) + +(* let _ = + (* Entries.map schemata (fun pos lemma (selectors,schema) -> + (* Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema); *) + render_schema schema) *) + Entries.map entries (fun pos lemma (selectors,entry) -> + (* Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema); *) + selectors,render_lex_entry entry) *) + +let adjunct morfs = {empty_position with gf=ADJUNCT; is_necessary=Opt; morfs=Xlist.map morfs (fun morf -> LCG morf)} +let adjunct_multi dir morfs = {empty_position with gf=ADJUNCT; is_necessary=Multi; dir=dir; morfs=Xlist.map morfs (fun morf -> LCG morf)} +let adjunct_dir dir morfs = 
{empty_position with gf=ADJUNCT; is_necessary=Opt; dir=dir; morfs=Xlist.map morfs (fun morf -> LCG morf)} (* adjunct_ce ce morfs: copy of empty_position with gf=ADJUNCT, ce=[ce], is_necessary=Opt and each morf wrapped in LCG; dir is not set here. *) +let adjunct_ce ce morfs = {empty_position with gf=ADJUNCT; ce=[ce]; is_necessary=Opt; morfs=Xlist.map morfs (fun morf -> LCG morf)} + (* render_comprep prep: the pair Both, Plus[One; comprepnp tensor for prep]. NOTE(review): One presumably marks the adjunct as omittable - confirm against the grammar type. *) +let render_comprep prep = Both,Plus[One;Tensor[Atom "comprepnp"; Atom prep]] + (* render_connected_comprep prep: the same comprepnp tensor handed to adjunct, with no One alternative. *) +let render_connected_comprep prep = adjunct [Tensor[Atom "comprepnp"; Atom prep]] + (* render_prepnp prep cases: Both paired with Plus of One followed, for every case, by a prepnp tensor and a prepncp tensor that ends in two Top. *) +let render_prepnp prep cases = + Both,Plus(One :: List.flatten (Xlist.map cases (fun case -> + [Tensor[Atom "prepnp"; Atom prep; Atom case]; + Tensor[Atom "prepncp"; Atom prep; Atom case; Top; Top]]))) + (* render_connected_prepnp prep cases: the per-case prepnp and prepncp tensors, without One, handed to adjunct. *) +let render_connected_prepnp prep cases = + adjunct (List.flatten (Xlist.map cases (fun case -> + [Tensor[Atom "prepnp"; Atom prep; Atom case]; + Tensor[Atom "prepncp"; Atom prep; Atom case; Top; Top]]))) + (* render_prepadjp prep cases: Both paired with Plus of One, then a postp prepadjp tensor only when prep is z, po or na, then one prepadjp tensor per case. *) +let render_prepadjp prep cases = + let postp = if prep = "z" || prep = "po" || prep = "na" then [Tensor[Atom "prepadjp"; Atom prep; Atom "postp"]] else [] in + Both,Plus(One :: postp @ (Xlist.map cases (fun case -> + Tensor[Atom "prepadjp"; Atom prep; Atom case]))) + (* render_connected_prepadjp prep cases: the optional postp tensor and the per-case prepadjp tensors, without One, handed to adjunct. *) +let render_connected_prepadjp prep cases = + let postp = if prep = "z" || prep = "po" || prep = "na" then [Tensor[Atom "prepadjp"; Atom prep; Atom "postp"]] else [] in + adjunct (postp @ (Xlist.map cases (fun case -> + Tensor[Atom "prepadjp"; Atom prep; Atom case]))) + (* render_compar prep: the pair Both, Plus[One; compar tensor for prep with a trailing Top]. *) +let render_compar prep = Both,Plus[One;Tensor[Atom "compar"; Atom prep; Top]] + (* render_connected_compar prep: the same compar tensor handed to adjunct, with no One alternative. *) +let render_connected_compar prep = adjunct [Tensor[Atom "compar"; Atom prep; Top]] + (* verb_adjuncts_simp: list of direction, Plus[One; ...] pairs. All entries use Both except the cp entry, which uses Forward. *) +let verb_adjuncts_simp = [ + Both, Plus[One;Tensor[Atom "advp"; Atom "pron"]]; + Both, Plus[One;Tensor[Atom "advp"; Atom "locat"]]; + Both, Plus[One;Tensor[Atom "advp"; Atom "abl"]]; + Both, Plus[One;Tensor[Atom "advp"; Atom "adl"]]; + Both, Plus[One;Tensor[Atom "advp"; Atom "perl"]]; + Both, Plus[One;Tensor[Atom "advp"; Atom "temp"]]; + Both, Plus[One;Tensor[Atom "advp"; Atom "dur"]]; + Both, Plus[One;Tensor[Atom "advp"; Atom "mod"]]; + Both, Plus[One;Tensor[Atom
"np";Top;Atom "dat"; Top; Top];Tensor[Atom "ncp"; Top; Atom "dat"; Top; Top; Top; Top]]; + Both, Plus[One;Tensor[Atom "np";Top;Atom "inst"; Top; Top];Tensor[Atom "ncp"; Top; Atom "inst"; Top; Top; Top; Top]]; + Both, Plus[One;Tensor[Atom "date"];Tensor[Atom "day-lex"];Tensor[Atom "day-month"];Tensor[Atom "day"]]; + Forward, Plus[One;Tensor[Atom "cp";Top; Top]]; (* FIXME: this should come as the last argument list *) + Both, Plus[One;Tensor[Atom "or"]]; + Both, Plus[One;Tensor[Atom "lex";Atom "się";Atom "qub"]]; + Both, Plus[One;Tensor[Atom "padvp"]]; +] + (* verb_connected_adjuncts_simp: the tensor groups of verb_adjuncts_simp in the same order, built with adjunct; the cp entry goes through adjunct_dir Forward_ and the padvp entry through adjunct_ce with ce 3. *) +let verb_connected_adjuncts_simp = [ + adjunct [Tensor[Atom "advp"; Atom "pron"]]; + adjunct [Tensor[Atom "advp"; Atom "locat"]]; + adjunct [Tensor[Atom "advp"; Atom "abl"]]; + adjunct [Tensor[Atom "advp"; Atom "adl"]]; + adjunct [Tensor[Atom "advp"; Atom "perl"]]; + adjunct [Tensor[Atom "advp"; Atom "temp"]]; + adjunct [Tensor[Atom "advp"; Atom "dur"]]; + adjunct [Tensor[Atom "advp"; Atom "mod"]]; + adjunct [Tensor[Atom "np";Top;Atom "dat"; Top; Top];Tensor[Atom "ncp"; Top; Atom "dat"; Top; Top; Top; Top]]; + adjunct [Tensor[Atom "np";Top;Atom "inst"; Top; Top];Tensor[Atom "ncp"; Top; Atom "inst"; Top; Top; Top; Top]]; + adjunct [Tensor[Atom "date"];Tensor[Atom "day-lex"];Tensor[Atom "day-month"];Tensor[Atom "day"]]; + adjunct_dir Forward_ [Tensor[Atom "cp";Top; Top]]; + adjunct [Tensor[Atom "or"]]; + adjunct [Tensor[Atom "lex";Atom "się";Atom "qub"]]; + adjunct_ce "3" [Tensor[Atom "padvp"]]; +] + (* proper_noun_adjuncts_simp: genitive np/ncp on Both sides, a Forward np that is nominative or takes AVar case, and an adjp with AVar number, case and gender both as Backward Maybe and as Forward Plus[One; ...]. *) +let proper_noun_adjuncts_simp = [ + Both, Plus[One;Tensor[Atom "np";Top;Atom "gen"; Top; Top];Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top]]; + Forward, Plus[One;Tensor[Atom "np";Top;Atom "nom"; Top; Top];Tensor[Atom "np";Top;AVar "case"; Top; Top]]; + Backward, Maybe(Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]); + Forward, Plus[One;Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]]; +] + (* proper_noun_connected_adjuncts_simp: the same four tensor groups built with adjunct, adjunct_dir Forward_ and, for the Backward adjp, adjunct_multi Backward_. *) +let proper_noun_connected_adjuncts_simp = [ + adjunct [Tensor[Atom "np";Top;Atom
"gen"; Top; Top];Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top]]; + adjunct_dir Forward_ [Tensor[Atom "np";Top;Atom "nom"; Top; Top];Tensor[Atom "np";Top;AVar "case"; Top; Top]]; + adjunct_multi Backward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]]; + adjunct_dir Forward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]]; +] + (* common_noun_adjuncts_simp: the entries are identical to those of proper_noun_adjuncts_simp. *) +let common_noun_adjuncts_simp = [ + Both, Plus[One;Tensor[Atom "np";Top;Atom "gen"; Top; Top];Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top]]; + Forward, Plus[One;Tensor[Atom "np";Top;Atom "nom"; Top; Top];Tensor[Atom "np";Top;AVar "case"; Top; Top]]; + Backward, Maybe(Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]); + Forward, Plus[One;Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]]; +] + (* common_noun_connected_adjuncts_simp: the entries are identical to those of proper_noun_connected_adjuncts_simp. *) +let common_noun_connected_adjuncts_simp = [ + adjunct [Tensor[Atom "np";Top;Atom "gen"; Top; Top];Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top]]; + adjunct_dir Forward_ [Tensor[Atom "np";Top;Atom "nom"; Top; Top];Tensor[Atom "np";Top;AVar "case"; Top; Top]]; + adjunct_multi Backward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]]; + adjunct_dir Forward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]]; +] + (* measure_noun_adjuncts_simp: only the two adjp entries, Backward Maybe and Forward Plus[One; ...]. *) +let measure_noun_adjuncts_simp = [ + Backward, Maybe(Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]); + Forward, Plus[One;Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]]; +] + (* measure_noun_connected_adjuncts_simp: the two adjp entries built with adjunct_multi Backward_ and adjunct_dir Forward_. *) +let measure_noun_connected_adjuncts_simp = [ + adjunct_multi Backward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]]; + adjunct_dir Forward_ [Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]]; +] + (* adj_adjuncts_simp: a single Both entry, Plus[One; advp tensor whose second component is Top]. *) +let adj_adjuncts_simp = [ + Both, Plus[One;Tensor[Atom "advp"; Top]]; +] + (* adj_connected_adjuncts_simp: the same advp tensor handed to adjunct. *) +let adj_connected_adjuncts_simp = [ + adjunct [Tensor[Atom "advp"; Top]]; +] + (* adv_adjuncts_simp: same single entry as adj_adjuncts_simp. *) +let adv_adjuncts_simp = [ + Both, Plus[One;Tensor[Atom "advp"; Top]]; + ] + (* adv_connected_adjuncts_simp: same single entry as adj_connected_adjuncts_simp. *) +let adv_connected_adjuncts_simp = [ + adjunct [Tensor[Atom
"advp"; Top]]; + ] + (* assing_prep_morfs: maps a prep, case pair of strings to a list of LCG morfs. The pairs po/postp, z/postp and na/postp are special-cased; any other pair yields an np tensor and an adjp tensor that carry the case string as an Atom. NOTE(review): the name looks like a misspelling of assign_prep_morfs; callers are not visible here, so it is left unchanged. *) +let assing_prep_morfs = function + "po","postp" -> [ + LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "dat"; Atom "m1"]); + LCG(Tensor[Atom "adjp"; Top; Atom "postp"; Top])] + | "z","postp" -> [LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "nom"; Atom "f"])] + | "na","postp" -> [LCG(Tensor[Atom "advp"; Top])] + | _,case -> [ + LCG(Tensor[Atom "np"; Top; Atom case; Top; Top]); + LCG(Tensor[Atom "adjp"; Top; Atom case; Top])] + (* prep_morfs: fixed list of LCG morfs: np, adjp, two fully specified adjp forms, advp and the year, hour, day and date atoms. NOTE(review): the first two entries use the literal Atom case, whereas the noun adjunct lists use AVar case - confirm this is intended. *) +let prep_morfs = [ + LCG(Tensor[Atom "np"; Top; Atom "case"; Top; Top]); + LCG(Tensor[Atom "adjp"; Top; Atom "case"; Top]); + LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "dat"; Atom "m1"]); + LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "nom"; Atom "f"]); + LCG(Tensor[Atom "advp"; Top]); + LCG(Tensor[Atom "year"]); + LCG(Tensor[Atom "hour-minute"]); + LCG(Tensor[Atom "day-month"]); + LCG(Tensor[Atom "hour"]); + LCG(Tensor[Atom "day"]); + LCG(Tensor[Atom "date"]); + ] + (* compar_morfs: fixed list of LCG morfs: np, adjp, prepnp and prepadjp. NOTE(review): the np and adjp entries use the literal Atom case, as in prep_morfs - confirm this is intended. *) +let compar_morfs = [ + LCG(Tensor[Atom "np"; Top; Atom "case"; Top; Top]); + LCG(Tensor[Atom "adjp"; Top; Atom "case"; Top]); + LCG(Tensor[Atom "prepnp"; Top; Top]); + LCG(Tensor[Atom "prepadjp"; Top; Top]); + ]