diff --git a/LCGlexicon/ENIAM_LCGlexicon.ml b/LCGlexicon/ENIAM_LCGlexicon.ml index 7d293dd..65b566a 100644 --- a/LCGlexicon/ENIAM_LCGlexicon.ml +++ b/LCGlexicon/ENIAM_LCGlexicon.ml @@ -42,6 +42,11 @@ let rec get_bracket rev = function | t :: rule -> get_bracket (t :: rev) rule | [] -> false, List.rev rev +let rec get_raised rev = function + Raised raised :: rule -> raised, (List.rev rev) @ rule + | t :: rule -> get_raised (t :: rev) rule + | [] -> raise Not_found + let merge_quant pos_quants quants = let map = Xlist.fold quants SelectorMap.empty (fun map (k,v) -> SelectorMap.add map k v) in let l,map = Xlist.fold pos_quants ([],map) (fun (l,map) (cat,v) -> @@ -61,9 +66,91 @@ let assign_quantifiers (selectors,rule,weight) = let quant = merge_quant categories quant in selectors, (bracket,quant,syntax),(rule,weight) -let _ = - let lexicon = ENIAM_LCGlexiconParser.load_lexicon "resources/lexicon-pl.dic" in - List.rev (Xlist.rev_map lexicon assign_quantifiers) +let assign_semantics (selectors,(bracket,quant,syntax),(rule,weight)) = + let semantics = try + let raised,rule = get_raised [] rule in + if rule <> [] then failwith "assign_semantics" else + RaisedSem(Xlist.map quant fst, raised) + with Not_found -> BasicSem(Xlist.map quant fst) in + selectors,(bracket,quant,syntax),(semantics,weight) + +let rec extract_category pat rev = function + (cat,rel,v) :: l -> if cat = pat then rel,v,(List.rev rev @ l) else extract_category pat ((cat,rel,v) :: rev) l + | [] -> raise Not_found + +let dict_of_grammar grammar = + (* print_endline "dict_of_grammar"; *) + Xlist.fold grammar StringMap.empty (fun dict (selectors,(bracket,quant,syntax),semantics) -> + let pos_rel,poss,selectors = try extract_category Pos [] selectors with Not_found -> failwith "dict_of_grammar 1" in + let lemma_rel,lemmas,selectors = try extract_category Lemma [] selectors with Not_found -> Eq,[],selectors in + if pos_rel <> Eq || lemma_rel <> Eq then failwith "dict_of_grammar 2" else + let rule = selectors,(bracket,quant,syntax),semantics in + Xlist.fold poss dict (fun dict pos -> + let dict2,l = try StringMap.find dict pos with Not_found -> StringMap.empty,[] in + let dict2,l = + if lemmas = [] then dict2,rule :: l else + Xlist.fold lemmas dict2 (fun dict2 lemma -> + StringMap.add_inc dict2 lemma [rule] (fun l -> rule :: l)),l in + StringMap.add dict pos (dict2,l))) + +let make_rules filename = + let lexicon = ENIAM_LCGlexiconParser.load_lexicon filename in + let lexicon = List.rev (Xlist.rev_map lexicon assign_quantifiers) in + let lexicon = List.rev (Xlist.rev_map lexicon assign_semantics) in + dict_of_grammar lexicon + +let find_rules rules cats = + let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith "find_rules 1" in + let rules = try StringMap.find lex_rules cats.lemma @ rules with Not_found -> rules in + Xlist.fold rules [] (fun rules (selectors,syntax,semantics) -> + try + let cats = apply_selectors cats selectors in + (cats,syntax,semantics) :: rules + with Not_found -> rules) + +let assign_valence valence rules = + Xlist.fold rules [] (fun l (cats,(bracket,quant,syntax),semantics) -> + (* Printf.printf "%s |valence|=%d\n" cats.lemma (Xlist.size valence); *) + if ENIAM_LCGrenderer.count_avar "schema" syntax > 0 then + Xlist.fold valence l (fun l (selectors,schema) -> + try + let cats = apply_selectors cats selectors in + (cats,(bracket,quant,ENIAM_LCGrenderer.substitute_schema "schema" schema syntax),semantics) :: l + with Not_found -> l) + else (cats,(bracket,quant,syntax),semantics) :: l) + +type labels = { + number: string; + case: string; + gender: string; + person: string; + aspect: string; +} + +let get_label e = function + Number -> e.number + | Case -> e.case + | Gender -> e.gender + | Person -> e.person + | Aspect -> e.aspect + | _ -> ENIAM_LCGreductions.get_variant_label () + +let get_labels () = { + number=ENIAM_LCGreductions.get_variant_label (); + case=ENIAM_LCGreductions.get_variant_label (); + gender=ENIAM_LCGreductions.get_variant_label (); + person=ENIAM_LCGreductions.get_variant_label (); + aspect=ENIAM_LCGreductions.get_variant_label (); +} + +let make_quantification e rules = + Xlist.map rules (fun (cats,(bracket,quant,syntax),semantics) -> + let syntax = Xlist.fold (List.rev quant) syntax (fun syntax (cat,t) -> + let t = if t = Top then ENIAM_LCGrenderer.make_quant_restriction (match_selector cats cat) else t in + let category = string_of_selector cat in + WithVar(category,t,get_label e cat,syntax)) in + let syntax = if bracket then ENIAM_LCGtypes.Bracket(true,true,syntax) else ENIAM_LCGtypes.Bracket(false,false,syntax) in + cats,syntax,semantics) (*** type rule2 = @@ -114,27 +201,6 @@ let parse_quants_range quant = (selectors,rule,weight) :: grammar)) **) -let rec extract_category pat rev = function - (cat,rel,v) :: l -> if cat = pat then rel,v,(List.rev rev @ l) else extract_category pat ((cat,rel,v) :: rev) l - | [] -> raise Not_found - -let dict_of_grammar grammar = - (* print_endline "dict_of_grammar"; *) - Xlist.fold grammar StringMap.empty (fun dict (selectors,(bracket,quant,syntax,semantics),weight) -> - let pos_rel,poss,selectors = try extract_category Pos [] selectors with Not_found -> failwith "dict_of_grammar 1" in - let lemma_rel,lemmas,selectors = try extract_category Lemma [] selectors with Not_found -> Eq,[],selectors in - if pos_rel <> Eq || lemma_rel <> Eq then failwith "dict_of_grammar 2" else - let rule = selectors,(bracket,quant,syntax,semantics),weight in - Xlist.fold poss dict (fun dict pos -> - let dict2,l = try StringMap.find dict pos with Not_found -> StringMap.empty,[] in - let dict2,l = - if lemmas = [] then dict2,rule :: l else - Xlist.fold lemmas dict2 (fun dict2 lemma -> - StringMap.add_inc dict2 lemma [rule] (fun l -> rule :: l)),l in - StringMap.add dict pos (dict2,l))) - -(* let rules = dict_of_grammar ENIAM_LCGlexiconPL.grammar *) - (* let translate_negation = function (Negation:negation) -> ["neg"] | Aff -> ["aff"] @@ -165,16 +231,7 @@ let dict_of_grammar grammar = | GerAtrs(m,le,neg,a) -> [Negation,Eq,translate_negation neg;Aspect,Eq,translate_aspect a] | NonPersAtrs(m,le,role,role_attr,neg,a) -> [Negation,Eq,translate_negation neg;Aspect,Eq,translate_aspect a] | ComprepAtrs _ -> failwith "apply_valence_selectors" *) - -let find_rules rules cats = - let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith "find_rules 1" in - let rules = try StringMap.find lex_rules cats.lemma @ rules with Not_found -> rules in - Xlist.fold rules [] (fun rules (selectors,(bracket,quant,syntax,semantics),weight) -> - try - let cats = apply_selectors cats selectors in - (cats,(bracket,quant,syntax,semantics),weight) :: rules - with Not_found -> rules) - + ***) (* FIXME: argumenty X i raised i inne *) (* let render_schema schema = @@ -196,20 +253,9 @@ let find_rules rules cats = | _ -> l) else (cats,(bracket,quant,syntax,semantics),weight) :: l) *) -let assign_valence valence rules = - Xlist.fold rules [] (fun l (cats,(bracket,quant,syntax,semantics),weight) -> - (* Printf.printf "%s |valence|=%d\n" cats.lemma (Xlist.size valence); *) - if ENIAM_LCGrenderer.count_avar "schema" syntax > 0 then - Xlist.fold valence l (fun l (selectors,schema) -> - try - let cats = apply_selectors cats selectors in - (cats,(bracket,quant,ENIAM_LCGrenderer.substitute_schema "schema" schema syntax,semantics),weight) :: l - with Not_found -> l) - else (cats,(bracket,quant,syntax,semantics),weight) :: l) - (* FIXME: ustawienie wartości symbol *) (* FIXME: problem z atrybutami przy zamianie kolejności rzędników *) -let make_node id orth lemma cat weight cat_list = +let make_node id orth lemma pos syntax weight cat_list is_raised = let attrs = Xlist.fold cat_list(*Xlist.rev_map quant fst*) [] (fun attrs -> function | Lemma -> attrs | Cat -> ("CAT",SubstVar "cat") :: attrs @@ -230,53 +276,33 @@ let make_node id orth lemma cat weight cat_list = | s -> (string_of_selector s, Dot) :: attrs) in (* | "lex" -> ("LEX",Val "+") :: attrs *) (* | s -> failwith ("make_node: " ^ (string_of_selector s))) in *) - {ENIAM_LCGrenderer.empty_node with orth=orth; lemma=lemma; pos=cat; weight=weight; id=id; attrs=List.rev attrs; args=Dot} - -type labels = { - number: string; - case: string; - gender: string; - person: string; - aspect: string; -} - -let get_label e = function - Number -> e.number - | Case -> e.case - | Gender -> e.gender - | Person -> e.person - | Aspect -> e.aspect - | _ -> ENIAM_LCGreductions.get_variant_label () - -let get_labels () = { - number=ENIAM_LCGreductions.get_variant_label (); - case=ENIAM_LCGreductions.get_variant_label (); - gender=ENIAM_LCGreductions.get_variant_label (); - person=ENIAM_LCGreductions.get_variant_label (); - aspect=ENIAM_LCGreductions.get_variant_label (); -} - -let make_quantification e rules = - Xlist.map rules (fun (cats,(bracket,quant,syntax,semantics),weight) -> - let syntax = Xlist.fold (List.rev quant) syntax (fun syntax (cat,t) -> - let t = if t = Top then ENIAM_LCGrenderer.make_quant_restriction (match_selector cats cat) else t in - let category = string_of_selector cat in - WithVar(category,t,get_label e cat,syntax)) in - let syntax = if bracket then Bracket(true,true,syntax) else Bracket(false,false,syntax) in - cats,syntax,semantics,weight) + let symbol = if is_raised then + ENIAM_LCGrenderer.make_raised_symbol syntax + else ENIAM_LCGrenderer.make_symbol syntax in + {ENIAM_LCGrenderer.empty_node with + orth=orth; lemma=lemma; pos=pos; symbol=symbol; + weight=weight; id=id; attrs=List.rev attrs; args=Dot} let make_term id orth rules = - Xlist.map rules (fun (cats,syntax,semantics,weight) -> + Xlist.map rules (fun (cats,syntax,(semantics,weight)) -> + ENIAM_LCGrenderer.reset_variable_names (); + ENIAM_LCGrenderer.add_variable_numbers (); match semantics with BasicSem cat_list -> - let node = make_node id orth cats.lemma cats.pos weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list in + let node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list false in let semantics = ENIAM_LCGrenderer.make_term node syntax in ENIAM_LCGrenderer.simplify (syntax,semantics) + | RaisedSem(cat_list,outer_cat_list) -> + (* FIXME: jakie atrybuty powinien mieć outer node (w szczególności jaką wagę?) *) + let node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list true in + let outer_node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) outer_cat_list false in + let semantics = ENIAM_LCGrenderer.make_raised_term node outer_node syntax in + ENIAM_LCGrenderer.simplify (syntax,semantics) | _ -> failwith "make_term: ni") (*cats,bracket,quant,syntax,Dot*) -(** - let create_entries id orth cats valence = - Xlist.fold cats [] (fun l cats -> + +let create_entries rules id orth cats valence = + Xlist.fold cats [] (fun l cats -> (* variable_name_ref := []; *) if cats.pos="interp" && cats.lemma="<clause>" then (BracketSet(Forward),Dot) :: l else if cats.pos="interp" && cats.lemma="</clause>" then (BracketSet(Backward),Dot) :: l else @@ -291,7 +317,7 @@ let make_term id orth rules = let rules = make_term id orth rules in (* print_endline "create_entries 5"; *) rules @ l) - **)(* + (* (* FIXME: poprawić i dodać moduł testujący *) module OrderedIntInt = struct type t = int * int @@ -314,4 +340,3 @@ let create (paths,last) tokens lex_sems = let chart = IntIntSet.fold set chart (fun chart (i,j) -> LCGchart.make_unique chart i j) in chart *) - ***) diff --git a/LCGlexicon/ENIAM_LCGlexiconParser.ml b/LCGlexicon/ENIAM_LCGlexiconParser.ml index f974f6e..f342809 100644 --- a/LCGlexicon/ENIAM_LCGlexiconParser.ml +++ b/LCGlexicon/ENIAM_LCGlexiconParser.ml @@ -282,6 +282,12 @@ let parse_quantifiers tokens = | t :: _ -> failwith ("parse_quantifiers: unexpected token '" ^ t ^ "'") | [] -> failwith "parse_quantifiers: no token") +let parse_raised tokens = + Xlist.map (split_comma [] [] tokens) (function + [cat] -> selector_of_string cat + | t :: _ -> failwith ("parse_raised: unexpected token '" ^ t ^ "'") + | [] -> failwith "parse_raised: no token") + let rec parse_rule atoms = function "BRACKET" :: tokens -> Bracket :: parse_rule atoms tokens | "QUANT" :: "[" :: tokens -> @@ -289,7 +295,7 @@ let rec parse_rule atoms = function Quant(parse_quantifiers quant) :: parse_rule atoms tokens | "RAISED" :: "[" :: tokens -> let raised,tokens = find_right_bracket [] tokens in - Raised(raised) :: parse_rule atoms tokens + Raised(parse_raised raised) :: parse_rule atoms tokens | tokens -> (* print_prefix 100 tokens; *) [Syntax(parse_syntax atoms tokens)] diff --git a/LCGlexicon/ENIAM_LCGlexiconTypes.ml b/LCGlexicon/ENIAM_LCGlexiconTypes.ml index 2e05b84..ee97a8f 100644 --- a/LCGlexicon/ENIAM_LCGlexiconTypes.ml +++ b/LCGlexicon/ENIAM_LCGlexiconTypes.ml @@ -37,6 +37,12 @@ end module SelectorMap=Xmap.Make(OrderedSelector) +type rule = + Bracket + | Quant of (selector * ENIAM_LCGtypes.internal_grammar_symbol) list + | Raised of selector list + | Syntax of ENIAM_LCGtypes.grammar_symbol + type rule_sem = BasicSem of selector list | RaisedSem of selector list * selector list @@ -46,12 +52,6 @@ type rule_sem = type selector_relation = Eq | Neq (*| StrictEq*) -type rule = - Bracket - | Quant of (selector * ENIAM_LCGtypes.internal_grammar_symbol) list - | Raised of string list - | Syntax of ENIAM_LCGtypes.grammar_symbol - (* x="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jedną z wartości atrybutu x, reguła zostanie wykonana dla x z usuniętymi pozostałymi wartościami *) (* x!="s" oznacza, że żeby reguła została użyta token musi mieć jako jedną z wartości atrybutu x symbol inny od "s", reguła zostanie wykonana dla x z usuniętą wartością "s" *) diff --git a/LCGlexicon/resources/lexicon-pl.dic b/LCGlexicon/resources/lexicon-pl.dic index 5db7722..09d2f10 100644 --- a/LCGlexicon/resources/lexicon-pl.dic +++ b/LCGlexicon/resources/lexicon-pl.dic @@ -151,36 +151,36 @@ pos=ger: np*number*case*gender*person{schema}{\(1+qub),/(1+inclusion)}; pos=pact: adjp*number*case*gender{schema}{\(1+qub),/(1+inclusion)}; pos=ppas: adjp*number*case*gender{schema}{\(1+qub),/(1+inclusion)}; -pos=fin|bedzie,negation=aff,mood=indicative: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}; -pos=fin|bedzie,negation=neg,mood=indicative: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}{\nie}; -pos=fin,negation=aff,mood=imperative: ip*number*gender*person{/(1+int)}{schema,|aux-imp}{\(1+qub),/(1+inclusion)}; -pos=fin,negation=neg,mood=imperative: ip*number*gender*person{/(1+int)}{schema,|aux-imp}{\(1+qub),/(1+inclusion)}{\nie}; -pos=impt|imps,negation=aff: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}; -pos=impt|imps,negation=neg: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}{\nie}; - -pos=pred,negation=aff,tense=pres: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}; -pos=pred,negation=neg,tense=pres: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}{\nie}; -pos=pred,negation=aff,tense=fut: ip*number*gender*person{/(1+int)}{schema,|aux-fut*number*gender*person}{\(1+qub),/(1+inclusion)}; -pos=pred,negation=neg,tense=fut: ip*number*gender*person{/(1+int)}{schema,|aux-fut*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie}; -pos=pred,negation=aff,tense=past: ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}; -pos=pred,negation=neg,tense=past: ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie}; - -pos=praet|winien,person=ter,negation=aff,mood=indicative: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}; -pos=praet|winien,person=ter,negation=neg,mood=indicative: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}{\nie}; -pos=praet|winien,person!=ter,negation=aff,mood=indicative: ip*number*gender*person{/(1+int)}{schema,|aglt*number*person}{\(1+qub),/(1+inclusion)}; -pos=praet|winien,person!=ter,negation=neg,mood=indicative: ip*number*gender*person{/(1+int)}{schema,|aglt*number*person}{\(1+qub),/(1+inclusion)}{\nie}; - -pos=praet|winien,person=ter,negation=aff,mood=conditional: ip*number*gender*person{/(1+int)}{schema,|by}{\(1+qub),/(1+inclusion)}; -pos=praet|winien,person=ter,negation=neg,mood=conditional: ip*number*gender*person{/(1+int)}{schema,|by}{\(1+qub),/(1+inclusion)}{\nie}; -pos=praet|winien,person!=ter,negation=aff,mood=conditional: ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|by}{\(1+qub),/(1+inclusion)}; -pos=praet|winien,person!=ter,negation=neg,mood=conditional: ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|by}{\(1+qub),/(1+inclusion)}{\nie}; - -pos=praet|winien,negation=aff,tense=fut: ip*number*gender*person{/(1+int)}{schema,|aux-fut*number*gender*person}{\(1+qub),/(1+inclusion)}; - -pos=winien,person=ter,negation=aff,tense=past: ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}; -pos=winien,person=ter,negation=neg,tense=past: ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie}; -pos=winien,person!=ter,negation=aff,tense=past: ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}; -pos=winien,person!=ter,negation=neg,tense=past: ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie}; +pos=fin|bedzie,negation=aff,mood=indicative: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}; +pos=fin|bedzie,negation=neg,mood=indicative: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}{\nie}; +pos=fin,negation=aff,mood=imperative: ip*number*gender*person{/(1+int),schema,|aux-imp}{\(1+qub),/(1+inclusion)}; +pos=fin,negation=neg,mood=imperative: ip*number*gender*person{/(1+int),schema,|aux-imp}{\(1+qub),/(1+inclusion)}{\nie}; +pos=impt|imps,negation=aff: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}; +pos=impt|imps,negation=neg: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}{\nie}; + +pos=pred,negation=aff,tense=pres: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}; +pos=pred,negation=neg,tense=pres: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}{\nie}; +pos=pred,negation=aff,tense=fut: ip*number*gender*person{/(1+int),schema,|aux-fut*number*gender*person}{\(1+qub),/(1+inclusion)}; +pos=pred,negation=neg,tense=fut: ip*number*gender*person{/(1+int),schema,|aux-fut*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie}; +pos=pred,negation=aff,tense=past: ip*number*gender*person{/(1+int),schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}; +pos=pred,negation=neg,tense=past: ip*number*gender*person{/(1+int),schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie}; + +pos=praet|winien,person=ter,negation=aff,mood=indicative: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}; +pos=praet|winien,person=ter,negation=neg,mood=indicative: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}{\nie}; +pos=praet|winien,person!=ter,negation=aff,mood=indicative: ip*number*gender*person{/(1+int),schema,|aglt*number*person}{\(1+qub),/(1+inclusion)}; +pos=praet|winien,person!=ter,negation=neg,mood=indicative: ip*number*gender*person{/(1+int),schema,|aglt*number*person}{\(1+qub),/(1+inclusion)}{\nie}; + +pos=praet|winien,person=ter,negation=aff,mood=conditional: ip*number*gender*person{/(1+int),schema,|by}{\(1+qub),/(1+inclusion)}; +pos=praet|winien,person=ter,negation=neg,mood=conditional: ip*number*gender*person{/(1+int),schema,|by}{\(1+qub),/(1+inclusion)}{\nie}; +pos=praet|winien,person!=ter,negation=aff,mood=conditional: ip*number*gender*person{/(1+int),schema,|aglt*number*person,|by}{\(1+qub),/(1+inclusion)}; +pos=praet|winien,person!=ter,negation=neg,mood=conditional: ip*number*gender*person{/(1+int),schema,|aglt*number*person,|by}{\(1+qub),/(1+inclusion)}{\nie}; + +pos=praet|winien,negation=aff,tense=fut: ip*number*gender*person{/(1+int),schema,|aux-fut*number*gender*person}{\(1+qub),/(1+inclusion)}; + +pos=winien,person=ter,negation=aff,tense=past: ip*number*gender*person{/(1+int),schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}; +pos=winien,person=ter,negation=neg,tense=past: ip*number*gender*person{/(1+int),schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie}; +pos=winien,person!=ter,negation=aff,tense=past: ip*number*gender*person{/(1+int),schema,|aglt*number*person,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}; +pos=winien,person!=ter,negation=neg,tense=past: ip*number*gender*person{/(1+int),schema,|aglt*number*person,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie}; pos=bedzie: aux-fut*number*gender*person; lemma=być,pos=praet: aux-past*number*gender*person; diff --git a/LCGlexicon/test.ml b/LCGlexicon/test.ml index 0c7baf5..f9d7745 100644 --- a/LCGlexicon/test.ml +++ b/LCGlexicon/test.ml @@ -20,8 +20,10 @@ open ENIAM_LCGlexiconTypes open ENIAM_LCGtypes +let rules = ENIAM_LCGlexicon.make_rules "resources/lexicon-pl.dic" + let examples = [ - "kot",[ + (*"kot",[ 1, 0, 1, "","<sentence>","interp", [],false; 2, 1, 2, "","<clause>","interp", [],false; 3, 2, 3, "Ala","Ala","subst", [["sg"];["nom"];["f"]],true; @@ -40,7 +42,18 @@ let examples = [ 7, 4, 5, "kota","kota","subst", [["sg"];["nom"];["f"]],false; 8, 5, 6, "","</clause>","interp", [],false; 9, 6, 7, ".","</sentence>","interp", [],false; - ],7; + ],7;*) + "jaki",[ + 1, 0, 1, "","<sentence>","interp", [],false; + 2, 1, 2, "","<clause>","interp", [],false; + 3, 2, 3, "Jakiego","jaki","adj", [["sg"];["gen";"acc"];["m1";"m2"];["pos"]],false; + 4, 3, 4, "kota","kot","subst", [["sg"];["gen";"acc"];["m1";"m2"]],false; + 5, 4, 5, "Ala","Ala","subst", [["sg"];["nom"];["f"]],true; + 6, 5, 6, "ma","mieć","fin", [["sg"];["ter"];["imperf"]],false; + 7, 6, 7, "?","?","interp", [],false; + 8, 7, 8, "","</clause>","interp", [],false; + 9, 8, 9, ".","</sentence>","interp", [],false; + ],9; ] let valence = [ @@ -49,16 +62,18 @@ let valence = [ Both,Plus[One;Tensor[Atom "np";Top;Atom "acc";Top;Top]]]; [Lemma,Eq,["mieć"];Pos,Eq,["fin"];Negation,Eq,["neg"];Mood,Eq,["indicative"]],[Both,Plus[One;Tensor[Atom "np";AVar "number";Atom "nom";AVar "gender";AVar "person"]]; Both,Plus[One;Tensor[Atom "np";Top;Atom "gen";Top;Top]]]; - [Lemma,Eq,["kot"];Pos,Eq,["subst"]],[]; + [Lemma,Eq,["kot"];Pos,Eq,["subst"]],[Both,Plus[One;Tensor[Atom "adjp";AVar "number";AVar "case";AVar "gender"]]]; [Lemma,Eq,["kota"];Pos,Eq,["subst"]],[]; ] -(** + let create_chart valence tokens last = + ENIAM_LCGrenderer.reset_variable_numbers (); let chart = ENIAM_LCGchart.make last in let chart = Xlist.fold tokens chart (fun chart (id,lnode,rnode,orth,lemma,pos,interp,proper) -> ENIAM_LCGrenderer.reset_variable_names (); + ENIAM_LCGrenderer.add_variable_numbers (); let cats = ENIAMcategoriesPL.clarify_categories proper ["X"] (lemma,pos,interp) in - let l = ENIAM_LCGrendererPL.create_entries id orth cats valence in + let l = ENIAM_LCGlexicon.create_entries rules id orth cats valence in ENIAM_LCGchart.add_inc_list chart lnode rnode l 0) in chart @@ -89,6 +104,6 @@ let test_example valence (name,tokens,last) = ()) else print_endline "not reduced") else print_endline "not parsed" - **) -(*let _ = - Xlist.iter examples (test_example valence)*) + +let _ = + Xlist.iter examples (test_example valence) diff --git a/LCGparser/ENIAM_LCGrenderer.ml b/LCGparser/ENIAM_LCGrenderer.ml index 494b049..b0cbc2f 100644 --- a/LCGparser/ENIAM_LCGrenderer.ml +++ b/LCGparser/ENIAM_LCGrenderer.ml @@ -97,6 +97,14 @@ let rec substitute_substvar v g = function let empty_node = { orth=""; lemma=""; pos=""; weight=0.; id=0; symbol=Dot; arg_symbol=Dot; attrs=[]; args=Dot;} +let variable_num_ref = ref 0 + +let reset_variable_numbers () = + variable_num_ref := 0 + +let add_variable_numbers () = + incr variable_num_ref + let variable_name_ref = ref [] let reset_variable_names () = @@ -109,7 +117,7 @@ let rec add_variable_name = function let get_variable_name () = variable_name_ref := add_variable_name (!variable_name_ref); - String.concat "" (List.rev (!variable_name_ref)) + String.concat "" (List.rev (!variable_name_ref)) ^ (string_of_int !variable_num_ref) let make_arg_symbol l = Tuple(Xlist.map l (function diff --git a/LCGparser/test.ml b/LCGparser/test.ml index c588212..a6c2320 100644 --- a/LCGparser/test.ml +++ b/LCGparser/test.ml @@ -24,7 +24,7 @@ type entry = | Raised of grammar_symbol let examples = [ -(* "kot",[ + "kot",[ 0, 1, "Ala","Ala","subst", Basic(Tensor[Atom "np"; Atom "nom"]); 1, 2, "ma","mieć","fin", Basic(ImpSet(Tensor[Atom "ip"],[Both,Tensor[Atom "np"; Atom "nom"];Both,Tensor[Atom "np"; Atom "acc"]])); (* 1, 2, "ma","mieć","fin", Basic(Imp(Imp(Tensor[Atom "ip"],Backward,Tensor[Atom "np"; Atom "nom"]),Forward,Tensor[Atom "np"; Atom "nom"])); *) @@ -39,7 +39,7 @@ let examples = [ 3, 4, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Maybe(Tensor[Atom "adjp"; AVar "case"])]))); (* 3, 4, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Tensor[Atom "adjp"; AVar "case"]]))); *) 4, 5, ".",".","interp", Basic(Imp(Tensor[Atom "<root>"],Backward,Tensor[Atom "ip"])); - ],5;*) + ],5; "jaki",[ 0, 1, "Jakiego","jaki","adj",Raised(WithVar("case",With[Atom "gen"; Atom "acc"],"A",ImpSet(ImpSet(Tensor[Atom "cp"; Atom "int"; Atom "jaki"], @@ -49,16 +49,17 @@ let examples = [ (* 1, 2, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",Imp(Tensor[Atom "np"; AVar "case"],Backward,Maybe(Tensor[Atom "adjp"; AVar "case"])))); *) (* 1, 2, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Tensor[Atom "adjp"; AVar "case"]]))); *) 2, 3, "Ala","Ala","subst", Basic(Tensor[Atom "np"; Atom "nom"]); - 2, 3, "Ala","Ala","subst", Basic(Tensor[Atom "np"; Atom "nom"]); 3, 4, "ma","mieć","fin", Basic(ImpSet(Tensor[Atom "ip"],[Both,Tensor[Atom "np"; Atom "nom"];Both,Tensor[Atom "np"; Atom "acc"]])); 4, 5, "?","?","interp", Basic(Imp(Tensor[Atom "<root>"],Backward,Tensor[Atom "cp";Atom "int";Top])); ],5 ] let create_chart tokens last = + ENIAM_LCGrenderer.reset_variable_numbers (); let chart = ENIAM_LCGchart.make last in let chart = Xlist.fold tokens chart (fun chart (lnode,rnode,orth,lemma,pos,entry) -> ENIAM_LCGrenderer.reset_variable_names (); + ENIAM_LCGrenderer.add_variable_numbers (); let syntax,semantics = match entry with Basic syntax -> let node = {ENIAM_LCGrenderer.empty_node with