From ff65689173cf144a2852201c1130b57e55cb863f Mon Sep 17 00:00:00 2001 From: Wojciech Jaworski <wjaworski@mimuw.edu.pl> Date: Tue, 5 Dec 2017 11:29:10 +0100 Subject: [PATCH] Wydzielenie pojęć projektywnych --- LCGlexicon/resources/lexicon-pl.dic | 19 ++++++++++--------- semantics/ENIAMsemGraph.ml | 38 +++++++++++++++++++++++++++++++++----- semantics/ENIAMsemLexicon.ml | 3 +++ 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/LCGlexicon/resources/lexicon-pl.dic b/LCGlexicon/resources/lexicon-pl.dic index 12b527d..c621b43 100644 --- a/LCGlexicon/resources/lexicon-pl.dic +++ b/LCGlexicon/resources/lexicon-pl.dic @@ -70,6 +70,7 @@ pos=subst|depr,nsyn!=pronoun,nsem!=measure: pos=subst,case=gen,nsyn!=pronoun,nsem!=measure: QUANT[number=T,case=all_cases,gender=T,person=T] np*sg*case*n*person{\num*number*case*gender*person*rec*nsem}{schema}{\(1+qub),/(1+inclusion)}; # UWAGA: number "sg" i gender "n", żeby uzgadniać z podmiotem czasownika +#FIXME: w poniższych nie powinno być zmiany przypadka pos=subst,case=gen,nsyn!=pronoun,nsem!=measure: QUANT[unumber=all_numbers,ucase=gen,ugender=all_genders, uperson=all_persons,case=gen] np*unumber*ucase*ugender*uperson{\measure*unumber*ucase*ugender*uperson}{schema}{\(1+qub),/(1+inclusion)}; @@ -77,7 +78,7 @@ pos=subst,case=gen,nsyn!=pronoun,nsem!=measure: QUANT[unumber=all_numbers,ucase=dat,ugender=all_genders, uperson=all_persons,case=dat] np*unumber*ucase*ugender*uperson{\measure*unumber*ucase*ugender*uperson}{schema}{\(1+qub),/(1+inclusion)}; pos=subst,case=gen,nsyn!=pronoun,nsem!=measure: - QUANT[unumber=all_numbers,ucase=acc,ugender=all_genders, uperson=all_persons,case=acc] + QUANT[unumber=all_numbers,ucase=acc,ugender=all_genders, uperson=all_persons,case=acc] np*unumber*ucase*ugender*uperson{\measure*unumber*ucase*ugender*uperson}{schema}{\(1+qub),/(1+inclusion)}; pos=subst,case=gen,nsyn!=pronoun,nsem!=measure: QUANT[unumber=all_numbers,ucase=inst,ugender=all_genders, uperson=all_persons,case=inst] @@ -227,20 
+228,20 @@ pos=pant,negation=neg: padvp{schema}{\(1+qub),/(1+inclusion)}{\nie}; pos=comp: QUANT[ctype=sub] cp*ctype*lemma/ip*T*T*T; pos=conj: QUANT[ctype=coord] cp*ctype*lemma/ip*T*T*T; -lemma=i|lub|czy|bądź,pos=conj: +lemma=i|oraz|lub|czy|bądź,pos=conj: QUANT[number=all_numbers,gender=all_genders,person=all_persons] (ip*number*gender*person/ip*T*T*T)\ip*T*T*T; -lemma=,|i|lub|czy|bądź,pos=conj: (advp*mod/prepnp*T*T)\prepnp*T*T; -lemma=,|i|lub|czy|bądź,pos=conj: QUANT[mode=0] (advp*mode/advp*mode)\prepnp*T*T; -lemma=,|i|lub|czy|bądź,pos=conj: QUANT[mode=0] (advp*mode/prepnp*T*T)\advp*mode; -lemma=,|i|lub|czy|bądź,pos=conj: (advp*mod/advp*T)\advp*T; #FIXME: przydałaby się wersja zachowująca mode -lemma=,|i|lub|czy|bądź,pos=conj: +lemma=,|i|oraz|lub|czy|bądź,pos=conj: (advp*mod/prepnp*T*T)\prepnp*T*T; +lemma=,|i|oraz|lub|czy|bądź,pos=conj: QUANT[mode=0] (advp*mode/advp*mode)\prepnp*T*T; +lemma=,|i|oraz|lub|czy|bądź,pos=conj: QUANT[mode=0] (advp*mode/prepnp*T*T)\advp*mode; +lemma=,|i|oraz|lub|czy|bądź,pos=conj: (advp*mod/advp*T)\advp*T; #FIXME: przydałaby się wersja zachowująca mode +lemma=,|i|oraz|lub|czy|bądź,pos=conj: QUANT[plemma=0,case=all_cases] (prepnp*plemma*case/prepnp*plemma*case)\prepnp*plemma*case; -lemma=,|i|lub|czy|bądź,pos=conj: +lemma=,|i|oraz|lub|czy|bądź,pos=conj: QUANT[number=all_numbers,case=all_cases,gender=all_genders,person=all_persons] (np*number*case*gender*person/np*T*case*T*T)\np*T*case*T*T; -lemma=,|i|lub|czy|bądź,pos=conj: +lemma=,|i|oraz|lub|czy|bądź,pos=conj: QUANT[number=all_numbers,case=all_cases,gender=all_genders] (adjp*number*case*gender/adjp*number*case*gender)\adjp*number*case*gender; diff --git a/semantics/ENIAMsemGraph.ml b/semantics/ENIAMsemGraph.ml index a415163..2c8df1e 100644 --- a/semantics/ENIAMsemGraph.ml +++ b/semantics/ENIAMsemGraph.ml @@ -46,7 +46,7 @@ let rec make_args_list = function let symbols = StringSet.of_list [ "symbol"; "date"; "date-interval"; "hour-minute"; "hour"; "hour-minute-interval"; "hour-interval"; 
"year"; "year-interval"; "day"; "day-interval"; "day-month"; "day-month-interval"; "month-interval"; "roman"; "roman-interval"; - "match-result"; "url"; "email"; "obj-id"; + "match-result"; "url"; "email"; "obj-id"; "building-number"; "month-lex"; "day-lex"] let rec get_person = function @@ -77,8 +77,35 @@ let make_make_triple_relation t c = | "adjunct" -> MakeTripleRelation(t.arole,t.arole_attr,c) | s -> failwith ("make_make_triple_relation: " ^ s) +(** [split_relations t] walks the relation term [t] recursively and returns a pair of terms with the same shape: a [Tuple] gives a pair of [Tuple]s, a [Variant (i, l)] gives a pair of [Variant]s that keep [i] and every label, and [Dot] gives [(Dot, Dot)]. The accumulators are built in reverse and restored with [List.rev] (presumably [Xlist.fold] is a left fold -- TODO confirm). Any other term raises [Failure] with the printed term. NOTE(review): since [Dot] is the only accepted leaf, both components come out structurally identical as written; within this hunk the function is referenced only from the commented-out [add_proj] variant. *) let rec split_relations = function + Tuple l -> + let pl,cl = Xlist.fold l ([],[]) (fun (pl,cl) t -> + let p,c = split_relations t in + p :: pl, c :: cl) in + Tuple(List.rev pl), Tuple(List.rev cl) + | Variant(i,l) -> + let pl,cl = Xlist.fold l ([],[]) (fun (pl,cl) (e,t) -> + let p,c = split_relations t in + (e,p) :: pl, (e,c) :: cl) in + Variant(i,List.rev pl), Variant(i,List.rev cl) + | Dot -> Dot, Dot + | t -> failwith ("split_relations: " ^ ENIAMsemStringOf.linear_term 0 t) + +(* let add_proj proj c = + if proj = Dot then Concept c else + Concept{empty_concept with c_cat=proj; c_relations=Tuple[Relation("Has","",Concept{c with c_relations=Dot});c.c_relations]} *) +(** [add_proj proj c] returns [Concept c] unchanged when [proj] is [Dot]. Otherwise it builds a new concept from [empty_concept] whose [c_cat] is [proj] and whose [c_relations] is a single Has relation (with an empty second field) pointing at [Concept c]; all relations of [c] stay on [c] itself. The commented-out variants around this definition are alternative placements of the relations of [c]. *) let add_proj proj c = + if proj = Dot then Concept c else + Concept{empty_concept with c_cat=proj; c_relations=Relation("Has","",Concept c)} +(* let add_proj proj c = + if proj = Dot then Concept c else + let proj_rels,c_rels = split_relations c.c_relations in + Concept{empty_concept with c_cat=proj; c_relations=Tuple[Relation("Has","",Concept{c with c_relations=c_rels});proj_rels]} *) + + let create_normal_concept (*roles role_attrs*) tokens lex_sems t cat proj = (*if t.agf = ENIAMwalTypes.NOSEM then t.args else*) + (* A projection equal to the concept category is replaced by Dot, which add_proj treats as no projection, so no wrapper concept is created in that case. *) let proj = if proj = cat then Dot else proj in let c = {empty_concept with c_sense = (*if t.lemma = "<root>" then Dot else*) (*t.meaning*)Val t.lemma; (* FIXME: zaślepka na potrzeby gramatyk semantycznych *) c_relations=t.args; @@ -86,8 +113,8 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t cat proj = 
c_variable=string_of_int t.id,""; c_pos=(*if t.id >= Array.length tokens then -1 else*) (ExtArray.get tokens t.id).ENIAMtokenizerTypes.beg; (* FIXME: pro nie mają przydzielonego id *) c_local_quant=true; - c_cat=cat; - c_proj=proj} in + c_cat=cat(*; + c_proj=proj*)} in if t.pos = "subst" || t.pos = "depr" || t.pos = "ger" || t.pos = "unk" || StringSet.mem symbols t.pos then (* FIXME: wykrywanie plurale tantum *) let c = {c with c_local_quant=false} in let c,measure,cx_flag = Xlist.fold t.attrs (c,false,false) (fun (c,measure,cx_flag) -> function @@ -119,9 +146,10 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t cat proj = if cx_flag then let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in - make_relation t (Context{empty_context with cx_contents=Concept c; cx_variable=string_of_int id,""; cx_pos=c.c_pos}) + make_relation t (Context{empty_context with cx_contents=add_proj proj c; cx_variable=string_of_int id,""; cx_pos=c.c_pos}) else - make_relation t (Concept c) else + make_relation t (add_proj proj c) else + (* Guard: the branches that follow in this else-chain run only when proj is Dot; any other projection raises Failure here, carrying the lemma in the message. *) if proj <> Dot then failwith ("create_normal_concept proj: " ^ t.lemma) else if t.pos = "fin" || t.pos = "bedzie" || t.pos = "praet" || t.pos = "winien" || t.pos = "impt" || t.pos = "imps" || t.pos = "pred" || t.lemma = "pro-komunikować" then let c = {c with c_local_quant=false} in let c = Xlist.fold t.attrs c (fun c -> function diff --git a/semantics/ENIAMsemLexicon.ml b/semantics/ENIAMsemLexicon.ml index 936488a..3f8e313 100644 --- a/semantics/ENIAMsemLexicon.ml +++ b/semantics/ENIAMsemLexicon.ml @@ -71,6 +71,9 @@ let parse_role p = function | "Count" -> {p with role="Count"; sel_prefs=[SynsetName "ALL"]} | "Measure" -> {p with role="Measure"; sel_prefs=[SynsetName "ALL"]} | "Apoz" -> {p with role="Apoz"; sel_prefs=[SynsetName "ALL"]} + | "Has" -> {p with role="Has"; sel_prefs=[SynsetName "ALL"]} + | "PHas" -> {p with role="PHas"; sel_prefs=[SynsetName 
"ALL"]} + | "PApoz" -> {p with role="PApoz"; sel_prefs=[SynsetName "ALL"]} | s -> failwith ("parse_role: " ^ s) let parse_entry = function -- libgit2 0.22.2