Commit ff65689173cf144a2852201c1130b57e55cb863f
1 parent
fe29dfc9
Wydzielenie pojęć projektywnych
Showing
3 changed files
with
46 additions
and
14 deletions
LCGlexicon/resources/lexicon-pl.dic
@@ -70,6 +70,7 @@ pos=subst|depr,nsyn!=pronoun,nsem!=measure: | @@ -70,6 +70,7 @@ pos=subst|depr,nsyn!=pronoun,nsem!=measure: | ||
70 | pos=subst,case=gen,nsyn!=pronoun,nsem!=measure: | 70 | pos=subst,case=gen,nsyn!=pronoun,nsem!=measure: |
71 | QUANT[number=T,case=all_cases,gender=T,person=T] | 71 | QUANT[number=T,case=all_cases,gender=T,person=T] |
72 | np*sg*case*n*person{\num*number*case*gender*person*rec*nsem}{schema}{\(1+qub),/(1+inclusion)}; # UWAGA: number "sg" i gender "n", żeby uzgadniać z podmiotem czasownika | 72 | np*sg*case*n*person{\num*number*case*gender*person*rec*nsem}{schema}{\(1+qub),/(1+inclusion)}; # UWAGA: number "sg" i gender "n", żeby uzgadniać z podmiotem czasownika |
73 | +#FIXME: w poniższych nie powinno być zmiany przypadka | ||
73 | pos=subst,case=gen,nsyn!=pronoun,nsem!=measure: | 74 | pos=subst,case=gen,nsyn!=pronoun,nsem!=measure: |
74 | QUANT[unumber=all_numbers,ucase=gen,ugender=all_genders, uperson=all_persons,case=gen] | 75 | QUANT[unumber=all_numbers,ucase=gen,ugender=all_genders, uperson=all_persons,case=gen] |
75 | np*unumber*ucase*ugender*uperson{\measure*unumber*ucase*ugender*uperson}{schema}{\(1+qub),/(1+inclusion)}; | 76 | np*unumber*ucase*ugender*uperson{\measure*unumber*ucase*ugender*uperson}{schema}{\(1+qub),/(1+inclusion)}; |
@@ -77,7 +78,7 @@ pos=subst,case=gen,nsyn!=pronoun,nsem!=measure: | @@ -77,7 +78,7 @@ pos=subst,case=gen,nsyn!=pronoun,nsem!=measure: | ||
77 | QUANT[unumber=all_numbers,ucase=dat,ugender=all_genders, uperson=all_persons,case=dat] | 78 | QUANT[unumber=all_numbers,ucase=dat,ugender=all_genders, uperson=all_persons,case=dat] |
78 | np*unumber*ucase*ugender*uperson{\measure*unumber*ucase*ugender*uperson}{schema}{\(1+qub),/(1+inclusion)}; | 79 | np*unumber*ucase*ugender*uperson{\measure*unumber*ucase*ugender*uperson}{schema}{\(1+qub),/(1+inclusion)}; |
79 | pos=subst,case=gen,nsyn!=pronoun,nsem!=measure: | 80 | pos=subst,case=gen,nsyn!=pronoun,nsem!=measure: |
80 | - QUANT[unumber=all_numbers,ucase=acc,ugender=all_genders, uperson=all_persons,case=acc] | 81 | + QUANT[unumber=all_numbers,ucase=acc,ugender=all_genders, uperson=all_persons,case=acc] |
81 | np*unumber*ucase*ugender*uperson{\measure*unumber*ucase*ugender*uperson}{schema}{\(1+qub),/(1+inclusion)}; | 82 | np*unumber*ucase*ugender*uperson{\measure*unumber*ucase*ugender*uperson}{schema}{\(1+qub),/(1+inclusion)}; |
82 | pos=subst,case=gen,nsyn!=pronoun,nsem!=measure: | 83 | pos=subst,case=gen,nsyn!=pronoun,nsem!=measure: |
83 | QUANT[unumber=all_numbers,ucase=inst,ugender=all_genders, uperson=all_persons,case=inst] | 84 | QUANT[unumber=all_numbers,ucase=inst,ugender=all_genders, uperson=all_persons,case=inst] |
@@ -227,20 +228,20 @@ pos=pant,negation=neg: padvp{schema}{\(1+qub),/(1+inclusion)}{\nie}; | @@ -227,20 +228,20 @@ pos=pant,negation=neg: padvp{schema}{\(1+qub),/(1+inclusion)}{\nie}; | ||
227 | 228 | ||
228 | pos=comp: QUANT[ctype=sub] cp*ctype*lemma/ip*T*T*T; | 229 | pos=comp: QUANT[ctype=sub] cp*ctype*lemma/ip*T*T*T; |
229 | pos=conj: QUANT[ctype=coord] cp*ctype*lemma/ip*T*T*T; | 230 | pos=conj: QUANT[ctype=coord] cp*ctype*lemma/ip*T*T*T; |
230 | -lemma=i|lub|czy|bądź,pos=conj: | 231 | +lemma=i|oraz|lub|czy|bądź,pos=conj: |
231 | QUANT[number=all_numbers,gender=all_genders,person=all_persons] | 232 | QUANT[number=all_numbers,gender=all_genders,person=all_persons] |
232 | (ip*number*gender*person/ip*T*T*T)\ip*T*T*T; | 233 | (ip*number*gender*person/ip*T*T*T)\ip*T*T*T; |
233 | -lemma=,|i|lub|czy|bądź,pos=conj: (advp*mod/prepnp*T*T)\prepnp*T*T; | ||
234 | -lemma=,|i|lub|czy|bądź,pos=conj: QUANT[mode=0] (advp*mode/advp*mode)\prepnp*T*T; | ||
235 | -lemma=,|i|lub|czy|bądź,pos=conj: QUANT[mode=0] (advp*mode/prepnp*T*T)\advp*mode; | ||
236 | -lemma=,|i|lub|czy|bądź,pos=conj: (advp*mod/advp*T)\advp*T; #FIXME: przydałaby się wersja zachowująca mode | ||
237 | -lemma=,|i|lub|czy|bądź,pos=conj: | 234 | +lemma=,|i|oraz|lub|czy|bądź,pos=conj: (advp*mod/prepnp*T*T)\prepnp*T*T; |
235 | +lemma=,|i|oraz|lub|czy|bądź,pos=conj: QUANT[mode=0] (advp*mode/advp*mode)\prepnp*T*T; | ||
236 | +lemma=,|i|oraz|lub|czy|bądź,pos=conj: QUANT[mode=0] (advp*mode/prepnp*T*T)\advp*mode; | ||
237 | +lemma=,|i|oraz|lub|czy|bądź,pos=conj: (advp*mod/advp*T)\advp*T; #FIXME: przydałaby się wersja zachowująca mode | ||
238 | +lemma=,|i|oraz|lub|czy|bądź,pos=conj: | ||
238 | QUANT[plemma=0,case=all_cases] | 239 | QUANT[plemma=0,case=all_cases] |
239 | (prepnp*plemma*case/prepnp*plemma*case)\prepnp*plemma*case; | 240 | (prepnp*plemma*case/prepnp*plemma*case)\prepnp*plemma*case; |
240 | -lemma=,|i|lub|czy|bądź,pos=conj: | 241 | +lemma=,|i|oraz|lub|czy|bądź,pos=conj: |
241 | QUANT[number=all_numbers,case=all_cases,gender=all_genders,person=all_persons] | 242 | QUANT[number=all_numbers,case=all_cases,gender=all_genders,person=all_persons] |
242 | (np*number*case*gender*person/np*T*case*T*T)\np*T*case*T*T; | 243 | (np*number*case*gender*person/np*T*case*T*T)\np*T*case*T*T; |
243 | -lemma=,|i|lub|czy|bądź,pos=conj: | 244 | +lemma=,|i|oraz|lub|czy|bądź,pos=conj: |
244 | QUANT[number=all_numbers,case=all_cases,gender=all_genders] | 245 | QUANT[number=all_numbers,case=all_cases,gender=all_genders] |
245 | (adjp*number*case*gender/adjp*number*case*gender)\adjp*number*case*gender; | 246 | (adjp*number*case*gender/adjp*number*case*gender)\adjp*number*case*gender; |
246 | 247 |
semantics/ENIAMsemGraph.ml
@@ -46,7 +46,7 @@ let rec make_args_list = function | @@ -46,7 +46,7 @@ let rec make_args_list = function | ||
46 | let symbols = StringSet.of_list [ | 46 | let symbols = StringSet.of_list [ |
47 | "symbol"; "date"; "date-interval"; "hour-minute"; "hour"; "hour-minute-interval"; "hour-interval"; | 47 | "symbol"; "date"; "date-interval"; "hour-minute"; "hour"; "hour-minute-interval"; "hour-interval"; |
48 | "year"; "year-interval"; "day"; "day-interval"; "day-month"; "day-month-interval"; "month-interval"; "roman"; "roman-interval"; | 48 | "year"; "year-interval"; "day"; "day-interval"; "day-month"; "day-month-interval"; "month-interval"; "roman"; "roman-interval"; |
49 | - "match-result"; "url"; "email"; "obj-id"; | 49 | + "match-result"; "url"; "email"; "obj-id"; "building-number"; |
50 | "month-lex"; "day-lex"] | 50 | "month-lex"; "day-lex"] |
51 | 51 | ||
52 | let rec get_person = function | 52 | let rec get_person = function |
@@ -77,8 +77,35 @@ let make_make_triple_relation t c = | @@ -77,8 +77,35 @@ let make_make_triple_relation t c = | ||
77 | | "adjunct" -> MakeTripleRelation(t.arole,t.arole_attr,c) | 77 | | "adjunct" -> MakeTripleRelation(t.arole,t.arole_attr,c) |
78 | | s -> failwith ("make_make_triple_relation: " ^ s) | 78 | | s -> failwith ("make_make_triple_relation: " ^ s) |
79 | 79 | ||
80 | +let rec split_relations = function | ||
81 | + Tuple l -> | ||
82 | + let pl,cl = Xlist.fold l ([],[]) (fun (pl,cl) t -> | ||
83 | + let p,c = split_relations t in | ||
84 | + p :: pl, c :: cl) in | ||
85 | + Tuple(List.rev pl), Tuple(List.rev cl) | ||
86 | + | Variant(i,l) -> | ||
87 | + let pl,cl = Xlist.fold l ([],[]) (fun (pl,cl) (e,t) -> | ||
88 | + let p,c = split_relations t in | ||
89 | + (e,p) :: pl, (e,c) :: cl) in | ||
90 | + Variant(i,List.rev pl), Variant(i,List.rev cl) | ||
91 | + | Dot -> Dot, Dot | ||
92 | + | t -> failwith ("split_relations: " ^ ENIAMsemStringOf.linear_term 0 t) | ||
93 | + | ||
94 | +(* let add_proj proj c = | ||
95 | + if proj = Dot then Concept c else | ||
96 | + Concept{empty_concept with c_cat=proj; c_relations=Tuple[Relation("Has","",Concept{c with c_relations=Dot});c.c_relations]} *) | ||
97 | +let add_proj proj c = | ||
98 | + if proj = Dot then Concept c else | ||
99 | + Concept{empty_concept with c_cat=proj; c_relations=Relation("Has","",Concept c)} | ||
100 | +(* let add_proj proj c = | ||
101 | + if proj = Dot then Concept c else | ||
102 | + let proj_rels,c_rels = split_relations c.c_relations in | ||
103 | + Concept{empty_concept with c_cat=proj; c_relations=Tuple[Relation("Has","",Concept{c with c_relations=c_rels});proj_rels]} *) | ||
104 | + | ||
105 | + | ||
80 | let create_normal_concept (*roles role_attrs*) tokens lex_sems t cat proj = | 106 | let create_normal_concept (*roles role_attrs*) tokens lex_sems t cat proj = |
81 | (*if t.agf = ENIAMwalTypes.NOSEM then t.args else*) | 107 | (*if t.agf = ENIAMwalTypes.NOSEM then t.args else*) |
108 | + let proj = if proj = cat then Dot else proj in | ||
82 | let c = {empty_concept with | 109 | let c = {empty_concept with |
83 | c_sense = (*if t.lemma = "<root>" then Dot else*) (*t.meaning*)Val t.lemma; (* FIXME: zaślepka na potrzeby gramatyk semantycznych *) | 110 | c_sense = (*if t.lemma = "<root>" then Dot else*) (*t.meaning*)Val t.lemma; (* FIXME: zaślepka na potrzeby gramatyk semantycznych *) |
84 | c_relations=t.args; | 111 | c_relations=t.args; |
@@ -86,8 +113,8 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t cat proj = | @@ -86,8 +113,8 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t cat proj = | ||
86 | c_variable=string_of_int t.id,""; | 113 | c_variable=string_of_int t.id,""; |
87 | c_pos=(*if t.id >= Array.length tokens then -1 else*) (ExtArray.get tokens t.id).ENIAMtokenizerTypes.beg; (* FIXME: pro nie mają przydzielonego id *) | 114 | c_pos=(*if t.id >= Array.length tokens then -1 else*) (ExtArray.get tokens t.id).ENIAMtokenizerTypes.beg; (* FIXME: pro nie mają przydzielonego id *) |
88 | c_local_quant=true; | 115 | c_local_quant=true; |
89 | - c_cat=cat; | ||
90 | - c_proj=proj} in | 116 | + c_cat=cat(*; |
117 | + c_proj=proj*)} in | ||
91 | if t.pos = "subst" || t.pos = "depr" || t.pos = "ger" || t.pos = "unk" || StringSet.mem symbols t.pos then (* FIXME: wykrywanie plurale tantum *) | 118 | if t.pos = "subst" || t.pos = "depr" || t.pos = "ger" || t.pos = "unk" || StringSet.mem symbols t.pos then (* FIXME: wykrywanie plurale tantum *) |
92 | let c = {c with c_local_quant=false} in | 119 | let c = {c with c_local_quant=false} in |
93 | let c,measure,cx_flag = Xlist.fold t.attrs (c,false,false) (fun (c,measure,cx_flag) -> function | 120 | let c,measure,cx_flag = Xlist.fold t.attrs (c,false,false) (fun (c,measure,cx_flag) -> function |
@@ -119,9 +146,10 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t cat proj = | @@ -119,9 +146,10 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t cat proj = | ||
119 | if cx_flag then | 146 | if cx_flag then |
120 | let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in | 147 | let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in |
121 | let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in | 148 | let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in |
122 | - make_relation t (Context{empty_context with cx_contents=Concept c; cx_variable=string_of_int id,""; cx_pos=c.c_pos}) | 149 | + make_relation t (Context{empty_context with cx_contents=add_proj proj c; cx_variable=string_of_int id,""; cx_pos=c.c_pos}) |
123 | else | 150 | else |
124 | - make_relation t (Concept c) else | 151 | + make_relation t (add_proj proj c) else |
152 | + if proj <> Dot then failwith ("create_normal_concept proj: " ^ t.lemma) else | ||
125 | if t.pos = "fin" || t.pos = "bedzie" || t.pos = "praet" || t.pos = "winien" || t.pos = "impt" || t.pos = "imps" || t.pos = "pred" || t.lemma = "pro-komunikować" then | 153 | if t.pos = "fin" || t.pos = "bedzie" || t.pos = "praet" || t.pos = "winien" || t.pos = "impt" || t.pos = "imps" || t.pos = "pred" || t.lemma = "pro-komunikować" then |
126 | let c = {c with c_local_quant=false} in | 154 | let c = {c with c_local_quant=false} in |
127 | let c = Xlist.fold t.attrs c (fun c -> function | 155 | let c = Xlist.fold t.attrs c (fun c -> function |
semantics/ENIAMsemLexicon.ml
@@ -71,6 +71,9 @@ let parse_role p = function | @@ -71,6 +71,9 @@ let parse_role p = function | ||
71 | | "Count" -> {p with role="Count"; sel_prefs=[SynsetName "ALL"]} | 71 | | "Count" -> {p with role="Count"; sel_prefs=[SynsetName "ALL"]} |
72 | | "Measure" -> {p with role="Measure"; sel_prefs=[SynsetName "ALL"]} | 72 | | "Measure" -> {p with role="Measure"; sel_prefs=[SynsetName "ALL"]} |
73 | | "Apoz" -> {p with role="Apoz"; sel_prefs=[SynsetName "ALL"]} | 73 | | "Apoz" -> {p with role="Apoz"; sel_prefs=[SynsetName "ALL"]} |
74 | + | "Has" -> {p with role="Has"; sel_prefs=[SynsetName "ALL"]} | ||
75 | + | "PHas" -> {p with role="PHas"; sel_prefs=[SynsetName "ALL"]} | ||
76 | + | "PApoz" -> {p with role="PApoz"; sel_prefs=[SynsetName "ALL"]} | ||
74 | | s -> failwith ("parse_role: " ^ s) | 77 | | s -> failwith ("parse_role: " ^ s) |
75 | 78 | ||
76 | let parse_entry = function | 79 | let parse_entry = function |