Commit f96aa7383ee8237b68a1ad301802dae6169713cc
1 parent
394fa219
Dezambiguacja preferencji selekcyjnych
Showing
10 changed files
with
371 additions
and
84 deletions
exec/ENIAMdisambiguation.ml
... | ... | @@ -50,8 +50,8 @@ let rec select_random_rec selection = function |
50 | 50 | | t -> failwith ("select_random_rec: " ^ ENIAM_LCGstringOf.linear_term 0 t) |
51 | 51 | |
52 | 52 | let select_random tree = |
53 | - Int.fold 0 (Array.length tree - 1) StringMap.empty (fun selection i -> | |
54 | - select_random_rec selection tree.(i)) | |
53 | + Int.fold 0 (ExtArray.size tree - 1) StringMap.empty (fun selection i -> | |
54 | + select_random_rec selection (ExtArray.get tree i)) | |
55 | 55 | |
56 | 56 | let rec apply_selection_rec selection = function |
57 | 57 | Ref i -> Ref i |
... | ... | @@ -119,5 +119,76 @@ let rearrange_tree tree = |
119 | 119 | let random_tree tokens lex_sems tree = |
120 | 120 | (* print_endline "random_tree"; *) |
121 | 121 | let selection = select_random tree in |
122 | - let tree = apply_selection selection tree in | |
122 | + let tree = apply_selection selection (ExtArray.to_array tree) in | |
123 | 123 | rearrange_tree tree |
124 | + | |
(* [selprefs_rec cost term] returns the pair (cost of [term], pruned [term]).
   - [Ref i] costs [cost.(i)] and is returned unchanged.
   - [Node] has its [args] pruned recursively and always reports -1.
   - [Variant] keeps only the alternatives of minimal cost; ties are all kept
     in their original order, and a single survivor replaces the whole
     [Variant].  With no alternatives the reported cost is [max_int].
   - [Tuple] costs the sum of its elements; [Dot] costs 0.
   Any other term raises [Failure]. *)
let rec selprefs_rec cost term =
  match term with
  | Ref i -> cost.(i), Ref i
  | Node node ->
      let _, args = selprefs_rec cost node.args in
      -1, Node {node with args = args}
  | Variant (label, alternatives) ->
      (* Fold step: [kept] accumulates, in reverse, the alternatives whose
         cost equals the best cost seen so far. *)
      let keep_cheapest (best, kept) (idx, alt) =
        let alt_cost, alt = selprefs_rec cost alt in
        if alt_cost < best then alt_cost, [idx, alt]
        else if alt_cost > best then best, kept
        else best, (idx, alt) :: kept in
      let best, kept = Xlist.fold alternatives (max_int, []) keep_cheapest in
      (match kept with
       | [_, only] -> best, only
       | _ -> best, Variant (label, List.rev kept))
  | Tuple elements ->
      let total, rev_elements =
        Xlist.fold elements (0, []) (fun (sum, acc) element ->
          let element_cost, element = selprefs_rec cost element in
          sum + element_cost, element :: acc) in
      total, Tuple (List.rev rev_elements)
  | Dot -> 0, Dot
  | other -> failwith ("selprefs_rec: " ^ ENIAM_LCGstringOf.linear_term 0 other)
144 | + | |
(* [get_attr pat attrs] returns the value paired with the first key equal to
   [pat] in the association list [attrs].
   @raise Not_found if no key equals [pat]. *)
let rec get_attr pat attrs =
  match attrs with
  | [] -> raise Not_found
  | (key, value) :: _ when key = pat -> value
  | _ :: rest -> get_attr pat rest
150 | + | |
(* [list_of_selprefs term] turns a selectional-preference term into a list:
   [Val s] gives [[s]] and [Dot] gives the empty list.
   Any other term raises [Failure]. *)
let rec list_of_selprefs term =
  match term with
  | Dot -> []
  | Val s -> [s]
  | other -> failwith ("list_of_selprefs: " ^ ENIAM_LCGstringOf.linear_term 0 other)
155 | + | |
(* [map_of_hipero term] builds a string map from entry name to integer cost.
   [term] is either a single [Tuple [Val name; Val cost]] or a [Variant] whose
   alternatives all have that shape; [cost] is parsed with [int_of_string].
   Raises [Failure] on a malformed entry ("map_of_hipero 2"), on any other
   kind of term, and when the callback handed to [StringMap.add_inc] is
   invoked ("map_of_hipero 1" - presumably for a repeated name; confirm
   against the StringMap implementation). *)
let map_of_hipero term =
  let reject_duplicate _ = failwith "map_of_hipero 1" in
  let add_entry acc (_, entry) =
    match entry with
    | Tuple [Val hipero; Val cost] ->
        StringMap.add_inc acc hipero (int_of_string cost) reject_duplicate
    | _ -> failwith "map_of_hipero 2" in
  match term with
  | Variant (_, entries) -> Xlist.fold entries StringMap.empty add_entry
  | Tuple [Val hipero; Val cost] ->
      StringMap.add StringMap.empty hipero (int_of_string cost)
  | other -> failwith ("map_of_hipero: " ^ ENIAM_LCGstringOf.linear_term 0 other)
162 | + | |
(* [count_selprefs_cost tree cost term] computes the cost of [term].
   [cost] memoises the result for each [Ref i]; -1 marks an entry that has
   not been computed yet and is filled in on first visit.
   A [Node] costs the cost of its [args] plus a cost chosen by its "gf"
   attribute:
   - "adjunct": 100;
   - "subj", "obj", "arg", "core": the smallest value found in the "hipero"
     map for any of the "selprefs" names, or 1000 when none is found;
   - no "gf" attribute (or an empty one): 200;
   - any other value: [Failure].
   A [Variant] costs the minimum over its alternatives ([max_int] if there
   are none), a [Tuple] the sum of its elements, and [Dot] 0.
   Any other term raises [Failure]. *)
let rec count_selprefs_cost tree cost term =
  match term with
  | Ref i ->
      if cost.(i) <> -1 then cost.(i)
      else begin
        let computed = count_selprefs_cost tree cost (ExtArray.get tree i) in
        cost.(i) <- computed;
        computed
      end
  | Node node ->
      (* Kept as a thunk so that the sum below has the same operand shape as
         a direct [args cost + node cost] expression. *)
      let own_cost () =
        let gf = try get_attr "gf" node.attrs with Not_found -> Val "" in
        match gf with
        | Val "adjunct" -> 100
        | Val ("subj" | "obj" | "arg" | "core") ->
            let selprefs =
              try list_of_selprefs (get_attr "selprefs" node.attrs)
              with Not_found -> failwith "count_selprefs_cost: no selprefs" in
            let hipero =
              try map_of_hipero (get_attr "hipero" node.attrs)
              with Not_found -> failwith "count_selprefs_cost: no hipero" in
            Xlist.fold selprefs 1000 (fun best selpref ->
              try min best (StringMap.find hipero selpref)
              with Not_found -> best)
        | Val "" -> 200
        | Val s ->
            failwith ("count_selprefs_cost: unknown gf=" ^ s ^ " for " ^ node.lemma)
        | _ -> failwith "count_selprefs_cost" in
      (count_selprefs_cost tree cost node.args) + (own_cost ())
  | Variant (_, alternatives) ->
      Xlist.fold alternatives max_int (fun best (_, alt) ->
        min best (count_selprefs_cost tree cost alt))
  | Tuple elements ->
      Xlist.fold elements 0 (fun sum element ->
        sum + count_selprefs_cost tree cost element)
  | Dot -> 0
  | other ->
      failwith ("count_selprefs_cost: " ^ ENIAM_LCGstringOf.linear_term 0 other)
188 | + | |
(* [selprefs tree] disambiguates [tree] in place.
   It first computes costs starting from element 0 (filling a cost table with
   one slot per element, -1 meaning "not computed"), then replaces every
   element of [tree] by its pruned form as returned by [selprefs_rec].
   Returns unit; [tree] is mutated. *)
let selprefs tree =
  let size = ExtArray.size tree in
  let cost = Array.make size (-1) in
  let root_cost = count_selprefs_cost tree cost (ExtArray.get tree 0) in
  cost.(0) <- root_cost;
  let prune i =
    let _, pruned = selprefs_rec cost (ExtArray.get tree i) in
    ExtArray.set tree i pruned in
  Int.iter 0 (size - 1) prune;
  ()
... | ... |
exec/ENIAMexec.ml
... | ... | @@ -392,11 +392,19 @@ let eniam_semantic_processing verbosity tokens lex_sems result = |
392 | 392 | let tree = ENIAMsemValence.reduce_tree tokens lex_sems tree in |
393 | 393 | let result = if verbosity < 2 then result else {result with dependency_tree8=tree} in |
394 | 394 | tree,result |
395 | - with e -> [| |],{result with status=SemError2; msg=Printexc.to_string e} in | |
395 | + with e -> ExtArray.make 0 Dot,{result with status=SemError2; msg=Printexc.to_string e} in | |
396 | + if result.status = SemError2 then result else | |
397 | + let result = | |
398 | + try | |
399 | + ENIAMsemValence.transfer_attributes tree; (* niejawna zmiana imperatywna w tree *) | |
400 | + result | |
401 | + with e -> {result with status=SemError2; msg=Printexc.to_string e} in | |
396 | 402 | if result.status = SemError2 then result else |
397 | 403 | let tree,result = |
398 | 404 | try |
399 | - let tree = ENIAMdisambiguation.random_tree tokens lex_sems tree in | |
405 | + ENIAMdisambiguation.selprefs tree; (* niejawna zmiana imperatywna w tree *) | |
406 | + (* let tree = ENIAMdisambiguation.random_tree tokens lex_sems tree in *) | |
407 | + let tree = ExtArray.to_array tree in | |
400 | 408 | let result = if verbosity = 0 then result else {result with dependency_tree9=tree} in |
401 | 409 | tree,result |
402 | 410 | with e -> [| |],{result with status=SemError2; msg=Printexc.to_string e} in |
... | ... | @@ -406,6 +414,9 @@ let eniam_semantic_processing verbosity tokens lex_sems result = |
406 | 414 | let graph = ENIAMsemGraph.translate tokens lex_sems tree in |
407 | 415 | let result = if verbosity = 0 then result else {result with semantic_graph10=graph} in |
408 | 416 | let graph = ENIAMsemGraph.make_tree graph in |
417 | + let graph = ENIAMsemGraph.simplify_tree graph in | |
418 | +(* let graph = ENIAMsemGraph.manage_quantification graph in *) | |
419 | + (* let graph = ENIAMsemGraph.simplify_gender graph in *) | |
409 | 420 | let result = if verbosity = 0 then result else {result with semantic_graph11=graph} in |
410 | 421 | graph,result |
411 | 422 | with e -> ENIAMsemTypes.Dot,{result with status=SemError2; msg=Printexc.to_string e} in |
... | ... |
exec/ENIAMexecTypes.ml
... | ... | @@ -43,7 +43,7 @@ type eniam_parse_result = { |
43 | 43 | dependency_tree5: linear_term array; |
44 | 44 | dependency_tree6: linear_term array; |
45 | 45 | dependency_tree7: linear_term array; |
46 | - dependency_tree8: linear_term array; | |
46 | + dependency_tree8: linear_term ExtArray.t; | |
47 | 47 | dependency_tree9: linear_term array; |
48 | 48 | semantic_graph10: ENIAMsemTypes.linear_term array; |
49 | 49 | semantic_graph11: ENIAMsemTypes.linear_term; |
... | ... | @@ -183,7 +183,7 @@ let empty_eniam_parse_result = { |
183 | 183 | dependency_tree5=[| |]; |
184 | 184 | dependency_tree6=[| |]; |
185 | 185 | dependency_tree7=[| |]; |
186 | - dependency_tree8=[| |]; | |
186 | + dependency_tree8=ExtArray.make 0 Dot; | |
187 | 187 | dependency_tree9=[| |]; |
188 | 188 | semantic_graph10=[| |]; |
189 | 189 | semantic_graph11=ENIAMsemTypes.Dot; |
... | ... |
exec/ENIAMsemGraph.ml
... | ... | @@ -69,20 +69,12 @@ let make_relation t c = |
69 | 69 | Relation(t.role,t.role_attr,c) |
70 | 70 | | s -> failwith ("make_relation: " (*^ s*)) |
71 | 71 | |
72 | -let create_normal_concept (*roles role_attrs*) tokens lex_sems t sem_args = | |
73 | -(* let sem_args = if t.pos = "pro" then | |
74 | - match get_person t.attrs with | |
75 | - "pri" -> ["indexical"] | |
76 | - | "sec" -> ["indexical"] | |
77 | - | "ter" -> ["coreferential";"deictic"] | |
78 | - | "" -> ["indexical";"coreferential";"deictic"] | |
79 | - | _ -> failwith "create_normal_concept: pro" | |
80 | - else sem_args in (* FIXME: przesunąć to do rozszerzania path_array *) | |
81 | - if t.agf = ENIAMwalTypes.NOSEM then t.args else*) | |
72 | +let create_normal_concept (*roles role_attrs*) tokens lex_sems t = | |
73 | + (*if t.agf = ENIAMwalTypes.NOSEM then t.args else*) | |
82 | 74 | let c = {empty_concept with |
83 | 75 | c_sense = if t.lemma = "<root>" then Dot else Val t.meaning; |
84 | 76 | c_relations=t.args; |
85 | - c_quant=(*make_sem_args sem_args*)Dot;(* FIXME *) | |
77 | + c_quant=(*make_sem_args*) t.sem_args; | |
86 | 78 | c_variable=string_of_int t.id,""; |
87 | 79 | c_pos=(*if t.id >= Array.length tokens then -1 else*) (ExtArray.get tokens t.id).ENIAMtokenizerTypes.beg; |
88 | 80 | c_local_quant=true} in |
... | ... | @@ -145,7 +137,8 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t sem_args = |
145 | 137 | let c = Xlist.fold t.attrs c (fun c -> function |
146 | 138 | | "ASPECT",_ -> c |
147 | 139 | | "TENSE",Val t -> {c with c_relations=Tuple[c.c_relations;SingleRelation t]} |
148 | - | "NEG",Val "+" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} | |
140 | + | "NEGATION",Val "aff" -> c | |
141 | + | "NEGATION",Val "neg" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} | |
149 | 142 | | e,t -> failwith ("create_normal_concept verb: " ^ e)) in |
150 | 143 | let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in |
151 | 144 | let _ = ExtArray.add lex_sems in |
... | ... | @@ -181,7 +174,8 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t sem_args = |
181 | 174 | | "ASPECT",_ -> c |
182 | 175 | (* | "TYPE",Val "int" -> {c with c_quant=Tuple[c.c_quant;Val "interrogative"]} *) |
183 | 176 | | "TYPE",_ -> c |
184 | - | "NEG",Val "+" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} | |
177 | + | "NEGATION",Val "aff" -> c | |
178 | + | "NEGATION",Val "neg" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} | |
185 | 179 | | e,t -> failwith ("create_normal_concept adv: " ^ e)) in |
186 | 180 | Relation(t.role,t.role_attr,Concept c) else |
187 | 181 | if t.pos = "pro" || t.pos = "ppron12" || t.pos = "ppron3" || t.pos = "siebie" then (* FIXME: indexicalność *) |
... | ... | @@ -193,6 +187,8 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t sem_args = |
193 | 187 | | "CASE",_ -> c |
194 | 188 | | "SYN",_ -> c |
195 | 189 | | "NSEM",_ -> c |
190 | + | "controller",_ -> c | |
191 | + | "coref",_ -> c | |
196 | 192 | | e,t -> failwith ("create_normal_concept pron: " ^ e)) in |
197 | 193 | Relation(t.role,t.role_attr,Concept c) else |
198 | 194 | if t.pos = "prep" then |
... | ... | @@ -239,11 +235,12 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t sem_args = |
239 | 235 | let c = Xlist.fold t.attrs c (fun c -> function |
240 | 236 | | e,t -> failwith ("create_normal_concept sinterj: " ^ e)) in |
241 | 237 | Concept c else |
238 | + if t.lemma = "<root>" then t.args else | |
242 | 239 | if t.pos = "interp" && t.lemma = "</sentence>" then |
243 | 240 | let l = List.rev (make_args_list t.args) in |
244 | - Xlist.fold (List.tl l) (List.hd l) (fun t s -> AddRelation(RemoveRelation t,"Next","Clause",RemoveRelation s)) else | |
241 | + Xlist.fold (List.tl l) (RemoveRelation(List.hd l)) (fun t s -> AddRelation(t,"Next","Clause",RemoveRelation s)) else | |
245 | 242 | if t.pos = "interp" && t.lemma = "<sentence>" then t.args else |
246 | - if t.pos = "interp" && t.lemma = "”s" then | |
243 | +(* if t.pos = "interp" && t.lemma = "”s" then | |
247 | 244 | let l = List.rev (make_args_list t.args) in |
248 | 245 | let x = Xlist.fold (List.tl l) (List.hd l) (fun t s -> AddRelation(RemoveRelation t,"Next","Sentence",RemoveRelation s)) in |
249 | 246 | Relation(t.arole,t.arole_attr,x) else (* FIXME: czy na pewno tu i w następnych arole a nie position.role? *) |
... | ... | @@ -267,9 +264,9 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t sem_args = |
267 | 264 | if t.pos = "interp" && t.lemma = "?" then SingleRelation("int") else |
268 | 265 | if t.pos = "interp" && t.lemma = "„" then |
269 | 266 | Relation(t.role,t.role_attr,RemoveRelation t.args) else |
270 | - if t.pos = "interp" || t.lemma = "</or-sentence>" then Relation(t.role,t.role_attr,t.args) else ( | |
267 | + if t.pos = "interp" || t.lemma = "</or-sentence>" then Relation(t.role,t.role_attr,t.args) else*) ( | |
271 | 268 | if t.pos = "interp" then Node t else |
272 | - if t.pos = "" then Relation(t.role,t.role_attr,t.args) else | |
269 | + (*if t.pos = "" then Relation(t.role,t.role_attr,t.args) else*) | |
273 | 270 | (* print_endline t.lemma; *) |
274 | 271 | Node t) |
275 | 272 | |
... | ... | @@ -293,6 +290,7 @@ let rec translate_node tokens lex_sems t = |
293 | 290 | | "arole-attr",Val s -> {t with arole_attr=s},attrs |
294 | 291 | | "arev",Val "-" -> {t with arev=false},attrs |
295 | 292 | | "arev",Val "+" -> {t with arev=true},attrs |
293 | + | "agf",Val s -> t,attrs | |
296 | 294 | | "sem-args",s -> {t with sem_args=s},attrs |
297 | 295 | | "fopinion",_ -> t,attrs |
298 | 296 | | "sopinion",_ -> t,attrs |
... | ... | @@ -309,6 +307,8 @@ let rec translate_node tokens lex_sems t = |
309 | 307 | | "MOOD",s -> t,("MOOD",s) :: attrs |
310 | 308 | | "TENSE",s -> t,("TENSE",s) :: attrs |
311 | 309 | | "controller",s -> t,("controller",s) :: attrs |
310 | + | "controllee",s -> t,("controllee",s) :: attrs | |
311 | + | "coref",s -> t,("coref",s) :: attrs | |
312 | 312 | | "CAT",_ -> t,attrs |
313 | 313 | | "NUM",s -> t,("NUM",s) :: attrs |
314 | 314 | | "CASE",s -> t,("CASE",s) :: attrs |
... | ... | @@ -329,7 +329,7 @@ let rec translate_node tokens lex_sems t = |
329 | 329 | and create_concepts tokens lex_sems = function |
330 | 330 | ENIAM_LCGtypes.Node t -> |
331 | 331 | let t = translate_node tokens lex_sems t in |
332 | - create_normal_concept tokens lex_sems t [] | |
332 | + create_normal_concept tokens lex_sems t | |
333 | 333 | | ENIAM_LCGtypes.Tuple l -> Tuple(Xlist.map l (create_concepts tokens lex_sems)) |
334 | 334 | | ENIAM_LCGtypes.Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, create_concepts tokens lex_sems t)) |
335 | 335 | | ENIAM_LCGtypes.Dot -> Dot |
... | ... | @@ -365,11 +365,11 @@ let rec make_tree_rec references = function |
365 | 365 | (* | t -> failwith ("make_tree_rec: " ^ LCGstringOf.linear_term 0 t) *) |
366 | 366 | |
367 | 367 | let make_tree references = |
368 | - RemoveRelation(make_tree_rec references references.(0)) | |
369 | -(* | |
368 | + (*RemoveRelation*)(make_tree_rec references references.(0)) | |
369 | + | |
370 | 370 | let rec simplify_tree_add_relation r a s = function |
371 | - Concept c -> Concept{c with c_relations=Tuple[Relation(Val r,Val a,s);c.c_relations]} | |
372 | - | Context c -> Context{c with cx_relations=Tuple[Relation(Val r,Val a,s);c.cx_relations]} | |
371 | + Concept c -> Concept{c with c_relations=Tuple[Relation(r,a,s);c.c_relations]} | |
372 | + | Context c -> Context{c with cx_relations=Tuple[Relation(r,a,s);c.cx_relations]} | |
373 | 373 | | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, simplify_tree_add_relation r a s t)) |
374 | 374 | | t -> AddRelation(t,r,a,s) |
375 | 375 | |
... | ... | @@ -465,13 +465,13 @@ let rec simplify_tree = function |
465 | 465 | (* Variant(e,Xlist.map l (fun (i,t) -> i, simplify_tree t)) *) |
466 | 466 | | Dot -> Dot |
467 | 467 | | Val s -> Val s |
468 | - | t -> failwith ("simplify_tree: " ^ LCGstringOf.linear_term 0 t) | |
468 | + | t -> failwith ("simplify_tree: " ^ ENIAMsemStringOf.linear_term 0 t) | |
469 | 469 | |
470 | 470 | let rec manage_quantification2 (quants,quant) = function |
471 | 471 | Tuple l -> Xlist.fold l (quants,quant) manage_quantification2 |
472 | 472 | | Dot -> quants,quant |
473 | 473 | | Val s -> quants,Tuple[Val s;quant] |
474 | - | t -> (Relation(Val "Quantifier",Val "",t)) :: quants,quant | |
474 | + | t -> (Relation("Quantifier","",t)) :: quants,quant | |
475 | 475 | |
476 | 476 | let rec manage_quantification = function |
477 | 477 | Node t -> Node{t with args=manage_quantification t.args} |
... | ... | @@ -488,9 +488,9 @@ let rec manage_quantification = function |
488 | 488 | | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, manage_quantification t)) |
489 | 489 | | Dot -> Dot |
490 | 490 | | Val s -> Val s |
491 | - | t -> failwith ("manage_quantification: " ^ LCGstringOf.linear_term 0 t) | |
491 | + | t -> failwith ("manage_quantification: " ^ ENIAMsemStringOf.linear_term 0 t) | |
492 | 492 | |
493 | -let simplify_gender2 = function | |
493 | +(*let simplify_gender2 = function | |
494 | 494 | Variant(e,l) -> |
495 | 495 | (try |
496 | 496 | let l2 = List.sort compare (Xlist.rev_map l (function (_,Val s) -> s | _ -> raise Not_found)) in |
... | ... | @@ -523,30 +523,30 @@ let rec simplify_gender = function |
523 | 523 | | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, simplify_gender t)) |
524 | 524 | | Dot -> Dot |
525 | 525 | | Val s -> Val s |
526 | - | t -> failwith ("simplify_gender: " ^ LCGstringOf.linear_term 0 t) | |
526 | + | t -> failwith ("simplify_gender: " ^ ENIAMsemStringOf.linear_term 0 t)*) | |
527 | 527 | |
528 | 528 | (***************************************************************************************) |
529 | - | |
529 | +(* | |
530 | 530 | let rec validate_semantics_quant = function |
531 | 531 | Val _ -> true |
532 | 532 | | Variant(e,l) -> Xlist.fold l true (fun b (_,t) -> b && validate_semantics_quant t) |
533 | 533 | | Tuple l -> Xlist.fold l true (fun b t -> b && validate_semantics_quant t) |
534 | 534 | | Dot -> true |
535 | - | t -> (*print_endline ("validate_semantics_quant: " ^ LCGstringOf.linear_term 0 t);*) false | |
535 | + | t -> (*print_endline ("validate_semantics_quant: " ^ ENIAMsemStringOf.linear_term 0 t);*) false | |
536 | 536 | |
537 | 537 | let rec validate_semantics_sense = function |
538 | 538 | Val _ -> true |
539 | 539 | | Dot -> true |
540 | - | t -> (*print_endline ("validate_semantics_sense: " ^ LCGstringOf.linear_term 0 t);*) false | |
540 | + | t -> (*print_endline ("validate_semantics_sense: " ^ ENIAMsemStringOf.linear_term 0 t);*) false | |
541 | 541 | |
542 | 542 | let rec validate_semantics_rel_name = function |
543 | 543 | Val _ -> true |
544 | - | t -> (*print_endline ("validate_semantics_rel_name: " ^ LCGstringOf.linear_term 0 t);*) false | |
544 | + | t -> (*print_endline ("validate_semantics_rel_name: " ^ ENIAMsemStringOf.linear_term 0 t);*) false | |
545 | 545 | |
546 | 546 | let rec validate_semantics = function |
547 | 547 | Context c -> validate_semantics_sense c.cx_sense && validate_semantics_contents c.cx_contents && validate_semantics_relations c.cx_relations |
548 | 548 | | Variant(e,l) -> Xlist.fold l true (fun b (_,t) -> b && validate_semantics t) |
549 | - | t -> (*print_endline ("validate_semantics: " ^ LCGstringOf.linear_term 0 t);*) false | |
549 | + | t -> (*print_endline ("validate_semantics: " ^ ENIAMsemStringOf.linear_term 0 t);*) false | |
550 | 550 | |
551 | 551 | and validate_semantics_relations = function |
552 | 552 | SingleRelation r -> validate_semantics_rel_name r |
... | ... | @@ -555,20 +555,20 @@ and validate_semantics_relations = function |
555 | 555 | | Variant(e,l) -> Xlist.fold l true (fun b (_,t) -> b && validate_semantics_relations t) |
556 | 556 | | Tuple l -> Xlist.fold l true (fun b t -> b && validate_semantics_relations t) |
557 | 557 | | Dot -> true |
558 | - | t -> (*print_endline ("validate_semantics_relations: " ^ LCGstringOf.linear_term 0 t);*) false | |
558 | + | t -> (*print_endline ("validate_semantics_relations: " ^ ENIAMsemStringOf.linear_term 0 t);*) false | |
559 | 559 | |
560 | 560 | and validate_semantics_concept = function |
561 | 561 | Concept c -> validate_semantics_sense c.c_sense && validate_semantics_sense c.c_name && validate_semantics_quant c.c_quant && validate_semantics_relations c.c_relations |
562 | 562 | | Context c -> validate_semantics_sense c.cx_sense && validate_semantics_contents c.cx_contents && validate_semantics_relations c.cx_relations |
563 | 563 | | Variant(e,l) -> Xlist.fold l true (fun b (_,t) -> b && validate_semantics_concept t) |
564 | - | t -> (*print_endline ("validate_semantics_concept: " ^ LCGstringOf.linear_term 0 t);*) false | |
564 | + | t -> (*print_endline ("validate_semantics_concept: " ^ ENIAMsemStringOf.linear_term 0 t);*) false | |
565 | 565 | |
566 | 566 | and validate_semantics_contents = function |
567 | 567 | Concept c -> validate_semantics_concept (Concept c) |
568 | 568 | | Context c -> validate_semantics_concept (Context c) |
569 | 569 | | Variant(e,l) -> Xlist.fold l true (fun b (_,t) -> b && validate_semantics_contents t) |
570 | 570 | | Tuple l -> Xlist.fold l true (fun b t -> b && validate_semantics_contents t) |
571 | - | t -> (*print_endline ("validate_semantics_contents: " ^ LCGstringOf.linear_term 0 t);*) false | |
571 | + | t -> (*print_endline ("validate_semantics_contents: " ^ ENIAMsemStringOf.linear_term 0 t);*) false | |
572 | 572 | |
573 | 573 | (***************************************************************************************) |
574 | 574 | |
... | ... | @@ -596,7 +596,7 @@ let rec find_multiple_variants v m = function |
596 | 596 | Xlist.fold vl v StringSet.union, m |
597 | 597 | | Dot -> v,m |
598 | 598 | | Val s -> v,m |
599 | - | t -> failwith ("find_multiple_variants: " ^ LCGstringOf.linear_term 0 t) | |
599 | + | t -> failwith ("find_multiple_variants: " ^ ENIAMsemStringOf.linear_term 0 t) | |
600 | 600 | |
601 | 601 | type variant_structure = |
602 | 602 | C of variant_structure * variant_structure |
... | ... | @@ -636,7 +636,7 @@ let rec create_variant_structure = function |
636 | 636 | n,V(e,n,List.rev l) |
637 | 637 | | Dot -> 1,E |
638 | 638 | | Val s -> 1,E |
639 | - | t -> failwith ("create_variant_structure: " ^ LCGstringOf.linear_term 0 t) | |
639 | + | t -> failwith ("create_variant_structure: " ^ ENIAMsemStringOf.linear_term 0 t) | |
640 | 640 | |
641 | 641 | let rec get_all_variants = function |
642 | 642 | Concept c -> |
... | ... | @@ -667,7 +667,7 @@ let rec get_all_variants = function |
667 | 667 | List.rev (Xlist.fold l [] (fun l (_,t) -> get_all_variants t @ l)) |
668 | 668 | | Dot -> [Dot] |
669 | 669 | | Val s -> [Val s] |
670 | - | t -> failwith ("get_all_variants: " ^ LCGstringOf.linear_term 0 t) | |
670 | + | t -> failwith ("get_all_variants: " ^ ENIAMsemStringOf.linear_term 0 t) | |
671 | 671 | |
672 | 672 | let _ = Random.self_init () |
673 | 673 | |
... | ... | @@ -693,7 +693,7 @@ let rec draw_variant = function |
693 | 693 | draw_variant (s,t) |
694 | 694 | | E,Dot -> Dot |
695 | 695 | | E,Val s -> Val s |
696 | - | s,t -> (*print_endline ("draw_variant: " ^ LCGstringOf.linear_term 0 t);*) failwith ("draw_variant: " ^ string_of_variant_structure s) | |
696 | + | s,t -> (*print_endline ("draw_variant: " ^ ENIAMsemStringOf.linear_term 0 t);*) failwith ("draw_variant: " ^ string_of_variant_structure s) | |
697 | 697 | |
698 | 698 | let rec get_some_variants chosen = function |
699 | 699 | Concept c -> (* FIXME: czy pozostałe atrybuty można pominąć? *) |
... | ... | @@ -715,7 +715,7 @@ let rec get_some_variants chosen = function |
715 | 715 | else Variant(e,Xlist.map l (fun (i,t) -> i,get_some_variants chosen t)) |
716 | 716 | | Dot -> Dot |
717 | 717 | | Val s -> Val s |
718 | - | t -> failwith ("get_some_variants: " ^ LCGstringOf.linear_term 0 t) | |
718 | + | t -> failwith ("get_some_variants: " ^ ENIAMsemStringOf.linear_term 0 t) | |
719 | 719 | |
720 | 720 | let get_all_multiple_variants t mv = |
721 | 721 | let ll = StringMap.fold mv [] (fun ll e l -> |
... | ... | @@ -769,7 +769,7 @@ let draw_trees max_n t = |
769 | 769 | let n,s = create_variant_structure t in |
770 | 770 | n,s,t) in |
771 | 771 | let sum_n = Xlist.fold multiple_variants 0 (fun sum_n (n,_,_) -> sum_n + n) in |
772 | -(* print_endline (LCGstringOf.linear_term 0 t); | |
772 | +(* print_endline (ENIAMsemStringOf.linear_term 0 t); | |
773 | 773 | print_endline (string_of_variant_structure s);*) |
774 | 774 | if sum_n <= max_n then |
775 | 775 | List.flatten (Xlist.rev_map multiple_variants (fun (n,s,t) -> |
... | ... |
exec/ENIAMsemValence.ml
... | ... | @@ -27,6 +27,48 @@ type pos = {role: linear_term; role_attr: linear_term; selprefs: linear_term; gf |
27 | 27 | cr: string list; ce: string list; |
28 | 28 | is_necessary: bool; is_pro: bool; is_prong: bool; is_multi: bool; dir: string; morfs: StringSet.t} |
29 | 29 | |
(* [get_pro_lemma attrs] picks a pronoun lemma from the "PERS", "NUM" and
   "GEND" entries of [attrs].
   - No person: "pro".
   - First / second person: "pro1" / "pro2" without number, otherwise
     "ja" / "my" and "ty" / "wy" for "sg" / "pl".
   - Third person without number: "pro3".
   - Third person singular: "on" when only masculine genders (m1, m2, m3) are
     present, "ono" when only neuter ones (n1, n2), "ona" when only "f";
     "pro3sg" for any mixture or when no gender is known.
   - Third person plural: "oni" when only m1 / p1 are present, "one" when
     only other genders are, "pro3pl" otherwise.
   "GEND" may be a single [Val] or a [Variant] of [Val]s.
   Raises [Failure "get_pro_lemma"] on any attribute other than the three
   above, on a malformed "GEND" variant, and on an unknown person/number
   combination. *)
let get_pro_lemma attrs =
  let pers,num,gend = Xlist.fold attrs ("","",[]) (fun (pers,num,gend) -> function
      "PERS",Val s -> s,num,gend
    | "NUM",Val s -> pers,s,gend
    | "GEND",Val s -> pers,num,[s]
    | "GEND",Variant(_,l) -> pers,num,Xlist.map l (function (_,Val s) -> s | _ -> failwith "get_pro_lemma")
    | _ -> failwith "get_pro_lemma") in
  match pers,num with
    "",_ -> "pro"
  | "pri","" -> "pro1"
  | "pri","sg" -> "ja"
  | "pri","pl" -> "my"
  | "sec","" -> "pro2"
  | "sec","sg" -> "ty"
  | "sec","pl" -> "wy"
  | "ter","" -> "pro3"
  | "ter","sg" ->
      (* Flags: (some masculine seen, some neuter seen, feminine seen). *)
      (match Xlist.fold gend (false,false,false) (fun (m,n,f) -> function
            "m1" -> true,n,f
          | "m2" -> true,n,f
          | "m3" -> true,n,f
          | "n1" -> m,true,f
          | "n2" -> m,true,f
          | "f" -> m,n,true
          | _ -> m,n,f) with
        true,false,false -> "on"
      (* Neuter only; requiring the feminine flag as well would send a purely
         neuter gender to the "pro3sg" fallback. *)
      | false,true,false -> "ono"
      | false,false,true -> "ona"
      | _ -> "pro3sg")
  | "ter","pl" ->
      (* Flags: (m1 or p1 seen, any other gender seen). *)
      (match Xlist.fold gend (false,false) (fun (mo,nmo) -> function
            "m1" -> true,nmo
          | "p1" -> true,nmo
          | _ -> mo,true) with
        true,false -> "oni"
      | false,true -> "one"
      | _ -> "pro3pl")
  | _ -> failwith "get_pro_lemma"
68 | + | |
(* [make_sem_args sem_args] wraps a list of strings as a term: [Dot] for the
   empty list, otherwise the result of [ENIAM_LCGrules.make_variant] applied
   to the strings lifted to [Val]. *)
let make_sem_args sem_args =
  match sem_args with
  | [] -> Dot
  | _ :: _ ->
      let values = Xlist.map sem_args (fun s -> Val s) in
      ENIAM_LCGrules.make_variant values
71 | + | |
30 | 72 | let match_value v2 = function |
31 | 73 | Val v -> if v = v2 then Val v else raise Not_found |
32 | 74 | | _ -> failwith "match_value" |
... | ... | @@ -102,23 +144,24 @@ let rec match_arg_positions arg rev = function |
102 | 144 | (match l with |
103 | 145 | [] -> (*print_endline "match_arg_positions: not matched";*) match_arg_positions arg (p :: rev) positions |
104 | 146 | | [t] -> |
147 | + let t = SetAttr("gf",Val (ENIAMwalStringOf.gf p.gf),t) in | |
105 | 148 | let t = if p.gf = ENIAMwalTypes.SUBJ || p.gf = ENIAMwalTypes.OBJ || p.gf = ENIAMwalTypes.ARG then |
106 | 149 | SetAttr("role",p.role,SetAttr("role-attr",p.role_attr,SetAttr("selprefs",p.selprefs,t))) |
107 | - else if p.gf = ENIAMwalTypes.ADJUNCT then t else failwith "match_arg_positions: ni 2" in | |
108 | - let t = SetAttr("gf",Val (ENIAMwalStringOf.gf p.gf),t) in | |
150 | + else if p.gf = ENIAMwalTypes.ADJUNCT || p.gf = ENIAMwalTypes.NOSEM || p.gf = ENIAMwalTypes.CORE then t else failwith "match_arg_positions: ni 2" in | |
109 | 151 | let t = Xlist.fold p.cr t (fun t cr -> SetAttr("controller",Val cr,t)) in |
110 | 152 | let t = Xlist.fold p.ce t (fun t ce -> SetAttr("controllee",Val ce,t)) in |
153 | + let t = if p.gf = ENIAMwalTypes.NOSEM then Dot else t in | |
111 | 154 | if p.is_multi then (t, rev @ (p :: positions)) :: (match_arg_positions arg (p :: rev) positions) |
112 | 155 | else (t, rev @ positions) :: (match_arg_positions arg (p :: rev) positions) |
113 | 156 | | _ -> failwith "match_arg_positions: ni") |
114 | 157 | | [] -> (*Printf.printf "match_arg_positions: arg=%s rev=[%s] positions=[]\n%!" (string_of_arg arg) (String.concat "; " (Xlist.map rev string_of_position));*) [] |
115 | 158 | |
116 | 159 | (* Jeśli ta funkcja zwróci pustą listę, oznacza to, że argumentów nie dało się dopasować do pozycji *) |
117 | -let rec match_args_positions_rec positions = function | |
160 | +let rec match_args_positions_rec prong_attrs positions = function | |
118 | 161 | arg :: args -> |
119 | 162 | (* Printf.printf "match_args_positions_rec: args=%s :: [%s] positions=[%s]\n%!" (string_of_arg arg) (String.concat "; " (Xlist.map args string_of_arg)) (String.concat "; " (Xlist.map positions string_of_position)); *) |
120 | 163 | Xlist.fold (match_arg_positions arg [] positions) [] (fun found (arg_pos,positions) -> |
121 | - Xlist.fold (match_args_positions_rec positions args) found (fun found l -> (arg_pos :: l) :: found)) | |
164 | + Xlist.fold (match_args_positions_rec prong_attrs positions args) found (fun found l -> (arg_pos :: l) :: found)) | |
122 | 165 | | [] -> |
123 | 166 | (* Printf.printf "match_args_positions_rec: args=[] positions=[%s]\n%!" (String.concat "; " (Xlist.map positions string_of_position)); *) |
124 | 167 | let b = Xlist.fold positions false (fun b p -> p.is_necessary || b) in |
... | ... | @@ -126,8 +169,10 @@ let rec match_args_positions_rec positions = function |
126 | 169 | if b then [] else |
127 | 170 | [Xlist.fold positions [] (fun found p -> |
128 | 171 | if not p.is_pro then found else |
129 | - let attrs = ["role",p.role; "role-attr",p.role_attr; "selprefs",p.selprefs; "gf",Val (ENIAMwalStringOf.gf p.gf)] in | |
130 | - let attrs = if p.is_prong then attrs else attrs in (* FIXME: dodać number, gender *) | |
172 | + let attrs = if p.is_prong then prong_attrs else [] in (* FIXME: dodać number, gender *) | |
173 | + let lemma = get_pro_lemma attrs in | |
174 | + let sem_args = try StringMap.find ENIAMlexSemanticsData.pron_sem_args lemma with Not_found -> failwith "match_args_positions_rec" in | |
175 | + let attrs = ["meaning",Val lemma;"role",p.role; "role-attr",p.role_attr; "selprefs",p.selprefs; "gf",Val (ENIAMwalStringOf.gf p.gf); "agf",Val ""; "sem-args",make_sem_args sem_args] @ attrs in | |
131 | 176 | let attrs = Xlist.fold p.cr attrs (fun attrs cr -> ("controller",Val cr) :: attrs) in |
132 | 177 | let attrs = Xlist.fold p.ce attrs (fun attrs ce -> ("controllee",Val ce) :: attrs) in |
133 | 178 | Node{ENIAM_LCGrenderer.empty_node with lemma="pro"; pos="pro"; attrs=attrs} :: found)] |
... | ... | @@ -135,9 +180,9 @@ let rec match_args_positions_rec positions = function |
135 | 180 | (* FIXME: opcjonalność podrzędników argumentów zleksykalizowanych *) |
136 | 181 | |
137 | 182 | (* Jeśli ta funkcja zwróci pustą listę, oznacza to, że argumentów nie dało się dopasować do pozycji *) |
138 | -let match_args_positions args positions = | |
183 | +let match_args_positions prong_attrs args positions = | |
139 | 184 | (* Printf.printf "match_args_positions: args=[%s] positions=[%s]\n%!" (String.concat "; " (Xlist.map args string_of_arg)) (String.concat "; " (Xlist.map positions string_of_position)); *) |
140 | - Xlist.rev_map (match_args_positions_rec positions args) (function | |
185 | + Xlist.rev_map (match_args_positions_rec prong_attrs positions args) (function | |
141 | 186 | [] -> Dot |
142 | 187 | | [t] -> t |
143 | 188 | | l -> Tuple l) |
... | ... | @@ -186,8 +231,8 @@ let translate_position id p = |
186 | 231 | is_prong = p.ENIAMwalTypes.is_necessary = ENIAMwalTypes.ProNG; |
187 | 232 | is_multi = p.ENIAMwalTypes.is_necessary = ENIAMwalTypes.Multi; |
188 | 233 | dir= translate_dir p.ENIAMwalTypes.dir; |
189 | - morfs = Xlist.fold p.ENIAMwalTypes.morfs StringSet.empty (fun morfs morf -> | |
190 | - if morf = ENIAMwalTypes.LCG One then (Printf.printf "translate_position: One%!"; morfs) else | |
234 | + morfs = if p.ENIAMwalTypes.morfs=[ENIAMwalTypes.LCG One] then StringSet.empty else Xlist.fold p.ENIAMwalTypes.morfs StringSet.empty (fun morfs morf -> | |
235 | + if morf = ENIAMwalTypes.LCG One then (Printf.printf "translate_position: One%!\n"; morfs) else | |
191 | 236 | StringSet.add morfs (string_of_morf morf))} |
192 | 237 | |
193 | 238 | let get_phrase_symbol = function |
... | ... | @@ -201,6 +246,13 @@ let get_phrase_symbol = function |
201 | 246 | |
202 | 247 | exception NoFrame of string * string |
203 | 248 | |
(* Keeps only the "NUM", "GEND" and "PERS" pairs of [attrs]; every other
   attribute is dropped. Each kept pair is consed onto the accumulator of
   the fold. *)
let get_prong_attrs attrs =
  let is_agreement_attr name =
    name = "NUM" || name = "GEND" || name = "PERS" in
  Xlist.fold attrs [] (fun kept (name, value) ->
    if is_agreement_attr name then (name, value) :: kept else kept)
255 | + | |
204 | 256 | let rec assign_frames_rec tokens lex_sems tree arg_symbols visited = function |
205 | 257 | Ref i -> |
206 | 258 | if IntSet.mem visited i then Ref i,visited else |
... | ... | @@ -210,12 +262,14 @@ let rec assign_frames_rec tokens lex_sems tree arg_symbols visited = function |
210 | 262 | | Node t -> |
211 | 263 | let args,visited = assign_frames_rec tokens lex_sems tree arg_symbols visited t.args in |
212 | 264 | let t = {t with args=args} in |
265 | + (* print_endline ("assign_frames_rec: " ^ t.lemma); *) | |
213 | 266 | if t.symbol = Dot then Node t,visited else |
214 | 267 | let args = get_arg_symbols_tuple arg_symbols [] args in |
215 | 268 | let s = ExtArray.get lex_sems t.id in |
216 | 269 | let symbol = get_phrase_symbol t.symbol in |
217 | 270 | let frames = Xlist.fold s.ENIAMlexSemanticsTypes.frames [] (fun frames frame -> |
218 | 271 | (* print_endline ("selectors: " ^ ENIAMcategoriesPL.string_of_selectors frame.selectors); *) |
272 | + (* Printf.printf "assign_frames_rec: lemma=%s positions=[%s]\n%!" t.lemma (ENIAMwalStringOf.schema frame.positions); *) | |
219 | 273 | try |
220 | 274 | let attrs = apply_selectors t.attrs frame.selectors in |
221 | 275 | let frame = ENIAMsemLexicon.extend_frame symbol frame in |
... | ... | @@ -223,11 +277,12 @@ let rec assign_frames_rec tokens lex_sems tree arg_symbols visited = function |
223 | 277 | (attrs,frame,Xlist.rev_map frame.positions (translate_position (string_of_int t.id))) :: frames |
224 | 278 | with Not_found -> (*print_endline "rejected";*) frames) in |
225 | 279 | if frames = [] then failwith "assign_frames_rec: no frame" else |
280 | + let prong_attrs = get_prong_attrs t.attrs in | |
226 | 281 | let e = ENIAM_LCGreductions.get_variant_label () in |
227 | 282 | let l,_ = Xlist.fold frames ([],1) (fun (l,n) (attrs,frame,positions) -> |
228 | 283 | (* Printf.printf "assign_frames_rec: lemma=%s args=[%s] positions=[%s]\n%!" t.lemma (String.concat "; " (Xlist.map args string_of_arg)) (String.concat "; " (Xlist.map positions string_of_position)); *) |
229 | 284 | if frame.meanings = [] then failwith ("assign_frames_rec: no meanings '" ^ t.lemma ^ "'") else |
230 | - Xlist.fold (match_args_positions args positions) (l,n) (fun (l,n) args -> | |
285 | + Xlist.fold (match_args_positions prong_attrs args positions) (l,n) (fun (l,n) args -> | |
231 | 286 | Xlist.fold frame.meanings (l,n) (fun (l,n) (meaning,hipero,weight) -> |
232 | 287 | (string_of_int n, Node{t with attrs= |
233 | 288 | ("meaning",Val meaning) :: |
... | ... | @@ -235,7 +290,8 @@ let rec assign_frames_rec tokens lex_sems tree arg_symbols visited = function |
235 | 290 | ("arole",Val frame.arole) :: |
236 | 291 | ("arole-attr",Val frame.arole_attr) :: |
237 | 292 | ("arev",Val (if frame.arev then "+" else "-")) :: |
238 | - ("sem-args",if frame.sem_args = [] then Dot else ENIAM_LCGrules.make_variant (Xlist.map frame.sem_args (fun s -> Val s))) :: | |
293 | + ("agf",Val frame.agf) :: | |
294 | + ("sem-args",make_sem_args frame.sem_args) :: | |
239 | 295 | ("fopinion",Val (ENIAMwalStringOf.opinion frame.fopinion)) :: |
240 | 296 | ("sopinion",Val (ENIAMwalStringOf.opinion frame.sopinion)) :: t.attrs; args=args}) :: |
241 | 297 | l,n+1))) in |
... | ... | @@ -284,6 +340,18 @@ let assign_frames tokens lex_sems tree = |
284 | 340 | let _ = assign_frames_rec tokens lex_sems tree arg_symbols IntSet.empty (Ref 0) in |
285 | 341 | tree |
286 | 342 | |
(* [extract_attr pat rev attrs] removes the first pair of [attrs] whose key
   equals [pat] and returns the remaining pairs together with the removed
   value. [rev] holds, in reverse order, the pairs that have already been
   skipped; callers start with [].
   Raises Not_found when no key of [attrs] equals [pat]. *)
let rec extract_attr pat rev attrs =
  match attrs with
  | (key, value) :: rest when key = pat -> List.rev_append rev rest, value
  | skipped :: rest -> extract_attr pat (skipped :: rev) rest
  | [] -> raise Not_found
348 | + | |
(* [get_attr pat attrs] returns the value of the first pair of [attrs] whose
   key equals [pat].
   Raises Not_found when no key of [attrs] equals [pat]. *)
let rec get_attr pat attrs =
  match attrs with
  | (key, value) :: _ when key = pat -> value
  | _ :: rest -> get_attr pat rest
  | [] -> raise Not_found
354 | + | |
287 | 355 | let rec cut_nodes result_tree = function |
288 | 356 | | Node t -> |
289 | 357 | let i = ExtArray.add result_tree (Node t) in |
... | ... | @@ -297,6 +365,22 @@ let rec cut_nodes result_tree = function |
297 | 365 | | Dot -> Dot |
298 | 366 | | t -> failwith ("cut_nodes: " ^ ENIAM_LCGstringOf.linear_term 0 t) |
299 | 367 | |
368 | +exception AGF | |
369 | + | |
(* Filters a term by comparing each node's "agf" attribute with its "gf"
   attribute, and strips "agf" from the nodes that are kept.

   - Node: kept (without "agf") when "agf" is Val "" or equals "gf";
     otherwise AGF is raised. A node without "agf" is a hard failure.
   - Variant: alternatives that raise AGF are dropped; AGF is raised when
     none is left.
   - Tuple: every component is processed; AGF raised by a component
     propagates to the caller.
   - Dot: returned unchanged.

   Raises AGF as described above, and Failure for a node lacking "agf" or
   for any other kind of term. *)
let rec manage_agf = function
  | Node t ->
      let attrs,agf = try extract_attr "agf" [] t.attrs with Not_found -> failwith "manage_agf" in
      (* FIXME: this should probably be fixed so that gf is always set;
         until then a missing "gf" is treated as Dot. *)
      let gf = try get_attr "gf" t.attrs with Not_found -> Dot in
      if agf = Val "" || agf=gf then Node{t with attrs=attrs} else raise AGF
  | Variant(e,l) ->
      (* Keep only the alternatives that pass the check. *)
      let l = Xlist.fold l [] (fun l (i,t) -> try (i, manage_agf t) :: l with AGF -> l) in
      if l = [] then raise AGF else Variant(e,List.rev l)
  | Tuple l ->
      let l = Xlist.rev_map l manage_agf in
      Tuple(List.rev l)
  | Dot -> Dot
  (* The message used to be prefixed with "cut_nodes: ", which pointed at the wrong function. *)
  | t -> failwith ("manage_agf: " ^ ENIAM_LCGstringOf.linear_term 0 t)
383 | + | |
300 | 384 | let rec reduce_set_attr attr v = function |
301 | 385 | Node t -> Node{t with attrs=(attr,v) :: t.attrs} |
302 | 386 | | Variant(e,l) -> |
... | ... | @@ -313,6 +397,7 @@ let rec reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree = functio |
313 | 397 | | Node t -> |
314 | 398 | let args = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree t.args in |
315 | 399 | (* print_endline ("reduce_tree_rec 1: " ^ ENIAM_LCGstringOf.linear_term 0 args); *) |
400 | + let args = try manage_agf args with AGF -> failwith "reduce_tree_rec: AGF" in | |
316 | 401 | let args = cut_nodes result_tree args in |
317 | 402 | (* print_endline ("reduce_tree_rec 2: " ^ ENIAM_LCGstringOf.linear_term 0 args); *) |
318 | 403 | let id = |
... | ... | @@ -341,4 +426,96 @@ let reduce_tree tokens lex_sems orig_tree = |
341 | 426 | let _ = ExtArray.add result_tree Dot in |
342 | 427 | let t = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree orig_tree.(0) in |
343 | 428 | ExtArray.set result_tree 0 t; |
344 | - ExtArray.to_array result_tree | |
429 | + result_tree | |
430 | + | |
(* Tells whether a node's "gf" attribute is Val "subj".
   Fails with Failure when the node has no "gf" attribute or when the
   argument is not a Node. *)
let is_subj term =
  match term with
  | Node t ->
      let gf =
        try get_attr "gf" t.attrs
        with Not_found -> failwith "is_subj" in
      gf = Val "subj"
  | other -> failwith ("is_subj: " ^ ENIAM_LCGstringOf.linear_term 0 other)
436 | + | |
(* Tells whether a node's "gf" attribute is Val "core".
   Fails with Failure when the node has no "gf" attribute or when the
   argument is not a Node. *)
let is_core term =
  match term with
  | Node t ->
      let gf =
        try get_attr "gf" t.attrs
        with Not_found -> failwith "is_core" in
      gf = Val "core"
  | other -> failwith ("is_core: " ^ ENIAM_LCGstringOf.linear_term 0 other)
442 | + | |
(* Prepends ("coref", ce) to the attributes of a node whose "gf" attribute
   is Val "subj"; any other node is returned unchanged.
   Fails with Failure when the node has no "gf" attribute or when the
   argument is not a Node. *)
let set_subj_coref ce term =
  match term with
  | Node t ->
      let gf =
        try get_attr "gf" t.attrs
        with Not_found -> failwith "set_subj_coref" in
      if gf <> Val "subj" then Node t
      else Node {t with attrs = ("coref", ce) :: t.attrs}
  | other -> failwith ("set_subj_coref: " ^ ENIAM_LCGstringOf.linear_term 0 other)
448 | + | |
(* Prepends ("selprefs", selprefs) to the attributes of a node whose "gf"
   attribute is Val "core"; any other node is returned unchanged.
   Fails with Failure when the node has no "gf" attribute or when the
   argument is not a Node.
   FIXME: the previously existing selprefs should be removed. *)
let set_core_selprefs selprefs term =
  match term with
  | Node t ->
      let gf =
        try get_attr "gf" t.attrs
        with Not_found -> failwith "set_core_selprefs" in
      if gf <> Val "core" then Node t
      else Node {t with attrs = ("selprefs", selprefs) :: t.attrs}
  | other -> failwith ("set_core_selprefs: " ^ ENIAM_LCGstringOf.linear_term 0 other)
454 | + | |
(* Walks an argument term and, for every reference to a subject node,
   appends to [tree] a copy of that node carrying ("coref", ce) and points
   the reference at the copy. References to other nodes are left as they
   are; Variant and Tuple are traversed component by component.
   Fails with Failure on any other kind of term. *)
let rec set_subj_coref_args tree ce term =
  match term with
  | Dot -> Dot
  | Ref i ->
      let node = ExtArray.get tree i in
      if not (is_subj node) then Ref i
      else Ref (ExtArray.add tree (set_subj_coref ce node))
  | Tuple items ->
      let rewritten = Xlist.rev_map items (set_subj_coref_args tree ce) in
      Tuple (List.rev rewritten)
  | Variant (e, alternatives) ->
      let rewritten =
        Xlist.rev_map alternatives (fun (label, t) ->
          label, set_subj_coref_args tree ce t) in
      Variant (e, List.rev rewritten)
  | other -> failwith ("set_subj_coref_args: " ^ ENIAM_LCGstringOf.linear_term 0 other)
469 | + | |
(* Walks an argument term and, for every reference to a node whose "gf" is
   Val "core", appends to [tree] a copy of that node carrying
   ("selprefs", selprefs) and points the reference at the copy. References
   to other nodes are left as they are; Variant and Tuple are traversed
   component by component.
   Fails with Failure on any other kind of term. *)
let rec set_selprefs_core tree selprefs = function
    Ref i ->
      if is_core (ExtArray.get tree i) then
        (* The original node stays in place; the annotated copy gets a fresh index. *)
        let id = ExtArray.add tree (set_core_selprefs selprefs (ExtArray.get tree i)) in
        Ref id
      else Ref i
  | Variant(e,l) ->
      let l = Xlist.rev_map l (fun (i,t) -> i, set_selprefs_core tree selprefs t) in
      Variant(e,List.rev l)
  | Tuple l ->
      let l = Xlist.rev_map l (set_selprefs_core tree selprefs) in
      Tuple(List.rev l)
  | Dot -> Dot
  (* The message used to be prefixed with "set_subj_coref_args: ", which pointed at the wrong function. *)
  | t -> failwith ("set_selprefs_core: " ^ ENIAM_LCGstringOf.linear_term 0 t)
484 | + | |
(* Traverses the term reachable from the given root and moves attributes
   from a governing node onto its arguments:
   - for a node whose pos is "inf", "pcon" or "pant", the "controllee"
     attribute (when present) is removed and passed to the subject
     arguments via set_subj_coref_args;
   - for a node whose pos is "prep" and whose "gf" is Val "arg", the
     "selprefs" attribute is passed to the core arguments via
     set_selprefs_core and replaced on the node itself by Val "ALL".
   [visited] marks the indices of [tree] that were already processed, so
   every referenced entry is rewritten in place at most once.
   Not_found escapes from the "prep" branch when "gf" or "selprefs" is
   missing; Failure is raised on an unexpected kind of term. *)
let rec transfer_attributes_rec tree visited term =
  match term with
  | Dot -> Dot
  | Ref i ->
      if visited.(i) then Ref i
      else begin
        visited.(i) <- true;
        let rewritten = transfer_attributes_rec tree visited (ExtArray.get tree i) in
        ExtArray.set tree i rewritten;
        Ref i
      end
  | Node t ->
      (* Arguments are processed first, so the attribute transfer below
         operates on already rewritten argument nodes. *)
      let t = {t with args = transfer_attributes_rec tree visited t.args} in
      let takes_controllee = t.pos = "inf" || t.pos = "pcon" || t.pos = "pant" in
      let t =
        if takes_controllee then
          (try
             let attrs, ce = extract_attr "controllee" [] t.attrs in
             let args = set_subj_coref_args tree ce t.args in
             {t with attrs = attrs; args = args}
           with Not_found -> t)
        else if t.pos = "prep" && get_attr "gf" t.attrs = Val "arg" then begin
          let attrs, selprefs = extract_attr "selprefs" [] t.attrs in
          let args = set_selprefs_core tree selprefs t.args in
          {t with attrs = ("selprefs", Val "ALL") :: attrs; args = args}
        end
        else t in
      Node t
  | Tuple items ->
      let rewritten = Xlist.rev_map items (transfer_attributes_rec tree visited) in
      Tuple (List.rev rewritten)
  | Variant (e, alternatives) ->
      let rewritten =
        Xlist.rev_map alternatives (fun (label, t) ->
          label, transfer_attributes_rec tree visited t) in
      Variant (e, List.rev rewritten)
  | other -> failwith ("transfer_attributes_rec: " ^ ENIAM_LCGstringOf.linear_term 0 other)
515 | + | |
(* Runs transfer_attributes_rec over the whole [tree], starting from entry 0,
   and stores the rewritten root back at index 0. The tree is modified in
   place; the result is unit. *)
let transfer_attributes tree =
  let node_count = ExtArray.size tree in
  let visited = Array.make node_count false in
  (* The root is handled here directly, so it is marked up front. *)
  visited.(0) <- true;
  let root = ExtArray.get tree 0 in
  let rewritten_root = transfer_attributes_rec tree visited root in
  ExtArray.set tree 0 rewritten_root;
  ()
... | ... |
exec/ENIAMvisualization.ml
... | ... | @@ -928,10 +928,12 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam |
928 | 928 | "" |
929 | 929 | | SemParsed -> |
930 | 930 | if verbosity < 2 then () else ( |
931 | - ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_6_dependency_tree") "a4" result.dependency_tree6; | |
932 | - ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_7_dependency_tree") "a4" result.dependency_tree7; | |
933 | - ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_8_dependency_tree") "a4" result.dependency_tree8; | |
934 | - ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") "a4" result.dependency_tree9); | |
931 | + ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_6_dependency_tree") result.dependency_tree6; | |
932 | + ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") result.dependency_tree9; | |
933 | + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_6_dependency_tree") "a3" result.dependency_tree6; | |
934 | + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_7_dependency_tree") "a3" result.dependency_tree7; | |
935 | + ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_8_dependency_tree") "a3" result.dependency_tree8; | |
936 | + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") "a3" result.dependency_tree9); | |
935 | 937 | if verbosity = 0 then () else ( |
936 | 938 | ENIAMsemLatexOf.print_semantic_graph path (file_prefix ^ "_10_semantic_graph") "a3" result.semantic_graph10; |
937 | 939 | ENIAMsemGraphOf.print_semantic_graph2 path (file_prefix ^ "_11_semantic_graph") "" result.semantic_graph11); |
... | ... | @@ -940,24 +942,26 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam |
940 | 942 | sprintf "<BR><A HREF=\"%s_6_dependency_tree.pdf\">Dependency Tree References 6</A>\n" file_prefix ^ |
941 | 943 | sprintf "<BR><A HREF=\"%s_7_dependency_tree.pdf\">Dependency Tree References 7</A>\n" file_prefix ^ |
942 | 944 | sprintf "<BR><A HREF=\"%s_8_dependency_tree.pdf\">Dependency Tree References 8</A>\n" file_prefix ^ |
943 | - sprintf "<BR><A HREF=\"%s_9_dependency_tree.pdf\">Dependency Tree References 9</A>\n" file_prefix) ^ | |
945 | + sprintf "<BR><A HREF=\"%s_9_dependency_tree.pdf\">Dependency Tree References 9</A>\n" file_prefix ^ | |
946 | + sprintf "<BR><IMG SRC=\"%s_6_dependency_tree.png\">\n" file_prefix ^ | |
947 | + sprintf "<BR><IMG SRC=\"%s_9_dependency_tree.png\">\n" file_prefix) ^ | |
944 | 948 | (if verbosity = 0 then "" else |
945 | 949 | sprintf "<BR><A HREF=\"%s_10_semantic_graph.pdf\">Semantic Graph References 10</A>\n" file_prefix ^ |
946 | - sprintf "<BR><IMG SRC=\"%s_11_semantic_graph.png\">\n" file_prefix) ^ | |
950 | + sprintf "<BR><IMG SRC=\"%s_11_semantic_graph.png\">\n" file_prefix) ^ | |
947 | 951 | "" |
948 | 952 | | SemError2 -> |
949 | 953 | if verbosity = 0 then () else ( |
950 | - ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_6_dependency_tree") "a4" result.dependency_tree6; | |
954 | + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_6_dependency_tree") "a3" result.dependency_tree6; | |
951 | 955 | ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_6_dependency_tree") result.dependency_tree6; |
952 | - if result.dependency_tree7 <> [| |] then ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_7_dependency_tree") "a4" result.dependency_tree7; | |
953 | - if result.dependency_tree8 <> [| |] then ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_8_dependency_tree") "a4" result.dependency_tree8; | |
954 | - if result.dependency_tree9 <> [| |] then ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") "a4" result.dependency_tree9; | |
956 | + if result.dependency_tree7 <> [| |] then ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_7_dependency_tree") "a3" result.dependency_tree7; | |
957 | + if ExtArray.size result.dependency_tree8 <> 0 then ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_8_dependency_tree") "a3" result.dependency_tree8; | |
958 | + if result.dependency_tree9 <> [| |] then ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") "a3" result.dependency_tree9; | |
955 | 959 | if result.dependency_tree9 <> [| |] then ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") result.dependency_tree9); |
956 | 960 | sprintf "error_sem2: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.msg result.paths_size result.chart_size result.dependency_tree_size ^ |
957 | 961 | (if verbosity = 0 then "" else |
958 | 962 | sprintf "<BR><A HREF=\"%s_6_dependency_tree.pdf\">Dependency Tree References 6</A>\n" file_prefix ^ |
959 | 963 | (if result.dependency_tree7 <> [| |] then sprintf "<BR><A HREF=\"%s_7_dependency_tree.pdf\">Dependency Tree References 7</A>\n" file_prefix else "") ^ |
960 | - (if result.dependency_tree8 <> [| |] then sprintf "<BR><A HREF=\"%s_8_dependency_tree.pdf\">Dependency Tree References 8</A>\n" file_prefix else "") ^ | |
964 | + (if ExtArray.size result.dependency_tree8 <> 0 then sprintf "<BR><A HREF=\"%s_8_dependency_tree.pdf\">Dependency Tree References 8</A>\n" file_prefix else "") ^ | |
961 | 965 | (if result.dependency_tree9 <> [| |] then sprintf "<BR><A HREF=\"%s_9_dependency_tree.pdf\">Dependency Tree References 9</A>\n" file_prefix else "") ^ |
962 | 966 | (if result.dependency_tree9 <> [| |] then sprintf "<BR><IMG SRC=\"%s_9_dependency_tree.png\">\n" file_prefix else "") ^ |
963 | 967 | sprintf "<BR><IMG SRC=\"%s_6_dependency_tree.png\">\n" file_prefix) ^ |
... | ... |
exec/resources/lexicon-pl.dic
... | ... | @@ -34,9 +34,12 @@ num: \(1+qub):adjunct /(1+inclusion):adjunct; |
34 | 34 | measure: |
35 | 35 | \(1+num*number*case*gender*person*congr+num*number*case*gender*person*rec):Count \(1+qub):adjunct /(1+inclusion):adjunct; |
36 | 36 | |
37 | -prepnp: \(1+advp*T):adjunct /(np*T*case*T*T+day-month+day+year+date+hour+hour-minute):unk \(1+qub):adjunct /(1+inclusion):adjunct; | |
38 | -prepadjp: \(1+advp*T):adjunct /(adjp*T*case*T+adjp*sg*dat*m1+adjp*T*postp*T+adjp*sg*nom*f+advp*T):unk \(1+qub):adjunct /(1+inclusion):adjunct; | |
39 | -compar: \(1+advp*T):adjunct /(np*T*case*T*T+prepnp*T*T+prepadjp*T*T):unk \(1+qub):adjunct /(1+inclusion):adjunct; | |
37 | +#prepnp: \(1+advp*T):adjunct /(np*T*case*T*T+day-month+day+year+date+hour+hour-minute):unk \(1+qub):adjunct /(1+inclusion):adjunct; | |
38 | +#prepadjp: \(1+advp*T):adjunct /(adjp*T*case*T+adjp*sg*dat*m1+adjp*T*postp*T+adjp*sg*nom*f+advp*T):unk \(1+qub):adjunct /(1+inclusion):adjunct; | |
39 | +#compar: \(1+advp*T):adjunct /(np*T*case*T*T+prepnp*T*T+prepadjp*T*T):unk \(1+qub):adjunct /(1+inclusion):adjunct; | |
40 | +prepnp: \(1+advp*T):adjunct \(1+qub):adjunct /(1+inclusion):adjunct; | |
41 | +prepadjp: \(1+advp*T):adjunct \(1+qub):adjunct /(1+inclusion):adjunct; | |
42 | +compar: \(1+advp*T):adjunct \(1+qub):adjunct /(1+inclusion):adjunct; | |
40 | 43 | |
41 | 44 | adjp: \(1+qub):adjunct /(1+inclusion):adjunct \(1+adja):unk; |
42 | 45 | |
... | ... |
lexSemantics/ENIAMlexSemantics.ml
... | ... | @@ -190,17 +190,23 @@ let add_sem_args lemma pos frame = |
190 | 190 | |
191 | 191 | let assign_prep_semantics lemma = |
192 | 192 | let roles = try StringMap.find ENIAMlexSemanticsData.prep_roles lemma with Not_found -> [] in |
193 | - Printf.printf "assign_prep_semantics: |roles|=%d\n%!" (Xlist.size roles); | |
193 | + (* Printf.printf "assign_prep_semantics: |roles|=%d\n%!" (Xlist.size roles); *) | |
194 | + {empty_frame with | |
195 | + meanings = [find_prep_meaning lemma [Predef "ALL"]]; | |
196 | + positions= [{empty_position with | |
197 | + dir=if lemma="temu" then Backward_ else Forward_; gf=CORE; | |
198 | + morfs=ENIAMwalRenderer.prep_morfs; is_necessary=Req}]; | |
199 | + agf="arg"} :: | |
194 | 200 | Xlist.map roles (function (case,arole,arole_attr,hipero,sel_prefs) -> |
195 | - Printf.printf "assign_prep_semantics: case=%s arole=%s arole_attr=%s\n%!" case arole arole_attr; | |
201 | + (* Printf.printf "assign_prep_semantics: case=%s arole=%s arole_attr=%s\n%!" case arole arole_attr; *) | |
196 | 202 | let meaning = find_prep_meaning lemma hipero in (* FIXME: zaślepka dla meaning i weight *) |
197 | - print_endline "assign_prep_semantics 1"; | |
203 | + (* print_endline "assign_prep_semantics 1"; *) | |
198 | 204 | let positions = [{empty_position with |
199 | 205 | sel_prefs=sel_prefs; dir=if lemma="temu" then Backward_ else Forward_; |
200 | - morfs=ENIAMwalRenderer.assing_pref_morfs (lemma,case); is_necessary=Req}] in | |
201 | - print_endline "assign_prep_semantics 2"; | |
206 | + morfs=ENIAMwalRenderer.prep_morfs(*ENIAMwalRenderer.assing_prep_morfs (lemma,case)*); is_necessary=Req}] in | |
207 | + (* print_endline "assign_prep_semantics 2"; *) | |
202 | 208 | {empty_frame with selectors=[ENIAM_LCGlexiconTypes.Case,ENIAM_LCGlexiconTypes.Eq,[case]]; meanings=[meaning]; positions=find_selprefs positions; |
203 | - arole=arole; arole_attr=arole_attr; arev=false}) | |
209 | + arole=arole; arole_attr=arole_attr; arev=false; agf="adjunct"}) | |
204 | 210 | |
205 | 211 | let assign_num_semantics lemma = |
206 | 212 | let sems = try StringMap.find !num_sem lemma with Not_found -> [] in |
... | ... | @@ -267,7 +273,7 @@ let assign_valence tokens lex_sems group = |
267 | 273 | List.flatten (Xlist.rev_map (ENIAMvalence.transform_entry pos lemma neg pred aspect schema1) (fun (selectors,schema) -> |
268 | 274 | Xlist.rev_map (ENIAMvalence.get_aroles schema1 lemma pos) (fun (sel,arole,arole_attr,arev) -> |
269 | 275 | {selectors=sel @ selectors; meanings=Xlist.map meanings find_meaning; positions=schema; |
270 | - arole=arole; arole_attr=arole_attr; arev=arev; sem_args=[]; sopinion=sopinion; fopinion=fopinion}))))) in | |
276 | + arole=arole; arole_attr=arole_attr; arev=arev; agf=""; sem_args=[]; sopinion=sopinion; fopinion=fopinion}))))) in | |
271 | 277 | (* Printf.printf "E %s |connected|=%d\n" lemma (Xlist.size connected); *) |
272 | 278 | let connected = if connected = [] then List.flatten (Xlist.rev_map (make_unique schemata1) (semantize lemma pos)) else connected in |
273 | 279 | (* Printf.printf "F %s |connected|=%d\n" lemma (Xlist.size connected); *) |
... | ... |
lexSemantics/ENIAMlexSemanticsTypes.ml
... | ... | @@ -27,13 +27,14 @@ type frame = { |
27 | 27 | arole: string; |
28 | 28 | arole_attr: string; |
29 | 29 | arev: bool; |
30 | + agf: string; | |
30 | 31 | sem_args: string list; |
31 | 32 | (* has_context: bool; *) |
32 | 33 | sopinion: ENIAMwalTypes.opinion; |
33 | 34 | fopinion: ENIAMwalTypes.opinion; |
34 | 35 | } |
35 | 36 | |
36 | -let empty_frame = {selectors=[]; meanings=[]; positions=[]; arole=""; arole_attr=""; arev=false; sem_args=[]; (*has_context=false;*) | |
37 | +let empty_frame = {selectors=[]; meanings=[]; positions=[]; arole=""; arole_attr=""; arev=false; agf=""; sem_args=[]; (*has_context=false;*) | |
37 | 38 | sopinion=ENIAMwalTypes.Nieokreslony; fopinion=ENIAMwalTypes.Nieokreslony} |
38 | 39 | |
39 | 40 | type lex_sem = { |
... | ... |
lexSemantics/ENIAMwalRenderer.ml
... | ... | @@ -340,7 +340,7 @@ let adv_connected_adjuncts_simp = [ |
340 | 340 | adjunct [Tensor[Atom "advp"; Top]]; |
341 | 341 | ] |
342 | 342 | |
343 | -let assing_pref_morfs = function | |
343 | +let assing_prep_morfs = function | |
344 | 344 | "po","postp" -> [ |
345 | 345 | LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "dat"; Atom "m1"]); |
346 | 346 | LCG(Tensor[Atom "adjp"; Top; Atom "postp"; Top])] |
... | ... | @@ -349,3 +349,17 @@ let assing_pref_morfs = function |
349 | 349 | | _,case -> [ |
350 | 350 | LCG(Tensor[Atom "np"; Top; Atom case; Top; Top]); |
351 | 351 | LCG(Tensor[Atom "adjp"; Top; Atom case; Top])] |
352 | + | |
(* Case-independent list of phrase types, given as LCG tensors: noun and
   adjective phrases with the "case" atom, two fixed adjective forms,
   adverbial phrases and the date/time atoms. *)
let prep_morfs =
  let phrase atoms = LCG (Tensor atoms) in
  let bare name = phrase [Atom name] in
  [
    phrase [Atom "np"; Top; Atom "case"; Top; Top];
    phrase [Atom "adjp"; Top; Atom "case"; Top];
    phrase [Atom "adjp"; Atom "sg"; Atom "dat"; Atom "m1"];
    phrase [Atom "adjp"; Atom "sg"; Atom "nom"; Atom "f"];
    phrase [Atom "advp"; Top];
    bare "year";
    bare "hour-minute";
    bare "day-month";
    bare "hour";
    bare "day";
    bare "date";
  ]
... | ... |