From f96aa7383ee8237b68a1ad301802dae6169713cc Mon Sep 17 00:00:00 2001 From: Wojciech Jaworski <wjaworski@mimuw.edu.pl> Date: Sat, 10 Jun 2017 15:03:48 +0200 Subject: [PATCH] Dezambiguacja preferencji selekcyjnych --- exec/ENIAMdisambiguation.ml | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- exec/ENIAMexec.ml | 15 +++++++++++++-- exec/ENIAMexecTypes.ml | 4 ++-- exec/ENIAMsemGraph.ml | 82 +++++++++++++++++++++++++++++++++++++++++----------------------------------------- exec/ENIAMsemValence.ml | 203 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------- exec/ENIAMvisualization.ml | 26 +++++++++++++++----------- exec/resources/lexicon-pl.dic | 9 ++++++--- lexSemantics/ENIAMlexSemantics.ml | 20 +++++++++++++------- lexSemantics/ENIAMlexSemanticsTypes.ml | 3 ++- lexSemantics/ENIAMwalRenderer.ml | 16 +++++++++++++++- 10 files changed, 371 insertions(+), 84 deletions(-) diff --git a/exec/ENIAMdisambiguation.ml b/exec/ENIAMdisambiguation.ml index 7230502..1af7cda 100644 --- a/exec/ENIAMdisambiguation.ml +++ b/exec/ENIAMdisambiguation.ml @@ -50,8 +50,8 @@ let rec select_random_rec selection = function | t -> failwith ("select_random_rec: " ^ ENIAM_LCGstringOf.linear_term 0 t) let select_random tree = - Int.fold 0 (Array.length tree - 1) StringMap.empty (fun selection i -> - select_random_rec selection tree.(i)) + Int.fold 0 (ExtArray.size tree - 1) StringMap.empty (fun selection i -> + select_random_rec selection (ExtArray.get tree i)) let rec apply_selection_rec selection = function Ref i -> Ref i @@ -119,5 +119,76 @@ let rearrange_tree tree = let random_tree tokens lex_sems tree = (* print_endline "random_tree"; *) let selection = select_random tree in - let tree = apply_selection selection tree in + let tree = apply_selection selection (ExtArray.to_array tree) in rearrange_tree tree + 
+let rec selprefs_rec cost = function + Ref i -> cost.(i), Ref i + | Node t -> -1, Node{t with args = snd(selprefs_rec cost t.args)} + | Variant(e,l) -> + let c,l = Xlist.fold l (max_int,[]) (fun (min_c,l) (i,t) -> + let c,t = selprefs_rec cost t in + if c < min_c then c,[i,t] else + if c > min_c then min_c,l else + min_c, (i,t) :: l) in + (match l with + [_,t] -> c,t + | _ -> c,Variant(e,List.rev l)) + | Tuple l -> + let c,l = Xlist.fold l (0,[]) (fun (c,l) t -> + let c2,t = selprefs_rec cost t in + c+c2, t :: l) in + c,Tuple(List.rev l) + | Dot -> 0, Dot + | t -> failwith ("selprefs_rec: " ^ ENIAM_LCGstringOf.linear_term 0 t) + +let rec get_attr pat = function + [] -> raise Not_found + | (s,v) :: l -> + if s = pat then v + else get_attr pat l + +let rec list_of_selprefs = function + Val s -> [s] + | Dot -> [] + | t -> failwith ("list_of_selprefs: " ^ ENIAM_LCGstringOf.linear_term 0 t) + +let map_of_hipero = function + Variant(_,l) -> Xlist.fold l StringMap.empty (fun map -> function + _,Tuple[Val hipero; Val cost] -> StringMap.add_inc map hipero (int_of_string cost) (fun _ -> failwith "map_of_hipero 1") + | _ -> failwith "map_of_hipero 2") + | Tuple[Val hipero; Val cost] -> StringMap.add StringMap.empty hipero (int_of_string cost) + | t -> failwith ("map_of_hipero: " ^ ENIAM_LCGstringOf.linear_term 0 t) + +let rec count_selprefs_cost tree cost = function + Ref i -> + if cost.(i) = -1 then + let c = count_selprefs_cost tree cost (ExtArray.get tree i) in + cost.(i) <- c; + c + else cost.(i) + | Node t -> + (count_selprefs_cost tree cost t.args) + + (match try get_attr "gf" t.attrs with Not_found -> Val "" with + Val "adjunct" -> 100 + | Val "subj" | Val "obj" | Val "arg" | Val "core" -> + let selprefs = try list_of_selprefs (get_attr "selprefs" t.attrs) with Not_found -> failwith "count_selprefs_cost: no selprefs" in + let hipero = try map_of_hipero (get_attr "hipero" t.attrs) with Not_found -> failwith "count_selprefs_cost: no hipero" in + Xlist.fold selprefs 1000 
(fun cost selpref -> + try min cost (StringMap.find hipero selpref) with Not_found -> cost) + | Val "" -> 200 + | Val s -> failwith ("count_selprefs_cost: unknown gf=" ^ s ^ " for " ^ t.lemma) + | _ -> failwith "count_selprefs_cost") + | Variant(e,l) -> + Xlist.fold l max_int (fun min_c (_,t) -> + min min_c (count_selprefs_cost tree cost t)) + | Tuple l -> Xlist.fold l 0 (fun c t -> c + count_selprefs_cost tree cost t) + | Dot -> 0 + | t -> failwith ("count_selprefs_cost: " ^ ENIAM_LCGstringOf.linear_term 0 t) + +let selprefs tree = + let cost = Array.make (ExtArray.size tree) (-1) in + cost.(0) <- count_selprefs_cost tree cost (ExtArray.get tree 0); + Int.iter 0 (ExtArray.size tree - 1) (fun i -> + ExtArray.set tree i (snd (selprefs_rec cost (ExtArray.get tree i)))); + () diff --git a/exec/ENIAMexec.ml b/exec/ENIAMexec.ml index 40a4ceb..b09d326 100644 --- a/exec/ENIAMexec.ml +++ b/exec/ENIAMexec.ml @@ -392,11 +392,19 @@ let eniam_semantic_processing verbosity tokens lex_sems result = let tree = ENIAMsemValence.reduce_tree tokens lex_sems tree in let result = if verbosity < 2 then result else {result with dependency_tree8=tree} in tree,result - with e -> [| |],{result with status=SemError2; msg=Printexc.to_string e} in + with e -> ExtArray.make 0 Dot,{result with status=SemError2; msg=Printexc.to_string e} in + if result.status = SemError2 then result else + let result = + try + ENIAMsemValence.transfer_attributes tree; (* niejawna zmiana imperatywna w tree *) + result + with e -> {result with status=SemError2; msg=Printexc.to_string e} in if result.status = SemError2 then result else let tree,result = try - let tree = ENIAMdisambiguation.random_tree tokens lex_sems tree in + ENIAMdisambiguation.selprefs tree; (* niejawna zmiana imperatywna w tree *) + (* let tree = ENIAMdisambiguation.random_tree tokens lex_sems tree in *) + let tree = ExtArray.to_array tree in let result = if verbosity = 0 then result else {result with dependency_tree9=tree} in tree,result with e 
-> [| |],{result with status=SemError2; msg=Printexc.to_string e} in @@ -406,6 +414,9 @@ let eniam_semantic_processing verbosity tokens lex_sems result = let graph = ENIAMsemGraph.translate tokens lex_sems tree in let result = if verbosity = 0 then result else {result with semantic_graph10=graph} in let graph = ENIAMsemGraph.make_tree graph in + let graph = ENIAMsemGraph.simplify_tree graph in +(* let graph = ENIAMsemGraph.manage_quantification graph in *) + (* let graph = ENIAMsemGraph.simplify_gender graph in *) let result = if verbosity = 0 then result else {result with semantic_graph11=graph} in graph,result with e -> ENIAMsemTypes.Dot,{result with status=SemError2; msg=Printexc.to_string e} in diff --git a/exec/ENIAMexecTypes.ml b/exec/ENIAMexecTypes.ml index fb48bc3..4fe50be 100644 --- a/exec/ENIAMexecTypes.ml +++ b/exec/ENIAMexecTypes.ml @@ -43,7 +43,7 @@ type eniam_parse_result = { dependency_tree5: linear_term array; dependency_tree6: linear_term array; dependency_tree7: linear_term array; - dependency_tree8: linear_term array; + dependency_tree8: linear_term ExtArray.t; dependency_tree9: linear_term array; semantic_graph10: ENIAMsemTypes.linear_term array; semantic_graph11: ENIAMsemTypes.linear_term; @@ -183,7 +183,7 @@ let empty_eniam_parse_result = { dependency_tree5=[| |]; dependency_tree6=[| |]; dependency_tree7=[| |]; - dependency_tree8=[| |]; + dependency_tree8=ExtArray.make 0 Dot; dependency_tree9=[| |]; semantic_graph10=[| |]; semantic_graph11=ENIAMsemTypes.Dot; diff --git a/exec/ENIAMsemGraph.ml b/exec/ENIAMsemGraph.ml index 1d3d198..600f93b 100644 --- a/exec/ENIAMsemGraph.ml +++ b/exec/ENIAMsemGraph.ml @@ -69,20 +69,12 @@ let make_relation t c = Relation(t.role,t.role_attr,c) | s -> failwith ("make_relation: " (*^ s*)) -let create_normal_concept (*roles role_attrs*) tokens lex_sems t sem_args = -(* let sem_args = if t.pos = "pro" then - match get_person t.attrs with - "pri" -> ["indexical"] - | "sec" -> ["indexical"] - | "ter" -> 
["coreferential";"deictic"] - | "" -> ["indexical";"coreferential";"deictic"] - | _ -> failwith "create_normal_concept: pro" - else sem_args in (* FIXME: przesunąć to do rozszerzania path_array *) - if t.agf = ENIAMwalTypes.NOSEM then t.args else*) +let create_normal_concept (*roles role_attrs*) tokens lex_sems t = + (*if t.agf = ENIAMwalTypes.NOSEM then t.args else*) let c = {empty_concept with c_sense = if t.lemma = "<root>" then Dot else Val t.meaning; c_relations=t.args; - c_quant=(*make_sem_args sem_args*)Dot;(* FIXME *) + c_quant=(*make_sem_args*) t.sem_args; c_variable=string_of_int t.id,""; c_pos=(*if t.id >= Array.length tokens then -1 else*) (ExtArray.get tokens t.id).ENIAMtokenizerTypes.beg; c_local_quant=true} in @@ -145,7 +137,8 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t sem_args = let c = Xlist.fold t.attrs c (fun c -> function | "ASPECT",_ -> c | "TENSE",Val t -> {c with c_relations=Tuple[c.c_relations;SingleRelation t]} - | "NEG",Val "+" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} + | "NEGATION",Val "aff" -> c + | "NEGATION",Val "neg" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} | e,t -> failwith ("create_normal_concept verb: " ^ e)) in let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in let _ = ExtArray.add lex_sems in @@ -181,7 +174,8 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t sem_args = | "ASPECT",_ -> c (* | "TYPE",Val "int" -> {c with c_quant=Tuple[c.c_quant;Val "interrogative"]} *) | "TYPE",_ -> c - | "NEG",Val "+" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} + | "NEGATION",Val "aff" -> c + | "NEGATION",Val "neg" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} | e,t -> failwith ("create_normal_concept adv: " ^ e)) in Relation(t.role,t.role_attr,Concept c) else if t.pos = "pro" || t.pos = "ppron12" || t.pos = "ppron3" || t.pos = "siebie" then (* FIXME: indexicalność *) @@ -193,6 +187,8 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t sem_args = | "CASE",_ 
-> c | "SYN",_ -> c | "NSEM",_ -> c + | "controller",_ -> c + | "coref",_ -> c | e,t -> failwith ("create_normal_concept pron: " ^ e)) in Relation(t.role,t.role_attr,Concept c) else if t.pos = "prep" then @@ -239,11 +235,12 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t sem_args = let c = Xlist.fold t.attrs c (fun c -> function | e,t -> failwith ("create_normal_concept sinterj: " ^ e)) in Concept c else + if t.lemma = "<root>" then t.args else if t.pos = "interp" && t.lemma = "</sentence>" then let l = List.rev (make_args_list t.args) in - Xlist.fold (List.tl l) (List.hd l) (fun t s -> AddRelation(RemoveRelation t,"Next","Clause",RemoveRelation s)) else + Xlist.fold (List.tl l) (RemoveRelation(List.hd l)) (fun t s -> AddRelation(t,"Next","Clause",RemoveRelation s)) else if t.pos = "interp" && t.lemma = "<sentence>" then t.args else - if t.pos = "interp" && t.lemma = "”s" then +(* if t.pos = "interp" && t.lemma = "”s" then let l = List.rev (make_args_list t.args) in let x = Xlist.fold (List.tl l) (List.hd l) (fun t s -> AddRelation(RemoveRelation t,"Next","Sentence",RemoveRelation s)) in Relation(t.arole,t.arole_attr,x) else (* FIXME: czy na pewno tu i w następnych arole a nie position.role? *) @@ -267,9 +264,9 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t sem_args = if t.pos = "interp" && t.lemma = "?" 
then SingleRelation("int") else if t.pos = "interp" && t.lemma = "„" then Relation(t.role,t.role_attr,RemoveRelation t.args) else - if t.pos = "interp" || t.lemma = "</or-sentence>" then Relation(t.role,t.role_attr,t.args) else ( + if t.pos = "interp" || t.lemma = "</or-sentence>" then Relation(t.role,t.role_attr,t.args) else*) ( if t.pos = "interp" then Node t else - if t.pos = "" then Relation(t.role,t.role_attr,t.args) else + (*if t.pos = "" then Relation(t.role,t.role_attr,t.args) else*) (* print_endline t.lemma; *) Node t) @@ -293,6 +290,7 @@ let rec translate_node tokens lex_sems t = | "arole-attr",Val s -> {t with arole_attr=s},attrs | "arev",Val "-" -> {t with arev=false},attrs | "arev",Val "+" -> {t with arev=true},attrs + | "agf",Val s -> t,attrs | "sem-args",s -> {t with sem_args=s},attrs | "fopinion",_ -> t,attrs | "sopinion",_ -> t,attrs @@ -309,6 +307,8 @@ let rec translate_node tokens lex_sems t = | "MOOD",s -> t,("MOOD",s) :: attrs | "TENSE",s -> t,("TENSE",s) :: attrs | "controller",s -> t,("controller",s) :: attrs + | "controllee",s -> t,("controllee",s) :: attrs + | "coref",s -> t,("coref",s) :: attrs | "CAT",_ -> t,attrs | "NUM",s -> t,("NUM",s) :: attrs | "CASE",s -> t,("CASE",s) :: attrs @@ -329,7 +329,7 @@ let rec translate_node tokens lex_sems t = and create_concepts tokens lex_sems = function ENIAM_LCGtypes.Node t -> let t = translate_node tokens lex_sems t in - create_normal_concept tokens lex_sems t [] + create_normal_concept tokens lex_sems t | ENIAM_LCGtypes.Tuple l -> Tuple(Xlist.map l (create_concepts tokens lex_sems)) | ENIAM_LCGtypes.Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, create_concepts tokens lex_sems t)) | ENIAM_LCGtypes.Dot -> Dot @@ -365,11 +365,11 @@ let rec make_tree_rec references = function (* | t -> failwith ("make_tree_rec: " ^ LCGstringOf.linear_term 0 t) *) let make_tree references = - RemoveRelation(make_tree_rec references references.(0)) -(* + (*RemoveRelation*)(make_tree_rec references 
references.(0)) + let rec simplify_tree_add_relation r a s = function - Concept c -> Concept{c with c_relations=Tuple[Relation(Val r,Val a,s);c.c_relations]} - | Context c -> Context{c with cx_relations=Tuple[Relation(Val r,Val a,s);c.cx_relations]} + Concept c -> Concept{c with c_relations=Tuple[Relation(r,a,s);c.c_relations]} + | Context c -> Context{c with cx_relations=Tuple[Relation(r,a,s);c.cx_relations]} | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, simplify_tree_add_relation r a s t)) | t -> AddRelation(t,r,a,s) @@ -465,13 +465,13 @@ let rec simplify_tree = function (* Variant(e,Xlist.map l (fun (i,t) -> i, simplify_tree t)) *) | Dot -> Dot | Val s -> Val s - | t -> failwith ("simplify_tree: " ^ LCGstringOf.linear_term 0 t) + | t -> failwith ("simplify_tree: " ^ ENIAMsemStringOf.linear_term 0 t) let rec manage_quantification2 (quants,quant) = function Tuple l -> Xlist.fold l (quants,quant) manage_quantification2 | Dot -> quants,quant | Val s -> quants,Tuple[Val s;quant] - | t -> (Relation(Val "Quantifier",Val "",t)) :: quants,quant + | t -> (Relation("Quantifier","",t)) :: quants,quant let rec manage_quantification = function Node t -> Node{t with args=manage_quantification t.args} @@ -488,9 +488,9 @@ let rec manage_quantification = function | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, manage_quantification t)) | Dot -> Dot | Val s -> Val s - | t -> failwith ("manage_quantification: " ^ LCGstringOf.linear_term 0 t) + | t -> failwith ("manage_quantification: " ^ ENIAMsemStringOf.linear_term 0 t) -let simplify_gender2 = function +(*let simplify_gender2 = function Variant(e,l) -> (try let l2 = List.sort compare (Xlist.rev_map l (function (_,Val s) -> s | _ -> raise Not_found)) in @@ -523,30 +523,30 @@ let rec simplify_gender = function | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, simplify_gender t)) | Dot -> Dot | Val s -> Val s - | t -> failwith ("simplify_gender: " ^ LCGstringOf.linear_term 0 t) + | t -> failwith 
("simplify_gender: " ^ ENIAMsemStringOf.linear_term 0 t)*) (***************************************************************************************) - +(* let rec validate_semantics_quant = function Val _ -> true | Variant(e,l) -> Xlist.fold l true (fun b (_,t) -> b && validate_semantics_quant t) | Tuple l -> Xlist.fold l true (fun b t -> b && validate_semantics_quant t) | Dot -> true - | t -> (*print_endline ("validate_semantics_quant: " ^ LCGstringOf.linear_term 0 t);*) false + | t -> (*print_endline ("validate_semantics_quant: " ^ ENIAMsemStringOf.linear_term 0 t);*) false let rec validate_semantics_sense = function Val _ -> true | Dot -> true - | t -> (*print_endline ("validate_semantics_sense: " ^ LCGstringOf.linear_term 0 t);*) false + | t -> (*print_endline ("validate_semantics_sense: " ^ ENIAMsemStringOf.linear_term 0 t);*) false let rec validate_semantics_rel_name = function Val _ -> true - | t -> (*print_endline ("validate_semantics_rel_name: " ^ LCGstringOf.linear_term 0 t);*) false + | t -> (*print_endline ("validate_semantics_rel_name: " ^ ENIAMsemStringOf.linear_term 0 t);*) false let rec validate_semantics = function Context c -> validate_semantics_sense c.cx_sense && validate_semantics_contents c.cx_contents && validate_semantics_relations c.cx_relations | Variant(e,l) -> Xlist.fold l true (fun b (_,t) -> b && validate_semantics t) - | t -> (*print_endline ("validate_semantics: " ^ LCGstringOf.linear_term 0 t);*) false + | t -> (*print_endline ("validate_semantics: " ^ ENIAMsemStringOf.linear_term 0 t);*) false and validate_semantics_relations = function SingleRelation r -> validate_semantics_rel_name r @@ -555,20 +555,20 @@ and validate_semantics_relations = function | Variant(e,l) -> Xlist.fold l true (fun b (_,t) -> b && validate_semantics_relations t) | Tuple l -> Xlist.fold l true (fun b t -> b && validate_semantics_relations t) | Dot -> true - | t -> (*print_endline ("validate_semantics_relations: " ^ LCGstringOf.linear_term 0 t);*) false + | 
t -> (*print_endline ("validate_semantics_relations: " ^ ENIAMsemStringOf.linear_term 0 t);*) false and validate_semantics_concept = function Concept c -> validate_semantics_sense c.c_sense && validate_semantics_sense c.c_name && validate_semantics_quant c.c_quant && validate_semantics_relations c.c_relations | Context c -> validate_semantics_sense c.cx_sense && validate_semantics_contents c.cx_contents && validate_semantics_relations c.cx_relations | Variant(e,l) -> Xlist.fold l true (fun b (_,t) -> b && validate_semantics_concept t) - | t -> (*print_endline ("validate_semantics_concept: " ^ LCGstringOf.linear_term 0 t);*) false + | t -> (*print_endline ("validate_semantics_concept: " ^ ENIAMsemStringOf.linear_term 0 t);*) false and validate_semantics_contents = function Concept c -> validate_semantics_concept (Concept c) | Context c -> validate_semantics_concept (Context c) | Variant(e,l) -> Xlist.fold l true (fun b (_,t) -> b && validate_semantics_contents t) | Tuple l -> Xlist.fold l true (fun b t -> b && validate_semantics_contents t) - | t -> (*print_endline ("validate_semantics_contents: " ^ LCGstringOf.linear_term 0 t);*) false + | t -> (*print_endline ("validate_semantics_contents: " ^ ENIAMsemStringOf.linear_term 0 t);*) false (***************************************************************************************) @@ -596,7 +596,7 @@ let rec find_multiple_variants v m = function Xlist.fold vl v StringSet.union, m | Dot -> v,m | Val s -> v,m - | t -> failwith ("find_multiple_variants: " ^ LCGstringOf.linear_term 0 t) + | t -> failwith ("find_multiple_variants: " ^ ENIAMsemStringOf.linear_term 0 t) type variant_structure = C of variant_structure * variant_structure @@ -636,7 +636,7 @@ let rec create_variant_structure = function n,V(e,n,List.rev l) | Dot -> 1,E | Val s -> 1,E - | t -> failwith ("create_variant_structure: " ^ LCGstringOf.linear_term 0 t) + | t -> failwith ("create_variant_structure: " ^ ENIAMsemStringOf.linear_term 0 t) let rec 
get_all_variants = function Concept c -> @@ -667,7 +667,7 @@ let rec get_all_variants = function List.rev (Xlist.fold l [] (fun l (_,t) -> get_all_variants t @ l)) | Dot -> [Dot] | Val s -> [Val s] - | t -> failwith ("get_all_variants: " ^ LCGstringOf.linear_term 0 t) + | t -> failwith ("get_all_variants: " ^ ENIAMsemStringOf.linear_term 0 t) let _ = Random.self_init () @@ -693,7 +693,7 @@ let rec draw_variant = function draw_variant (s,t) | E,Dot -> Dot | E,Val s -> Val s - | s,t -> (*print_endline ("draw_variant: " ^ LCGstringOf.linear_term 0 t);*) failwith ("draw_variant: " ^ string_of_variant_structure s) + | s,t -> (*print_endline ("draw_variant: " ^ ENIAMsemStringOf.linear_term 0 t);*) failwith ("draw_variant: " ^ string_of_variant_structure s) let rec get_some_variants chosen = function Concept c -> (* FIXME: czy pozostałe atrybuty można pominąć? *) @@ -715,7 +715,7 @@ let rec get_some_variants chosen = function else Variant(e,Xlist.map l (fun (i,t) -> i,get_some_variants chosen t)) | Dot -> Dot | Val s -> Val s - | t -> failwith ("get_some_variants: " ^ LCGstringOf.linear_term 0 t) + | t -> failwith ("get_some_variants: " ^ ENIAMsemStringOf.linear_term 0 t) let get_all_multiple_variants t mv = let ll = StringMap.fold mv [] (fun ll e l -> @@ -769,7 +769,7 @@ let draw_trees max_n t = let n,s = create_variant_structure t in n,s,t) in let sum_n = Xlist.fold multiple_variants 0 (fun sum_n (n,_,_) -> sum_n + n) in -(* print_endline (LCGstringOf.linear_term 0 t); +(* print_endline (ENIAMsemStringOf.linear_term 0 t); print_endline (string_of_variant_structure s);*) if sum_n <= max_n then List.flatten (Xlist.rev_map multiple_variants (fun (n,s,t) -> diff --git a/exec/ENIAMsemValence.ml b/exec/ENIAMsemValence.ml index c3626fb..079bf8f 100644 --- a/exec/ENIAMsemValence.ml +++ b/exec/ENIAMsemValence.ml @@ -27,6 +27,48 @@ type pos = {role: linear_term; role_attr: linear_term; selprefs: linear_term; gf cr: string list; ce: string list; is_necessary: bool; is_pro: bool; 
is_prong: bool; is_multi: bool; dir: string; morfs: StringSet.t} +let get_pro_lemma attrs = + let pers,num,gend = Xlist.fold attrs ("","",[]) (fun (pers,num,gend) -> function + "PERS",Val s -> s,num,gend + | "NUM",Val s -> pers,s,gend + | "GEND",Val s -> pers,num,[s] + | "GEND",Variant(_,l) -> pers,num,Xlist.map l (function (_,Val s) -> s | _ -> failwith "get_pro_lemma") + | _ -> failwith "get_pro_lemma") in + match pers,num with + "",_ -> "pro" + | "pri","" -> "pro1" + | "pri","sg" -> "ja" + | "pri","pl" -> "my" + | "sec","" -> "pro2" + | "sec","sg" -> "ty" + | "sec","pl" -> "wy" + | "ter","" -> "pro3" + | "ter","sg" -> + (match Xlist.fold gend (false,false,false) (fun (m,n,f) -> function + "m1" -> true,n,f + | "m2" -> true,n,f + | "m3" -> true,n,f + | "n1" -> m,true,f + | "n2" -> m,true,f + | "f" -> m,n,true + | _ -> m,n,f) with + true,false,false -> "on" + | false,true,true -> "ono" + | false,false,true -> "ona" + | _ -> "pro3sg") + | "ter","pl" -> + (match Xlist.fold gend (false,false) (fun (mo,nmo) -> function + "m1" -> true,nmo + | "p1" -> true,nmo + | _ -> mo,true) with + true,false -> "oni" + | false,true -> "one" + | _ -> "pro3pl") + | _ -> failwith "get_pro_lemma" + +let make_sem_args sem_args = + if sem_args = [] then Dot else ENIAM_LCGrules.make_variant (Xlist.map sem_args (fun s -> Val s)) + let match_value v2 = function Val v -> if v = v2 then Val v else raise Not_found | _ -> failwith "match_value" @@ -102,23 +144,24 @@ let rec match_arg_positions arg rev = function (match l with [] -> (*print_endline "match_arg_positions: not matched";*) match_arg_positions arg (p :: rev) positions | [t] -> + let t = SetAttr("gf",Val (ENIAMwalStringOf.gf p.gf),t) in let t = if p.gf = ENIAMwalTypes.SUBJ || p.gf = ENIAMwalTypes.OBJ || p.gf = ENIAMwalTypes.ARG then SetAttr("role",p.role,SetAttr("role-attr",p.role_attr,SetAttr("selprefs",p.selprefs,t))) - else if p.gf = ENIAMwalTypes.ADJUNCT then t else failwith "match_arg_positions: ni 2" in - let t = SetAttr("gf",Val 
(ENIAMwalStringOf.gf p.gf),t) in + else if p.gf = ENIAMwalTypes.ADJUNCT || p.gf = ENIAMwalTypes.NOSEM || p.gf = ENIAMwalTypes.CORE then t else failwith "match_arg_positions: ni 2" in let t = Xlist.fold p.cr t (fun t cr -> SetAttr("controller",Val cr,t)) in let t = Xlist.fold p.ce t (fun t ce -> SetAttr("controllee",Val ce,t)) in + let t = if p.gf = ENIAMwalTypes.NOSEM then Dot else t in if p.is_multi then (t, rev @ (p :: positions)) :: (match_arg_positions arg (p :: rev) positions) else (t, rev @ positions) :: (match_arg_positions arg (p :: rev) positions) | _ -> failwith "match_arg_positions: ni") | [] -> (*Printf.printf "match_arg_positions: arg=%s rev=[%s] positions=[]\n%!" (string_of_arg arg) (String.concat "; " (Xlist.map rev string_of_position));*) [] (* Jeśli ta funkcja zwróci pustą listę, oznacza to, że argumentów nie dało się dopasować do pozycji *) -let rec match_args_positions_rec positions = function +let rec match_args_positions_rec prong_attrs positions = function arg :: args -> (* Printf.printf "match_args_positions_rec: args=%s :: [%s] positions=[%s]\n%!" (string_of_arg arg) (String.concat "; " (Xlist.map args string_of_arg)) (String.concat "; " (Xlist.map positions string_of_position)); *) Xlist.fold (match_arg_positions arg [] positions) [] (fun found (arg_pos,positions) -> - Xlist.fold (match_args_positions_rec positions args) found (fun found l -> (arg_pos :: l) :: found)) + Xlist.fold (match_args_positions_rec prong_attrs positions args) found (fun found l -> (arg_pos :: l) :: found)) | [] -> (* Printf.printf "match_args_positions_rec: args=[] positions=[%s]\n%!" 
(String.concat "; " (Xlist.map positions string_of_position)); *) let b = Xlist.fold positions false (fun b p -> p.is_necessary || b) in @@ -126,8 +169,10 @@ let rec match_args_positions_rec positions = function if b then [] else [Xlist.fold positions [] (fun found p -> if not p.is_pro then found else - let attrs = ["role",p.role; "role-attr",p.role_attr; "selprefs",p.selprefs; "gf",Val (ENIAMwalStringOf.gf p.gf)] in - let attrs = if p.is_prong then attrs else attrs in (* FIXME: dodać number, gender *) + let attrs = if p.is_prong then prong_attrs else [] in (* FIXME: dodać number, gender *) + let lemma = get_pro_lemma attrs in + let sem_args = try StringMap.find ENIAMlexSemanticsData.pron_sem_args lemma with Not_found -> failwith "match_args_positions_rec" in + let attrs = ["meaning",Val lemma;"role",p.role; "role-attr",p.role_attr; "selprefs",p.selprefs; "gf",Val (ENIAMwalStringOf.gf p.gf); "agf",Val ""; "sem-args",make_sem_args sem_args] @ attrs in let attrs = Xlist.fold p.cr attrs (fun attrs cr -> ("controller",Val cr) :: attrs) in let attrs = Xlist.fold p.ce attrs (fun attrs ce -> ("controllee",Val ce) :: attrs) in Node{ENIAM_LCGrenderer.empty_node with lemma="pro"; pos="pro"; attrs=attrs} :: found)] @@ -135,9 +180,9 @@ let rec match_args_positions_rec positions = function (* FIXME: opcjonalność podrzędników argumentów zleksykalizowanych *) (* Jeśli ta funkcja zwróci pustą listę, oznacza to, że argumentów nie dało się dopasować do pozycji *) -let match_args_positions args positions = +let match_args_positions prong_attrs args positions = (* Printf.printf "match_args_positions: args=[%s] positions=[%s]\n%!" 
(String.concat "; " (Xlist.map args string_of_arg)) (String.concat "; " (Xlist.map positions string_of_position)); *) - Xlist.rev_map (match_args_positions_rec positions args) (function + Xlist.rev_map (match_args_positions_rec prong_attrs positions args) (function [] -> Dot | [t] -> t | l -> Tuple l) @@ -186,8 +231,8 @@ let translate_position id p = is_prong = p.ENIAMwalTypes.is_necessary = ENIAMwalTypes.ProNG; is_multi = p.ENIAMwalTypes.is_necessary = ENIAMwalTypes.Multi; dir= translate_dir p.ENIAMwalTypes.dir; - morfs = Xlist.fold p.ENIAMwalTypes.morfs StringSet.empty (fun morfs morf -> - if morf = ENIAMwalTypes.LCG One then (Printf.printf "translate_position: One%!"; morfs) else + morfs = if p.ENIAMwalTypes.morfs=[ENIAMwalTypes.LCG One] then StringSet.empty else Xlist.fold p.ENIAMwalTypes.morfs StringSet.empty (fun morfs morf -> + if morf = ENIAMwalTypes.LCG One then (Printf.printf "translate_position: One%!\n"; morfs) else StringSet.add morfs (string_of_morf morf))} let get_phrase_symbol = function @@ -201,6 +246,13 @@ let get_phrase_symbol = function exception NoFrame of string * string +let get_prong_attrs attrs = + Xlist.fold attrs [] (fun attrs -> function + "NUM",t -> ("NUM",t) :: attrs + | "GEND",t -> ("GEND",t) :: attrs + | "PERS",t -> ("PERS",t) :: attrs + | _ -> attrs) + let rec assign_frames_rec tokens lex_sems tree arg_symbols visited = function Ref i -> if IntSet.mem visited i then Ref i,visited else @@ -210,12 +262,14 @@ let rec assign_frames_rec tokens lex_sems tree arg_symbols visited = function | Node t -> let args,visited = assign_frames_rec tokens lex_sems tree arg_symbols visited t.args in let t = {t with args=args} in + (* print_endline ("assign_frames_rec: " ^ t.lemma); *) if t.symbol = Dot then Node t,visited else let args = get_arg_symbols_tuple arg_symbols [] args in let s = ExtArray.get lex_sems t.id in let symbol = get_phrase_symbol t.symbol in let frames = Xlist.fold s.ENIAMlexSemanticsTypes.frames [] (fun frames frame -> (* 
print_endline ("selectors: " ^ ENIAMcategoriesPL.string_of_selectors frame.selectors); *) + (* Printf.printf "assign_frames_rec: lemma=%s positions=[%s]\n%!" t.lemma (ENIAMwalStringOf.schema frame.positions); *) try let attrs = apply_selectors t.attrs frame.selectors in let frame = ENIAMsemLexicon.extend_frame symbol frame in @@ -223,11 +277,12 @@ let rec assign_frames_rec tokens lex_sems tree arg_symbols visited = function (attrs,frame,Xlist.rev_map frame.positions (translate_position (string_of_int t.id))) :: frames with Not_found -> (*print_endline "rejected";*) frames) in if frames = [] then failwith "assign_frames_rec: no frame" else + let prong_attrs = get_prong_attrs t.attrs in let e = ENIAM_LCGreductions.get_variant_label () in let l,_ = Xlist.fold frames ([],1) (fun (l,n) (attrs,frame,positions) -> (* Printf.printf "assign_frames_rec: lemma=%s args=[%s] positions=[%s]\n%!" t.lemma (String.concat "; " (Xlist.map args string_of_arg)) (String.concat "; " (Xlist.map positions string_of_position)); *) if frame.meanings = [] then failwith ("assign_frames_rec: no meanings '" ^ t.lemma ^ "'") else - Xlist.fold (match_args_positions args positions) (l,n) (fun (l,n) args -> + Xlist.fold (match_args_positions prong_attrs args positions) (l,n) (fun (l,n) args -> Xlist.fold frame.meanings (l,n) (fun (l,n) (meaning,hipero,weight) -> (string_of_int n, Node{t with attrs= ("meaning",Val meaning) :: @@ -235,7 +290,8 @@ let rec assign_frames_rec tokens lex_sems tree arg_symbols visited = function ("arole",Val frame.arole) :: ("arole-attr",Val frame.arole_attr) :: ("arev",Val (if frame.arev then "+" else "-")) :: - ("sem-args",if frame.sem_args = [] then Dot else ENIAM_LCGrules.make_variant (Xlist.map frame.sem_args (fun s -> Val s))) :: + ("agf",Val frame.agf) :: + ("sem-args",make_sem_args frame.sem_args) :: ("fopinion",Val (ENIAMwalStringOf.opinion frame.fopinion)) :: ("sopinion",Val (ENIAMwalStringOf.opinion frame.sopinion)) :: t.attrs; args=args}) :: l,n+1))) in @@ 
-284,6 +340,18 @@ let assign_frames tokens lex_sems tree = let _ = assign_frames_rec tokens lex_sems tree arg_symbols IntSet.empty (Ref 0) in tree +let rec extract_attr pat rev = function + [] -> raise Not_found + | (s,v) :: l -> + if s = pat then (List.rev rev) @ l, v + else extract_attr pat ((s,v) :: rev) l + +let rec get_attr pat = function + [] -> raise Not_found + | (s,v) :: l -> + if s = pat then v + else get_attr pat l + let rec cut_nodes result_tree = function | Node t -> let i = ExtArray.add result_tree (Node t) in @@ -297,6 +365,22 @@ let rec cut_nodes result_tree = function | Dot -> Dot | t -> failwith ("cut_nodes: " ^ ENIAM_LCGstringOf.linear_term 0 t) +exception AGF + +let rec manage_agf = function + | Node t -> + let attrs,agf = try extract_attr "agf" [] t.attrs with Not_found -> failwith "manage_agf" in + let gf = try get_attr "gf" t.attrs with Not_found -> Dot in (* FIXME: to by się chyba przydało poprawić, żeby gf było zawsze ustalone *) + if agf = Val "" || agf=gf then Node{t with attrs=attrs} else raise AGF + | Variant(e,l) -> + let l = Xlist.fold l [] (fun l (i,t) -> try (i, manage_agf t) :: l with AGF -> l) in + if l = [] then raise AGF else Variant(e,List.rev l) + | Tuple l -> + let l = Xlist.rev_map l manage_agf in + Tuple(List.rev l) + | Dot -> Dot + | t -> failwith ("cut_nodes: " ^ ENIAM_LCGstringOf.linear_term 0 t) + let rec reduce_set_attr attr v = function Node t -> Node{t with attrs=(attr,v) :: t.attrs} | Variant(e,l) -> @@ -313,6 +397,7 @@ let rec reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree = functio | Node t -> let args = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree t.args in (* print_endline ("reduce_tree_rec 1: " ^ ENIAM_LCGstringOf.linear_term 0 args); *) + let args = try manage_agf args with AGF -> failwith "reduce_tree_rec: AGF" in let args = cut_nodes result_tree args in (* print_endline ("reduce_tree_rec 2: " ^ ENIAM_LCGstringOf.linear_term 0 args); *) let id = @@ -341,4 +426,96 @@ let 
reduce_tree tokens lex_sems orig_tree = let _ = ExtArray.add result_tree Dot in let t = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree orig_tree.(0) in ExtArray.set result_tree 0 t; - ExtArray.to_array result_tree + result_tree + +let is_subj = function + | Node t -> + let gf = try get_attr "gf" t.attrs with Not_found -> failwith "is_subj" in + gf = Val "subj" + | t -> failwith ("is_subj: " ^ ENIAM_LCGstringOf.linear_term 0 t) + +let is_core = function + | Node t -> + let gf = try get_attr "gf" t.attrs with Not_found -> failwith "is_core" in + gf = Val "core" + | t -> failwith ("is_core: " ^ ENIAM_LCGstringOf.linear_term 0 t) + +let set_subj_coref ce = function + | Node t -> + let gf = try get_attr "gf" t.attrs with Not_found -> failwith "set_subj_coref" in + if gf = Val "subj" then Node{t with attrs=("coref",ce) :: t.attrs} else Node t + | t -> failwith ("set_subj_coref: " ^ ENIAM_LCGstringOf.linear_term 0 t) + +let set_core_selprefs selprefs = function (* FIXME: trzeba usunąć dotychczasowe selprefs. 
*) + | Node t -> + let gf = try get_attr "gf" t.attrs with Not_found -> failwith "set_core_selprefs" in + if gf = Val "core" then Node{t with attrs=("selprefs",selprefs) :: t.attrs} else Node t + | t -> failwith ("set_core_selprefs: " ^ ENIAM_LCGstringOf.linear_term 0 t) + +let rec set_subj_coref_args tree ce = function + Ref i -> + if is_subj (ExtArray.get tree i) then + let id = ExtArray.add tree (set_subj_coref ce (ExtArray.get tree i)) in + Ref id + else Ref i + | Variant(e,l) -> + let l = Xlist.rev_map l (fun (i,t) -> i, set_subj_coref_args tree ce t) in + Variant(e,List.rev l) + | Tuple l -> + let l = Xlist.rev_map l (set_subj_coref_args tree ce) in + Tuple(List.rev l) + | Dot -> Dot + | t -> failwith ("set_subj_coref_args: " ^ ENIAM_LCGstringOf.linear_term 0 t) + +let rec set_selprefs_core tree selprefs = function + Ref i -> + if is_core (ExtArray.get tree i) then + let id = ExtArray.add tree (set_core_selprefs selprefs (ExtArray.get tree i)) in + Ref id + else Ref i + | Variant(e,l) -> + let l = Xlist.rev_map l (fun (i,t) -> i, set_selprefs_core tree selprefs t) in + Variant(e,List.rev l) + | Tuple l -> + let l = Xlist.rev_map l (set_selprefs_core tree selprefs) in + Tuple(List.rev l) + | Dot -> Dot + | t -> failwith ("set_selprefs_core: " ^ ENIAM_LCGstringOf.linear_term 0 t) + +let rec transfer_attributes_rec tree visited = function + Ref i -> + if visited.(i) then Ref i else ( + visited.(i) <- true; + ExtArray.set tree i (transfer_attributes_rec tree visited (ExtArray.get tree i)); + Ref i) + | Node t -> + let t = {t with args = transfer_attributes_rec tree visited t.args} in + (* print_endline ("transfer_attributes_rec 1: " ^ ENIAM_LCGstringOf.linear_term 0 args); *) + let t = + if t.pos = "inf" || t.pos = "pcon" || t.pos = "pant" then + try + let attrs,ce = extract_attr "controllee" [] t.attrs in + let args = set_subj_coref_args tree ce t.args in + {t with attrs=attrs; args=args} + with Not_found -> t else + if t.pos = "prep" && get_attr "gf" t.attrs
= Val "arg" then + let attrs,selprefs = extract_attr "selprefs" [] t.attrs in + let args = set_selprefs_core tree selprefs t.args in + {t with attrs=("selprefs", Val "ALL") :: attrs; args=args} + else t in + Node t + | Variant(e,l) -> + let l = Xlist.rev_map l (fun (i,t) -> i, transfer_attributes_rec tree visited t) in + Variant(e,List.rev l) + | Tuple l -> + let l = Xlist.rev_map l (transfer_attributes_rec tree visited) in + Tuple(List.rev l) + | Dot -> Dot + | t -> failwith ("transfer_attributes_rec: " ^ ENIAM_LCGstringOf.linear_term 0 t) + +let transfer_attributes tree = + let visited = Array.make (ExtArray.size tree) false in + visited.(0) <- true; + let t = transfer_attributes_rec tree visited (ExtArray.get tree 0) in + ExtArray.set tree 0 t; + () diff --git a/exec/ENIAMvisualization.ml b/exec/ENIAMvisualization.ml index 0767c54..a3647ea 100644 --- a/exec/ENIAMvisualization.ml +++ b/exec/ENIAMvisualization.ml @@ -928,10 +928,12 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam "" | SemParsed -> if verbosity < 2 then () else ( - ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_6_dependency_tree") "a4" result.dependency_tree6; - ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_7_dependency_tree") "a4" result.dependency_tree7; - ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_8_dependency_tree") "a4" result.dependency_tree8; - ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") "a4" result.dependency_tree9); + ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_6_dependency_tree") result.dependency_tree6; + ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") result.dependency_tree9; + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_6_dependency_tree") "a3" result.dependency_tree6; + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_7_dependency_tree") "a3" result.dependency_tree7; + 
ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_8_dependency_tree") "a3" result.dependency_tree8; + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") "a3" result.dependency_tree9); if verbosity = 0 then () else ( ENIAMsemLatexOf.print_semantic_graph path (file_prefix ^ "_10_semantic_graph") "a3" result.semantic_graph10; ENIAMsemGraphOf.print_semantic_graph2 path (file_prefix ^ "_11_semantic_graph") "" result.semantic_graph11); @@ -940,24 +942,26 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam sprintf "<BR><A HREF=\"%s_6_dependency_tree.pdf\">Dependency Tree References 6</A>\n" file_prefix ^ sprintf "<BR><A HREF=\"%s_7_dependency_tree.pdf\">Dependency Tree References 7</A>\n" file_prefix ^ sprintf "<BR><A HREF=\"%s_8_dependency_tree.pdf\">Dependency Tree References 8</A>\n" file_prefix ^ - sprintf "<BR><A HREF=\"%s_9_dependency_tree.pdf\">Dependency Tree References 9</A>\n" file_prefix) ^ + sprintf "<BR><A HREF=\"%s_9_dependency_tree.pdf\">Dependency Tree References 9</A>\n" file_prefix ^ + sprintf "<BR><IMG SRC=\"%s_6_dependency_tree.png\">\n" file_prefix ^ + sprintf "<BR><IMG SRC=\"%s_9_dependency_tree.png\">\n" file_prefix) ^ (if verbosity = 0 then "" else sprintf "<BR><A HREF=\"%s_10_semantic_graph.pdf\">Semantic Graph References 10</A>\n" file_prefix ^ - sprintf "<BR><IMG SRC=\"%s_11_semantic_graph.png\">\n" file_prefix) ^ + sprintf "<BR><IMG SRC=\"%s_11_semantic_graph.png\">\n" file_prefix) ^ "" | SemError2 -> if verbosity = 0 then () else ( - ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_6_dependency_tree") "a4" result.dependency_tree6; + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_6_dependency_tree") "a3" result.dependency_tree6; ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_6_dependency_tree") result.dependency_tree6; - if result.dependency_tree7 <> [| |] then ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ 
"_7_dependency_tree") "a4" result.dependency_tree7; - if result.dependency_tree8 <> [| |] then ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_8_dependency_tree") "a4" result.dependency_tree8; - if result.dependency_tree9 <> [| |] then ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") "a4" result.dependency_tree9; + if result.dependency_tree7 <> [| |] then ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_7_dependency_tree") "a3" result.dependency_tree7; + if ExtArray.size result.dependency_tree8 <> 0 then ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_8_dependency_tree") "a3" result.dependency_tree8; + if result.dependency_tree9 <> [| |] then ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") "a3" result.dependency_tree9; if result.dependency_tree9 <> [| |] then ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") result.dependency_tree9); sprintf "error_sem2: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.msg result.paths_size result.chart_size result.dependency_tree_size ^ (if verbosity = 0 then "" else sprintf "<BR><A HREF=\"%s_6_dependency_tree.pdf\">Dependency Tree References 6</A>\n" file_prefix ^ (if result.dependency_tree7 <> [| |] then sprintf "<BR><A HREF=\"%s_7_dependency_tree.pdf\">Dependency Tree References 7</A>\n" file_prefix else "") ^ - (if result.dependency_tree8 <> [| |] then sprintf "<BR><A HREF=\"%s_8_dependency_tree.pdf\">Dependency Tree References 8</A>\n" file_prefix else "") ^ + (if ExtArray.size result.dependency_tree8 <> 0 then sprintf "<BR><A HREF=\"%s_8_dependency_tree.pdf\">Dependency Tree References 8</A>\n" file_prefix else "") ^ (if result.dependency_tree9 <> [| |] then sprintf "<BR><A HREF=\"%s_9_dependency_tree.pdf\">Dependency Tree References 9</A>\n" file_prefix else "") ^ (if result.dependency_tree9 <> [| |] then sprintf "<BR><IMG SRC=\"%s_9_dependency_tree.png\">\n" file_prefix else "") ^ 
sprintf "<BR><IMG SRC=\"%s_6_dependency_tree.png\">\n" file_prefix) ^ diff --git a/exec/resources/lexicon-pl.dic b/exec/resources/lexicon-pl.dic index 9119b74..9be709e 100644 --- a/exec/resources/lexicon-pl.dic +++ b/exec/resources/lexicon-pl.dic @@ -34,9 +34,12 @@ num: \(1+qub):adjunct /(1+inclusion):adjunct; measure: \(1+num*number*case*gender*person*congr+num*number*case*gender*person*rec):Count \(1+qub):adjunct /(1+inclusion):adjunct; -prepnp: \(1+advp*T):adjunct /(np*T*case*T*T+day-month+day+year+date+hour+hour-minute):unk \(1+qub):adjunct /(1+inclusion):adjunct; -prepadjp: \(1+advp*T):adjunct /(adjp*T*case*T+adjp*sg*dat*m1+adjp*T*postp*T+adjp*sg*nom*f+advp*T):unk \(1+qub):adjunct /(1+inclusion):adjunct; -compar: \(1+advp*T):adjunct /(np*T*case*T*T+prepnp*T*T+prepadjp*T*T):unk \(1+qub):adjunct /(1+inclusion):adjunct; +#prepnp: \(1+advp*T):adjunct /(np*T*case*T*T+day-month+day+year+date+hour+hour-minute):unk \(1+qub):adjunct /(1+inclusion):adjunct; +#prepadjp: \(1+advp*T):adjunct /(adjp*T*case*T+adjp*sg*dat*m1+adjp*T*postp*T+adjp*sg*nom*f+advp*T):unk \(1+qub):adjunct /(1+inclusion):adjunct; +#compar: \(1+advp*T):adjunct /(np*T*case*T*T+prepnp*T*T+prepadjp*T*T):unk \(1+qub):adjunct /(1+inclusion):adjunct; +prepnp: \(1+advp*T):adjunct \(1+qub):adjunct /(1+inclusion):adjunct; +prepadjp: \(1+advp*T):adjunct \(1+qub):adjunct /(1+inclusion):adjunct; +compar: \(1+advp*T):adjunct \(1+qub):adjunct /(1+inclusion):adjunct; adjp: \(1+qub):adjunct /(1+inclusion):adjunct \(1+adja):unk; diff --git a/lexSemantics/ENIAMlexSemantics.ml b/lexSemantics/ENIAMlexSemantics.ml index b927c8b..dcfd873 100644 --- a/lexSemantics/ENIAMlexSemantics.ml +++ b/lexSemantics/ENIAMlexSemantics.ml @@ -190,17 +190,23 @@ let add_sem_args lemma pos frame = let assign_prep_semantics lemma = let roles = try StringMap.find ENIAMlexSemanticsData.prep_roles lemma with Not_found -> [] in - Printf.printf "assign_prep_semantics: |roles|=%d\n%!" 
(Xlist.size roles); + (* Printf.printf "assign_prep_semantics: |roles|=%d\n%!" (Xlist.size roles); *) + {empty_frame with + meanings = [find_prep_meaning lemma [Predef "ALL"]]; + positions= [{empty_position with + dir=if lemma="temu" then Backward_ else Forward_; gf=CORE; + morfs=ENIAMwalRenderer.prep_morfs; is_necessary=Req}]; + agf="arg"} :: Xlist.map roles (function (case,arole,arole_attr,hipero,sel_prefs) -> - Printf.printf "assign_prep_semantics: case=%s arole=%s arole_attr=%s\n%!" case arole arole_attr; + (* Printf.printf "assign_prep_semantics: case=%s arole=%s arole_attr=%s\n%!" case arole arole_attr; *) let meaning = find_prep_meaning lemma hipero in (* FIXME: zaślepka dla meaning i weight *) - print_endline "assign_prep_semantics 1"; + (* print_endline "assign_prep_semantics 1"; *) let positions = [{empty_position with sel_prefs=sel_prefs; dir=if lemma="temu" then Backward_ else Forward_; - morfs=ENIAMwalRenderer.assing_pref_morfs (lemma,case); is_necessary=Req}] in - print_endline "assign_prep_semantics 2"; + morfs=ENIAMwalRenderer.prep_morfs(*ENIAMwalRenderer.assing_prep_morfs (lemma,case)*); is_necessary=Req}] in + (* print_endline "assign_prep_semantics 2"; *) {empty_frame with selectors=[ENIAM_LCGlexiconTypes.Case,ENIAM_LCGlexiconTypes.Eq,[case]]; meanings=[meaning]; positions=find_selprefs positions; - arole=arole; arole_attr=arole_attr; arev=false}) + arole=arole; arole_attr=arole_attr; arev=false; agf="adjunct"}) let assign_num_semantics lemma = let sems = try StringMap.find !num_sem lemma with Not_found -> [] in @@ -267,7 +273,7 @@ let assign_valence tokens lex_sems group = List.flatten (Xlist.rev_map (ENIAMvalence.transform_entry pos lemma neg pred aspect schema1) (fun (selectors,schema) -> Xlist.rev_map (ENIAMvalence.get_aroles schema1 lemma pos) (fun (sel,arole,arole_attr,arev) -> {selectors=sel @ selectors; meanings=Xlist.map meanings find_meaning; positions=schema; - arole=arole; arole_attr=arole_attr; arev=arev; sem_args=[]; 
sopinion=sopinion; fopinion=fopinion}))))) in + arole=arole; arole_attr=arole_attr; arev=arev; agf=""; sem_args=[]; sopinion=sopinion; fopinion=fopinion}))))) in (* Printf.printf "E %s |connected|=%d\n" lemma (Xlist.size connected); *) let connected = if connected = [] then List.flatten (Xlist.rev_map (make_unique schemata1) (semantize lemma pos)) else connected in (* Printf.printf "F %s |connected|=%d\n" lemma (Xlist.size connected); *) diff --git a/lexSemantics/ENIAMlexSemanticsTypes.ml b/lexSemantics/ENIAMlexSemanticsTypes.ml index ce2b229..9e32ba5 100644 --- a/lexSemantics/ENIAMlexSemanticsTypes.ml +++ b/lexSemantics/ENIAMlexSemanticsTypes.ml @@ -27,13 +27,14 @@ type frame = { arole: string; arole_attr: string; arev: bool; + agf: string; sem_args: string list; (* has_context: bool; *) sopinion: ENIAMwalTypes.opinion; fopinion: ENIAMwalTypes.opinion; } -let empty_frame = {selectors=[]; meanings=[]; positions=[]; arole=""; arole_attr=""; arev=false; sem_args=[]; (*has_context=false;*) +let empty_frame = {selectors=[]; meanings=[]; positions=[]; arole=""; arole_attr=""; arev=false; agf=""; sem_args=[]; (*has_context=false;*) sopinion=ENIAMwalTypes.Nieokreslony; fopinion=ENIAMwalTypes.Nieokreslony} type lex_sem = { diff --git a/lexSemantics/ENIAMwalRenderer.ml b/lexSemantics/ENIAMwalRenderer.ml index c8e153a..985999f 100644 --- a/lexSemantics/ENIAMwalRenderer.ml +++ b/lexSemantics/ENIAMwalRenderer.ml @@ -340,7 +340,7 @@ let adv_connected_adjuncts_simp = [ adjunct [Tensor[Atom "advp"; Top]]; ] -let assing_pref_morfs = function +let assing_prep_morfs = function "po","postp" -> [ LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "dat"; Atom "m1"]); LCG(Tensor[Atom "adjp"; Top; Atom "postp"; Top])] @@ -349,3 +349,17 @@ let assing_pref_morfs = function | _,case -> [ LCG(Tensor[Atom "np"; Top; Atom case; Top; Top]); LCG(Tensor[Atom "adjp"; Top; Atom case; Top])] + +let prep_morfs = [ + LCG(Tensor[Atom "np"; Top; Atom "case"; Top; Top]); + LCG(Tensor[Atom "adjp"; Top; Atom "case"; 
Top]); + LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "dat"; Atom "m1"]); + LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "nom"; Atom "f"]); + LCG(Tensor[Atom "advp"; Top]); + LCG(Tensor[Atom "year"]); + LCG(Tensor[Atom "hour-minute"]); + LCG(Tensor[Atom "day-month"]); + LCG(Tensor[Atom "hour"]); + LCG(Tensor[Atom "day"]); + LCG(Tensor[Atom "date"]); + ] -- libgit2 0.22.2