(* * ENIAM: Categorial Syntactic-Semantic Parser for Polish * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. *) open WalTypes open LCGtypes open Printf open Xstd let rec list_assoc2 x = function (s,a,b) :: l -> if x = s then a,b else list_assoc2 x l | [] -> raise Not_found let meaning_weight = -1. let prepare_senses lemma meanings senses = match meanings,senses with [],[] -> [lemma, ["ALL"],0.] (* FIXME *) | [],_ -> Xlist.map senses (fun (sense,hipero,weight) -> if hipero = ["0"] then sense,["0"],weight else sense,(if hipero = [] then ["ALL"] else hipero),weight) | _,[] -> Xlist.map meanings (fun meaning -> meaning, ["ALL"],meaning_weight) | _,_ -> Xlist.map meanings (fun meaning -> let hipero,weight = try list_assoc2 meaning senses with Not_found -> [],meaning_weight in if hipero = ["0"] then meaning,["0"],weight else meaning,(if hipero = [] then ["ALL"] else hipero),weight) let extract_meaning lemma = function DefaultAtrs(m,r,o,neg,p,a) -> m,lemma,DefaultAtrs([],r,o,neg,p,a) | EmptyAtrs m -> m,lemma,EmptyAtrs [] | NounAtrs(m,nsyn,s(*,typ*)) -> m,lemma,NounAtrs([],nsyn,s(*,typ*)) | AdjAtrs(m,c,adjsyn(*,adjsem,typ*)) -> m,lemma,AdjAtrs([],c,adjsyn(*,adjsem,typ*)) | PersAtrs(m,le,neg,mo,t,au,a) -> m,le,PersAtrs([],le,neg,mo,t,au,a) | GerAtrs(m,le,neg,a) -> m,le,GerAtrs([],le,neg,a) | NonPersAtrs(m,le,role,role_attr,neg,a) -> m,le,NonPersAtrs([],le,role,role_attr,neg,a) | _ -> failwith "extract_meaning" let extract_roles = function NonPersAtrs(m,le,role,role_attr,neg,a) -> role,role_attr | _ -> failwith "extract_roles" let get_lemma = function PreTypes.Lemma(lemma,cat,_) -> lemma,cat | PreTypes.Interp lemma -> lemma,"interp" | _ -> "","" let prepare_valence paths_array = let valence = Array.map (fun d -> let lemma,cat = get_lemma d.PreTypes.token in let lemma = if lemma = "<ors>" || lemma = ":s" || lemma = "„s" then "pro-komunikować" else lemma in if lemma = "" then [] else let prep_valence = if cat = "prep" then (* (0,lemma,StringSet.empty,0.,"NOSEM","",Frame(EmptyAtrs[],[])) :: *) match d.PreTypes.semantics with PreTypes.Normal -> [] | PreTypes.PrepSemantics l -> Xlist.rev_map l (fun (lrole,lrole_attr,hipero,sel_prefs) -> 0,lemma,hipero,0.,lrole,lrole_attr,Frame(EmptyAtrs[],[])) | _ -> failwith "prepare_valence" else [] in let valence = if d.PreTypes.valence = [] then [0,Frame(EmptyAtrs[],[])] else d.PreTypes.valence in let lrole,lrole_attr = d.PreTypes.lroles in prep_valence @ List.flatten (Xlist.map valence (function fnum,Frame(attrs,schema) -> let meanings,lemma,attrs = extract_meaning lemma attrs in let lrole,lrole_attr = if cat = "pact" || cat = "ppas" then extract_roles attrs else if cat = "pcon" then "Con","" else if cat = "pant" then "Ant","" else d.PreTypes.lroles in Xlist.map (prepare_senses lemma meanings d.PreTypes.senses) (fun (meaning,hipero,weight) -> let hipero = if cat = "conj" then ["0"] else hipero in fnum,meaning,StringSet.of_list hipero,weight,lrole,lrole_attr, Frame(attrs,Xlist.map schema (fun s -> (* let s = if s.sel_prefs=[] then (print_endline ("prepare_valence empty sel_prefs: " ^ lemma ^ " " ^ cat); {s with sel_prefs=["ALL"]}) else s in *) if s.role="" && s.gf <> ADJUNCT && s.gf <> NOSEM then ( printf "%d: %s\n%!" fnum (WalStringOf.frame lemma (Frame(attrs,schema))); failwith ("prepare_valence empty role: " ^ lemma ^ " " ^ cat)) else {s with morfs=List.sort compare s.morfs}))) | fnum,(LexFrame _ as frame) -> [fnum,"lex",StringSet.empty,0.,lrole,lrole_attr,frame] | fnum,(ComprepFrame _ as frame) -> [fnum,"comprep",StringSet.empty,0.,lrole,lrole_attr,frame]))) paths_array in valence (*let create_pro_frames t = [0,t.pred,StringSet.singleton "0",0.,"","",Frame(EmptyAtrs[],[])]*) let get_fnum t = let x = try Xlist.assoc t.attrs "FNUM" with Not_found -> Val "0" in (match x with Val s -> (try int_of_string s with _ -> failwith "get_fnum 1") | _ -> failwith "get_fnum 2") let select_frames l t = (* printf "a1 pred=%s\n" t.pred; *) let fnum = get_fnum t in let l = Xlist.fold l [] (fun l (n,meaning,hipero,weight,lrole,lrole_attr,frame) -> if n = fnum then (meaning,hipero,weight,lrole,lrole_attr,frame) :: l else l) in (* printf "a2 pred=%s\n" t.pred; *) match l with [] -> "",[] | [_,_,_,_,_,LexFrame _] -> "",[] | [_,_,_,_,_,ComprepFrame _] -> "",[] | _ -> LCGreductions.get_variant_label (), fst (Xlist.fold l ([],1) (fun (l,i) t -> (string_of_int i, t) :: l, i+1)) let rec get_arg_refs found = function Variant(_,l) -> Xlist.fold l found (fun found (_,t) -> get_arg_refs found t) | Ref i -> i :: found | t -> failwith ("get_arg_refs: " ^ LCGstringOf.linear_term 0 t) let rec match_position = function a :: la, b :: lb -> if a = b then match_position (la,b :: lb) else if a > b then match_position (a :: la,lb) else false | [],_ -> true | _,[] -> false let mark_sem_morfs morfs = Xlist.map morfs (function | Phrase(PrepNP(_,prep,c)) -> Phrase(PrepNP(Sem,prep,c)) | Phrase(PrepAdjP(_,prep,c)) -> Phrase(PrepAdjP(Sem,prep,c)) | Phrase(PrepNumP(_,prep,c)) -> Phrase(PrepNumP(Sem,prep,c)) | Phrase(ComprepNP(_,prep)) -> Phrase(ComprepNP(Sem,prep)) | Phrase(ComparNP(_,prep,c)) -> Phrase(ComparNP(Sem,prep,c)) | Phrase(ComparPP(_,prep)) -> Phrase(ComparPP(Sem,prep)) | Phrase(PrepNCP(_,prep,c,ct,co)) -> Phrase(PrepNCP(Sem,prep,c,ct,co)) | t -> t) let mark_nosem_morf = function Phrase(PrepNP(_,prep,c)) -> Phrase(PrepNP(NoSem,prep,c)) | Phrase(PrepAdjP(_,prep,c)) -> Phrase(PrepAdjP(NoSem,prep,c)) | Phrase(PrepNumP(_,prep,c)) -> Phrase(PrepNumP(NoSem,prep,c)) | Phrase(ComprepNP(_,prep)) -> Phrase(ComprepNP(NoSem,prep)) | Phrase(ComparNP(_,prep,c)) -> Phrase(ComparNP(NoSem,prep,c)) | Phrase(ComparPP(_,prep)) -> Phrase(ComparPP(NoSem,prep)) | Phrase(PrepNCP(_,prep,c,ct,co)) -> Phrase(PrepNCP(NoSem,prep,c,ct,co)) | t -> failwith "mark_nosem_morf" let rec is_nosem_morf = function Phrase(PrepNP(NoSem,prep,c)) -> true | Phrase(PrepAdjP(NoSem,prep,c)) -> true | Phrase(PrepNumP(NoSem,prep,c)) -> true | Phrase(ComprepNP(NoSem,prep)) -> true | Phrase(ComparNP(NoSem,prep,c)) -> true | Phrase(ComparPP(NoSem,prep)) -> true | Phrase(PrepNCP(NoSem,prep,c,ct,co)) -> true | _ -> false let rec is_sem_morf = function Phrase(PrepNP(Sem,prep,c)) -> true | Phrase(PrepAdjP(Sem,prep,c)) -> true | Phrase(PrepNumP(Sem,prep,c)) -> true | Phrase(ComprepNP(Sem,prep)) -> true | Phrase(ComparNP(Sem,prep,c)) -> true | Phrase(ComparPP(Sem,prep)) -> true | Phrase(PrepNCP(Sem,prep,c,ct,co)) -> true | _ -> false let rec exclude_sem_morfs = function [] -> [] | morf :: morfs -> if is_sem_morf morf then exclude_sem_morfs morfs else morf :: exclude_sem_morfs morfs (* UWAGA: dopasowywane ramy są preselekcjonowane, więc wszystkie argumenty muszą się maczować *) let match_args_pos modifications nodes e i schema t = (* printf "match_args_pos\n"; *) (* if schema = [] then schema else *) let refs = get_arg_refs [] t in let gfl,morfs = Xlist.fold refs ([],[]) (fun (gfl,morfs) i -> nodes.(i).agf :: gfl, nodes.(i).amorf :: morfs) in if gfl = [] then failwith "match_args_pos: empty gfl" else let gf = Xlist.fold (List.tl gfl) (List.hd gfl) (fun gf gf2 -> if gf = gf2 then gf else failwith "match_args_pos 2") in (* if gf = NOSEM || gf = NOGF then schema else if gf = CORE then schema else (* FIXME: semantyka dla core *)*) let morfs = exclude_sem_morfs morfs in let schema,selected = if morfs = [] then schema,[] else let morfs = List.sort compare morfs in (* printf "gf=%s morfs=%s\n%!" (WalStringOf.gf gf) (String.concat ";" (Xlist.map morfs WalStringOf.morf)); *) Xlist.fold schema ([],[]) (fun (schema,selected) pos -> (* printf "pos.gf=%s pos.morfs=%s\n%!" (WalStringOf.gf pos.gf) (String.concat ";" (Xlist.map pos.morfs WalStringOf.morf)); *) if gf = pos.gf || (gf = ADJUNCT && pos.gf=ARG) then if match_position (morfs,(*mark_sem_morfs*) pos.morfs) then schema, pos :: selected else pos :: schema, selected else pos :: schema, selected) in (match selected with [] -> (*if gf = ARG then failwith "match_args_pos 3" else*) Xlist.iter refs (fun r -> modifications.(r) <- StringMap.add modifications.(r) (e ^ i) LCGrenderer.empty_schema_field(*{gf=ADJUNCT; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; dir=Both; morfs=[]}*) ) (* FIXME: przy kilku pasujących pozycjach wybieram pierwszą a nich, do poprawienia przy okazji porządków z walencją np walencja leksemu "godzina": 1: : : common: time: Poss,T|{null;np(gen);nump(gen)}+Arg,T|{null;np(gen);nump(gen)}; 1: : : common: time: Temp,T|{null;np(gen);nump(gen)}]; *) | pos :: _ -> Xlist.iter refs (fun r -> (* FIXME: gdzieś tu trzeba wstawić uzupełnianie brakujących ról dla argumentów i adjunctów *) (* FIXME: tu pewnie trzeba będzie wstawić rolę dla adjunctów, które pojawią się w wyniku niespełniania SEL-PREFS *) modifications.(r) <- StringMap.add modifications.(r) (e ^ i) pos) (*| _ -> failwith "match_args_pos 4"*)); schema let rec match_args_tuple modifications nodes e i schema = function Tuple l -> Xlist.fold l schema (fun schema t -> match_args_tuple modifications nodes e i schema t) | t -> match_args_pos modifications nodes e i schema t let match_args modifications nodes e i t = function Frame(_,schema) -> ignore (match_args_tuple modifications nodes e i schema t.args) | LexFrame _ -> failwith "match_args" | ComprepFrame _ -> failwith "match_args" let rec assign_frames_and_senses_rec modifications valence nodes t = (* printf "pred=%s id=%d\n" t.pred t.id; *) if t.id = 0 then failwith ("assign_frames_and_senses_rec: t.id=0 pred=" ^ t.pred) else let e,node_valence = select_frames ((*if t.id >= Array.length valence then create_pro_frames t else*) valence.(t.id)) t in let node_valence = if node_valence <> [] then node_valence else ["1",(t.pred,StringSet.empty,0.,"","",Frame(EmptyAtrs[],[]))] in let l = Xlist.map node_valence (fun (i,(meaning,hipero,weight,lrole,lrole_attr,frame)) -> let t = if lrole = "" then t else {t with arole=lrole;arole_attr=lrole_attr} in (* FIXME: pomijam to, że role dla rzeczowników dotyczą tylko inst *) (* let t = if lrole = "NOSEM" then {t with amorf=mark_nosem_morf t.amorf} else t in *) if t.args <> Dot then match_args modifications nodes e i t frame; (* printf "meaning=%s\n" meaning; *) i,Node{t with meaning=meaning; hipero=hipero; meaning_weight=weight}) in Variant(e,l) let rec is_nosem_morfs morfs = let sem = Xlist.fold morfs false (fun b m -> b || is_sem_morf m) in let nosem = Xlist.fold morfs false (fun b m -> b || is_nosem_morf m) in nosem && not sem (* FIXME: pewne uproszczenie, ale liczę, że nie ma wielu ramek z xp koordynowanym z prep *) let has_tuple = function Tuple _ -> true | _ -> false let rec apply_modifications_rec (*paths_array*) pos = function Variant(e,l) -> Variant(e,List.rev (Xlist.fold l [] (fun l (i,t) -> let t = apply_modifications_rec (*paths_array*) pos t in (*if t = Dot then l else*) (i, (*apply_modifications_rec (*paths_array*) pos*) t) :: l))) | Node t -> if is_sem_morf t.amorf then Node t else (* FIXME: czy to jest poprawne? *) let t = if is_nosem_morf t.amorf then {t with agf=ARG} else t in Node{t with position=pos} | _ -> failwith "apply_modifications_rec" (* let empty_pos = {gf=ADJUNCT; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; dir=Both; morfs=[]} (* FIXME: jaka GF? *) *) let apply_modifications (*paths_array*) modifications nodes references = Int.iter 1 (Array.length references - 1) (fun r -> (* if StringMap.is_empty modifications.(r) then failwith ("apply_modifications: " ^ nodes.(r).pred) else *) references.(r) <- Choice(StringMap.map modifications.(r) (fun pos -> apply_modifications_rec (*paths_array*) pos references.(r)))) let rec extract_nosem rev = function [] -> List.rev rev, false | ("NOSEM",Val "+") :: l -> (List.rev rev) @ l, true | x :: l -> extract_nosem (x :: rev) l let get_nodes = function Node t -> let attrs,b = extract_nosem [] t.attrs in let t = if t.pred = "<query1>" || t.pred = "<query2>" || t.pred = "<query3>" || t.pred = "<query4>" || t.pred = "<query5>" || t.pred = "<query6>" then {t with agf=CORE} else t in if t.agf = NOGF then failwith ("get_nodes agf=NOGF: " ^ t.pred) else if b then {t with amorf=mark_nosem_morf t.amorf; attrs=attrs} else t | _ -> failwith "get_nodes" let rec propagate_nosem_selprefs modifications ei = function Choice choice -> Choice(StringMap.map choice (propagate_nosem_selprefs modifications "")) | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, propagate_nosem_selprefs modifications (e ^ i) t)) | Node t -> if (t.cat = "prep" && t.arole = "NOSEM") || t.cat = "num" then let refs = IntSet.of_list (get_arg_refs [] t.args) in IntSet.iter refs (fun r -> modifications.(r) <- StringMap.add_inc modifications.(r) ei t.position.WalTypes.sel_prefs (fun l -> if l = t.position.WalTypes.sel_prefs then l else failwith ("propagate_nosem_selprefs 1: [" ^ String.concat ";" l ^ "] [" ^ String.concat ";" t.position.WalTypes.sel_prefs ^ "]"))); Node{t with position= {t.position with WalTypes.sel_prefs = []}} else Node t | _ -> failwith "propagate_nosem_selprefs 2" let rec apply_modifications2_rec mods = function Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, apply_modifications2_rec mods t)) | Node t -> if t.position.WalTypes.sel_prefs <> [] then failwith "apply_modifications2_rec" else Node{t with position={t.position with WalTypes.sel_prefs=mods}} | _ -> failwith "apply_modifications2_rec" let apply_modifications2 modifications references = Int.iter 1 (Array.length references - 1) (fun r -> if not (StringMap.is_empty modifications.(r)) then match references.(r) with Choice choice -> references.(r) <- Choice(StringMap.mapi choice (fun ei t -> try apply_modifications2_rec (StringMap.find modifications.(r) ei) t with Not_found -> t)) | _ -> failwith "apply_modifications2") let assign_frames_and_senses paths_array references = let modifications = Array.make (Array.length references) StringMap.empty in let valence = prepare_valence paths_array in let nodes = Array.map get_nodes references in let references = Array.map (assign_frames_and_senses_rec modifications valence nodes) nodes in apply_modifications (*paths_array*) modifications nodes references; (* let modifications = Array.make (Array.length references) StringMap.empty in Int.iter 0 (Array.length references - 1) (fun r -> references.(r) <- propagate_nosem_selprefs modifications "" references.(r)); (* FIXME: propagowanie preferencji selekcyjnych więcej niż jeden poziom w głąb nie działa *) apply_modifications2 modifications references; Int.iter 0 (Array.length references - 1) (fun r -> references.(r) <- propagate_nosem_selprefs modifications "" references.(r)); apply_modifications2 modifications references; Int.iter 0 (Array.length references - 1) (fun r -> references.(r) <- propagate_nosem_selprefs modifications "" references.(r)); apply_modifications2 modifications references; *) references