ENIAMsemValence.ml 17.7 KB
(*
 *  ENIAMexec implements ENIAM processing stream
 *  Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

open ENIAMexecTypes
open ENIAM_LCGtypes
open ENIAM_LCGlexiconTypes
open ENIAMlexSemanticsTypes
open Xstd

type pos = {role: linear_term; role_attr: linear_term; selprefs: linear_term; gf: ENIAMwalTypes.gf;
  cr: string list; ce: string list;
  is_necessary: bool; is_pro: bool; is_prong: bool; is_multi: bool; dir: string; morfs: StringSet.t}

let match_value v2 = function
    Val v -> if v = v2 then Val v else raise Not_found
  | _ -> failwith "match_value"

let match_neg_value vals = function
    Val v -> if Xlist.mem vals v then raise Not_found else Val v
  | _ -> failwith "match_neg_value"

let rec apply_selector v2 = function
    (sel,[]) -> failwith ("apply_selector: " ^ ENIAMcategoriesPL.string_of_selector sel)
  | Negation,("NEGATION",v) :: l -> ("NEGATION",match_value v2 v) :: l
  | Aspect,("ASPECT",v) :: l -> ("ASPECT",match_value v2 v) :: l
  | Mood,("MOOD",v) :: l -> ("MOOD",match_value v2 v) :: l
  | Nsyn,("NSYN",v) :: l -> ("NSYN",match_value v2 v) :: l
  | Nsem,("NSEM",v) :: l -> ("NSEM",match_value v2 v) :: l
  | Case,("CASE",v) :: l -> ("CASE",match_value v2 v) :: l
  | Mode,("MODE",v) :: l -> ("MODE",match_value v2 v) :: l
  | sel,(attr,v) :: l -> (*print_endline ("apply_selector: " ^ ENIAMcategoriesPL.string_of_selector sel ^ " " ^ attr);*) (attr,v) :: (apply_selector v2 (sel,l))

let rec apply_neg_selector vals = function
    (sel,[]) -> failwith ("apply_neg_selector: " ^ ENIAMcategoriesPL.string_of_selector sel)
  | Nsem,("NSEM",v) :: l -> ("NSEM",match_neg_value vals v) :: l
  | Case,("CASE",v) :: l -> ("CASE",match_neg_value vals v) :: l
  | sel,(attr,v) :: l -> (*print_endline ("apply_neg_selector: " ^ ENIAMcategoriesPL.string_of_selector sel ^ " " ^ attr);*) (attr,v) :: (apply_neg_selector vals (sel,l))

let rec apply_selectors attrs = function
    [] -> attrs
  | (sel,Eq,[v]) :: l -> apply_selectors (apply_selector v (sel,attrs)) l
  | (sel,Neq,vals) :: l -> apply_selectors (apply_neg_selector vals (sel,attrs)) l
  | _ -> failwith "apply_selectors"

module OrderedStringDir =
  struct
    type t = string * string
    let compare = compare
  end

module StringDirMap = Xmap.Make(OrderedStringDir)

let rec get_arg_symbols_variant arg_symbols = function
    Ref i ->
      let l,dir = arg_symbols.(i) in
      Xlist.map l (fun s -> (s,dir),Ref i)
  | Variant(e,l) ->
      let map = Xlist.fold l StringDirMap.empty (fun map (i,t) ->
        Xlist.fold (get_arg_symbols_variant arg_symbols t) map (fun map (arg_symbol,t) ->
          StringDirMap.add_inc map arg_symbol [i,t] (fun l -> (i,t) :: l))) in
      StringDirMap.fold map [] (fun found arg_symbol l -> (arg_symbol,Variant(e,l)) :: found)
  | t -> failwith ("get_arg_symbols_variant: " ^ ENIAM_LCGstringOf.linear_term 0 t)

let rec get_arg_symbols_tuple arg_symbols rev = function
    Dot -> rev
  | Tuple l -> Xlist.fold l rev (get_arg_symbols_tuple arg_symbols)
  | t -> (get_arg_symbols_variant arg_symbols t) :: rev

let string_of_argdir = function
    "forward" -> "/"
  | "backward" -> "\\"
  | "both" -> "|"
  | _ -> failwith "string_of_argdir"

let string_of_arg arg =
  String.concat ", " (Xlist.map arg (fun ((arg_symbol,dir),t) -> (string_of_argdir dir) ^ arg_symbol ^ ":" ^ ENIAM_LCGstringOf.linear_term 0 t))

let string_of_position p =
  (string_of_argdir p.dir) ^ String.concat "+" (StringSet.to_list p.morfs)

let rec match_arg_positions arg rev = function
    p :: positions ->
      (* Printf.printf "match_arg_positions: arg=%s rev=[%s] positions=%s :: [%s]\n%!" (string_of_arg arg) (String.concat "; " (Xlist.map rev string_of_position)) (string_of_position p) (String.concat "; " (Xlist.map positions string_of_position)); *)
      let l = Xlist.fold arg [] (fun l ((arg_symbol,dir),t) ->
        if StringSet.mem p.morfs arg_symbol && p.dir = dir then t :: l else l) in
      (match l with
        [] -> (*print_endline "match_arg_positions: not matched";*) match_arg_positions arg (p :: rev) positions
      | [t] ->
          let t = if p.gf = ENIAMwalTypes.SUBJ || p.gf = ENIAMwalTypes.OBJ || p.gf = ENIAMwalTypes.ARG then
            SetAttr("role",p.role,SetAttr("role-attr",p.role_attr,SetAttr("selprefs",p.selprefs,t)))
          else if p.gf = ENIAMwalTypes.ADJUNCT then t else failwith "match_arg_positions: ni 2" in
          let t = SetAttr("gf",Val (ENIAMwalStringOf.gf p.gf),t) in
          let t = Xlist.fold p.cr t (fun t cr -> SetAttr("controller",Val cr,t)) in
          let t = Xlist.fold p.ce t (fun t ce -> SetAttr("controllee",Val ce,t)) in
          if p.is_multi then (t, rev @ (p :: positions)) :: (match_arg_positions arg (p :: rev) positions)
          else (t, rev @ positions) :: (match_arg_positions arg (p :: rev) positions)
      | _ -> failwith "match_arg_positions: ni")
  | [] -> (*Printf.printf "match_arg_positions: arg=%s rev=[%s] positions=[]\n%!" (string_of_arg arg) (String.concat "; " (Xlist.map rev string_of_position));*) []

(* Jeśli ta funkcja zwróci pustą listę, oznacza to, że argumentów nie dało się dopasować do pozycji *)
let rec match_args_positions_rec positions = function
    arg :: args ->
      (* Printf.printf "match_args_positions_rec: args=%s :: [%s] positions=[%s]\n%!" (string_of_arg arg) (String.concat "; " (Xlist.map args string_of_arg)) (String.concat "; " (Xlist.map positions string_of_position)); *)
      Xlist.fold (match_arg_positions arg [] positions) [] (fun found (arg_pos,positions) ->
        Xlist.fold (match_args_positions_rec positions args) found (fun found l -> (arg_pos :: l) :: found))
  | [] ->
      (* Printf.printf "match_args_positions_rec: args=[] positions=[%s]\n%!" (String.concat "; " (Xlist.map positions string_of_position)); *)
      let b = Xlist.fold positions false (fun b p -> p.is_necessary || b) in
      (* if b then print_endline "match_args_positions: not matched"; *)
      if b then [] else
        [Xlist.fold positions [] (fun found p ->
          if not p.is_pro then found else
          let attrs = ["role",p.role; "role-attr",p.role_attr; "selprefs",p.selprefs; "gf",Val (ENIAMwalStringOf.gf p.gf)] in
          let attrs = if p.is_prong then attrs else attrs in (* FIXME: dodać number, gender *)
          let attrs = Xlist.fold p.cr attrs (fun attrs cr -> ("controller",Val cr) :: attrs) in
          let attrs = Xlist.fold p.ce attrs (fun attrs ce -> ("controllee",Val ce) :: attrs) in
          Node{ENIAM_LCGrenderer.empty_node with lemma="pro"; pos="pro"; attrs=attrs} :: found)]

(* FIXME: opcjonalność podrzędników argumentów zleksykalizowanych *)

(* Jeśli ta funkcja zwróci pustą listę, oznacza to, że argumentów nie dało się dopasować do pozycji *)
let match_args_positions args positions =
  (* Printf.printf "match_args_positions: args=[%s] positions=[%s]\n%!" (String.concat "; " (Xlist.map args string_of_arg)) (String.concat "; " (Xlist.map positions string_of_position)); *)
  Xlist.rev_map (match_args_positions_rec positions args) (function
      [] -> Dot
    | [t] -> t
    | l -> Tuple l)

let translate_selprefs = function
    ENIAMwalTypes.SynsetId _ -> failwith "translate_selprefs"
  | ENIAMwalTypes.Predef _ -> failwith "translate_selprefs"
  | ENIAMwalTypes.SynsetName s -> s
  | ENIAMwalTypes.RelationRole _ -> "ALL"

let string_of_internal_morf = function
    Atom s -> s
  | AVar s -> s
  | Top -> "T"
  | t -> failwith ("string_of_internal_morf: " ^ ENIAM_LCGstringOf.internal_grammar_symbol_prime t)


let string_of_morf = function
    ENIAMwalTypes.LCG Tensor l -> String.concat "*" (Xlist.map l string_of_internal_morf)
  | ENIAMwalTypes.LCG t -> failwith ("string_of_morf: " ^ ENIAM_LCGstringOf.grammar_symbol_prime t)
  | _ -> failwith "string_of_morf"

let rec string_of_arg_symbol = function
    Dot -> ""
  | Val s -> s
  | Tuple l -> String.concat "*" (Xlist.map l string_of_arg_symbol)
  | t -> failwith ("string_of_arg_symbol: " ^ ENIAM_LCGstringOf.linear_term 0 t)

let translate_dir = function
    ENIAMwalTypes.Both_ -> "both"
  | ENIAMwalTypes.Forward_ -> "forward"
  | ENIAMwalTypes.Backward_ -> "backward"

let translate_position id p =
  {role = Val p.ENIAMwalTypes.role;
   role_attr = Val p.ENIAMwalTypes.role_attr;
   selprefs = (match Xlist.map p.ENIAMwalTypes.sel_prefs translate_selprefs with
      [] -> Dot
    | [s] -> Val s
    | l -> Tuple(Xlist.rev_map l (fun s -> Val s)));
   gf=p.ENIAMwalTypes.gf;
   cr=Xlist.map p.ENIAMwalTypes.cr (fun cr -> id ^ "-" ^ cr);
   ce=Xlist.map p.ENIAMwalTypes.ce (fun ce -> id ^ "-" ^ ce);
   is_necessary = p.ENIAMwalTypes.is_necessary = ENIAMwalTypes.Req(*Xlist.fold p.ENIAMwalTypes.morfs true (fun b -> function ENIAMwalTypes.LCG One -> false | _ -> b)*);
   is_pro = p.ENIAMwalTypes.is_necessary = ENIAMwalTypes.Pro || p.ENIAMwalTypes.is_necessary = ENIAMwalTypes.ProNG;
   is_prong = p.ENIAMwalTypes.is_necessary = ENIAMwalTypes.ProNG;
   is_multi = p.ENIAMwalTypes.is_necessary = ENIAMwalTypes.Multi;
   dir= translate_dir p.ENIAMwalTypes.dir;
   morfs = Xlist.fold p.ENIAMwalTypes.morfs StringSet.empty (fun morfs morf ->
        if morf = ENIAMwalTypes.LCG One then (Printf.printf "translate_position: One%!"; morfs) else
        StringSet.add morfs (string_of_morf morf))}

let get_phrase_symbol = function
    Tuple[Val "lex";Val "się";Val "qub"] -> "lex-się-qub"
  | Tuple(Val s :: _) -> s
  | Val s -> s
  (* | Dot -> "dot" *)
  | t -> failwith ("get_phrase_symbol: " ^ ENIAM_LCGstringOf.linear_term 0 t)

(* let extend_frame symbol = function *)

exception NoFrame of string * string

let rec assign_frames_rec tokens lex_sems tree arg_symbols visited = function
    Ref i ->
      if IntSet.mem visited i then Ref i,visited else
      let t,visited = assign_frames_rec tokens lex_sems tree arg_symbols (IntSet.add visited i) tree.(i) in
      tree.(i) <- t;
      Ref i,visited
  | Node t ->
      let args,visited = assign_frames_rec tokens lex_sems tree arg_symbols visited t.args in
      let t = {t with args=args} in
      if t.symbol = Dot then Node t,visited else
      let args = get_arg_symbols_tuple arg_symbols [] args in
      let s = ExtArray.get lex_sems t.id in
      let symbol = get_phrase_symbol t.symbol in
      let frames = Xlist.fold s.ENIAMlexSemanticsTypes.frames [] (fun frames frame ->
        (* print_endline ("selectors: " ^ ENIAMcategoriesPL.string_of_selectors frame.selectors); *)
        try
          let attrs = apply_selectors t.attrs frame.selectors in
          let frame = ENIAMsemLexicon.extend_frame symbol frame in
          (* print_endline "passed"; *)
          (attrs,frame,Xlist.rev_map frame.positions (translate_position (string_of_int t.id))) :: frames
        with Not_found -> (*print_endline "rejected";*) frames) in
      if frames = [] then failwith "assign_frames_rec: no frame" else
      let e = ENIAM_LCGreductions.get_variant_label () in
      let l,_ = Xlist.fold frames ([],1) (fun (l,n) (attrs,frame,positions) ->
        (* Printf.printf "assign_frames_rec: lemma=%s args=[%s] positions=[%s]\n%!" t.lemma (String.concat "; " (Xlist.map args string_of_arg)) (String.concat "; " (Xlist.map positions string_of_position)); *)
        if frame.meanings = [] then failwith ("assign_frames_rec: no meanings '" ^ t.lemma ^ "'") else
        Xlist.fold (match_args_positions args positions) (l,n) (fun (l,n) args ->
          Xlist.fold frame.meanings (l,n) (fun (l,n) (meaning,hipero,weight) ->
            (string_of_int n, Node{t with attrs=
              ("meaning",Val meaning) ::
              ("hipero",ENIAM_LCGrules.make_variant (Xlist.map hipero (fun (h,n) -> Tuple[Val h;Val(string_of_int n)]))) ::
              ("arole",Val frame.arole) ::
              ("arole-attr",Val frame.arole_attr) ::
              ("arev",Val (if frame.arev then "+" else "-")) ::
              ("sem-args",if frame.sem_args = [] then Dot else ENIAM_LCGrules.make_variant (Xlist.map frame.sem_args (fun s -> Val s))) ::
              ("fopinion",Val (ENIAMwalStringOf.opinion frame.fopinion)) ::
              ("sopinion",Val (ENIAMwalStringOf.opinion frame.sopinion)) :: t.attrs; args=args}) ::
              l,n+1))) in
      if l = [] then (print_endline ("assign_frames_rec: no frame assingment found for " ^ t.lemma ^ " " ^ ENIAM_LCGstringOf.linear_term 0 t.symbol);raise (NoFrame(t.lemma,ENIAM_LCGstringOf.linear_term 0 t.symbol))) else
      Variant(e,l),visited
  | Variant(e,l) ->
      let a = ref "" in
      let b = ref "" in
      let l,visited = Xlist.fold l ([],visited) (fun (l,visited) (i,t) ->
        try
          let t,visited = assign_frames_rec tokens lex_sems tree arg_symbols visited t in
          (i,t) :: l, visited
        with NoFrame(x,y) -> a:=x; b:=y; l, visited) in
      if l = [] then raise (NoFrame(!a,!b)) else
      Variant(e,List.rev l),visited
  | Tuple l ->
      let l,visited = Xlist.fold l ([],visited) (fun (l,visited) t ->
        let t,visited = assign_frames_rec tokens lex_sems tree arg_symbols visited t in
        t :: l, visited) in
      Tuple(List.rev l),visited
  | Dot -> Dot,visited
  | t -> failwith ("assign_frames_rec: " ^ ENIAM_LCGstringOf.linear_term 0 t)

let rec get_arg_symbols = function
    Node{arg_symbol=Tuple([Val "cp"; Val "T"; Val "T"]);
         symbol=Tuple([Val "cp"; ctype; comp]); arg_dir=dir} ->
      [string_of_arg_symbol (Tuple([Val "cp"; Val "T"; Val "T"]));
       string_of_arg_symbol (Tuple([Val "cp"; ctype; comp]))],dir
  | Node{arg_symbol=Tuple([Val "ncp"; Val "T"; Val arg_case; Val "T"; Val "T"; Val "T"; Val "T"]);
         symbol=Tuple([Val "ncp"; number; case; gender; person; ctype; comp]); arg_dir=dir} ->
      [string_of_arg_symbol (Tuple([Val "ncp"; Val "T"; Val arg_case; Val "T"; Val "T"; Val "T"; Val "T"]));
       string_of_arg_symbol (Tuple([Val "ncp"; Val "T"; Val arg_case; Val "T"; Val "T"; ctype; comp]))],dir
  | Node{arg_symbol=Tuple([Val "prepncp"; Val arg_prep; Val arg_case; Val "T"; Val "T"]);
         symbol=Tuple([Val "prepncp"; prep; case; ctype; comp]); arg_dir=dir} ->
      [string_of_arg_symbol (Tuple([Val "prepncp"; Val arg_prep; Val arg_case; Val "T"; Val "T"]));
       string_of_arg_symbol (Tuple([Val "prepncp"; prep; case; ctype; comp]))],dir
  | Node t -> [string_of_arg_symbol t.arg_symbol], t.arg_dir
  | t -> failwith ("get_arg_symbols: " ^ ENIAM_LCGstringOf.linear_term 0 t)

let assign_frames tokens lex_sems tree =
  (* print_endline "assign_frames"; *)
  let tree = Array.copy tree in
  let arg_symbols = Array.make (Array.length tree) ([],"") in
  Int.iter 0 (Array.length tree - 1) (fun i ->
    arg_symbols.(i) <- get_arg_symbols tree.(i));
  let _ = assign_frames_rec tokens lex_sems tree arg_symbols IntSet.empty (Ref 0) in
  tree

let rec cut_nodes result_tree = function
  | Node t ->
      let i = ExtArray.add result_tree (Node t) in
      Ref i
  | Variant(e,l) ->
      let l = Xlist.rev_map l (fun (i,t) -> i, cut_nodes result_tree t) in
      Variant(e,List.rev l)
  | Tuple l ->
      let l = Xlist.rev_map l (cut_nodes result_tree) in
      Tuple(List.rev l)
  | Dot -> Dot
  | t -> failwith ("cut_nodes: " ^ ENIAM_LCGstringOf.linear_term 0 t)

let rec reduce_set_attr attr v = function
    Node t -> Node{t with attrs=(attr,v) :: t.attrs}
  | Variant(e,l) ->
      Variant(e,List.rev (Xlist.rev_map l (fun (i,t) ->
        i, reduce_set_attr attr v t)))
  | t -> failwith ("reduce_set_attr: " ^ ENIAM_LCGstringOf.linear_term 0 t)

let rec reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree = function
    Ref i ->
      if mid_tree.(i) <> Dot then mid_tree.(i) else
      let t = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree orig_tree.(i) in
      mid_tree.(i) <- t;
      t
  | Node t ->
      let args = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree t.args in
      (* print_endline ("reduce_tree_rec 1: " ^ ENIAM_LCGstringOf.linear_term 0 args); *)
      let args = cut_nodes result_tree args in
      (* print_endline ("reduce_tree_rec 2: " ^ ENIAM_LCGstringOf.linear_term 0 args); *)
      let id =
        if t.id = 0 then
          let id = ExtArray.add tokens {ENIAMtokenizerTypes.empty_token_env with ENIAMtokenizerTypes.token=ENIAMtokenizerTypes.Lemma("pro","pro",[[]])} in
          let _ = ExtArray.add lex_sems empty_lex_sem in
          id
        else t.id in
      Node{t with args=args; id=id}
  | Variant(e,l) ->
      let l = Xlist.rev_map l (fun (i,t) -> i, reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree t) in
      Variant(e,List.rev l)
  | Tuple l ->
      let l = Xlist.rev_map l (reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree) in
      Tuple(List.rev l)
  | Dot -> Dot
  | SetAttr(attr,v,t) ->
      let t = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree t in
      reduce_set_attr attr v t
  | t -> failwith ("reduce_tree_rec: " ^ ENIAM_LCGstringOf.linear_term 0 t)

let reduce_tree tokens lex_sems orig_tree =
  (* print_endline "reduce_tree"; *)
  let mid_tree = Array.make (Array.length orig_tree) Dot in
  let result_tree = ExtArray.make (Array.length orig_tree) Dot in
  let _ = ExtArray.add result_tree Dot in
  let t = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree orig_tree.(0) in
  ExtArray.set result_tree 0 t;
  ExtArray.to_array result_tree