ENIAMsemValence.ml 7.27 KB
(*
 *  ENIAMexec implements ENIAM processing stream
 *  Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

open ENIAMexecTypes
open ENIAM_LCGtypes
open ENIAM_LCGlexiconTypes
open Xstd

type pos = {role: linear_term; role_attr: linear_term; selprefs: linear_term;
  is_necessary: bool; morfs: StringSet.t}

let match_value v2 = function
    Val v -> if v = v2 then Val v else raise Not_found
  | _ -> failwith "match_value"

let rec apply_selector v2 = function
    (_,[]) -> failwith "apply_selector"
  | Negation,("NEGATION",v) :: l -> ("NEGATION",match_value v2 v) :: l
  | Aspect,("ASPECT",v) :: l -> ("ASPECT",match_value v2 v) :: l
  | Mood,("MOOD",v) :: l -> ("MOOD",match_value v2 v) :: l
  | sel,(attr,v) :: l -> (*print_endline ("apply_selector: " ^ ENIAMcategoriesPL.string_of_selector sel ^ " " ^ attr);*) (attr,v) :: (apply_selector v2 (sel,l))

let rec apply_selectors attrs = function
    [] -> attrs
  | (sel,Eq,[v]) :: l -> apply_selectors (apply_selector v (sel,attrs)) l
  | (sel,Neq,vals) :: l -> failwith "apply_selectors"
  | _ -> failwith "apply_selectors"

let rec get_arg_symbols_variant arg_symbols = function
    Ref i -> [arg_symbols.(i),Ref i]
  | Variant(e,l) ->
      let map = Xlist.fold l StringMap.empty (fun map (i,t) ->
        Xlist.fold (get_arg_symbols_variant arg_symbols t) map (fun map (arg_symbol,t) ->
          StringMap.add_inc map arg_symbol [i,t] (fun l -> (i,t) :: l))) in
      StringMap.fold map [] (fun found arg_symbol l -> (arg_symbol,Variant(e,l)) :: found)
  | t -> failwith ("get_arg_symbols_variant: " ^ ENIAM_LCGstringOf.linear_term 0 t)

let rec get_arg_symbols_tuple arg_symbols rev = function
    Dot -> rev
  | Tuple l -> Xlist.fold l rev (get_arg_symbols_tuple arg_symbols)
  | t -> (get_arg_symbols_variant arg_symbols t) :: rev

let rec match_arg_positions arg rev = function
    p :: positions ->
      let l = Xlist.fold arg [] (fun l (arg_symbol,t) ->
        if StringSet.mem p.morfs arg_symbol then t :: l else l) in
      (match l with
        [] -> match_arg_positions arg (p :: rev) positions
      | [t] ->
          let t = SetAttr("role",p.role,SetAttr("role_attr",p.role_attr,SetAttr("selprefs",p.selprefs,t))) in
          (t, rev @ positions) :: (match_arg_positions arg (p :: rev) positions)
      | _ -> failwith "match_arg_positions: ni")
  | [] -> [] (*failwith "match_arg_positions"*) (* FIXME: to nie musi być błąd *)

(* Jeśli ta funkcja zwróci pustą listę, oznacza to, że argumentów nie dało się dopasować do pozycji *)
let rec match_args_positions_rec positions = function
    arg :: args ->
      Xlist.fold (match_arg_positions arg [] positions) [] (fun found (arg_pos,positions) ->
        Xlist.fold (match_args_positions_rec positions args) found (fun found l -> (arg_pos :: l) :: found))
  | [] ->
      let b = Xlist.fold positions false (fun b p -> p.is_necessary || b) in
      if b then [] else [[]] (* FIXME: miejsce na wstawianie pro? *)

let match_args_positions args positions =
  Xlist.rev_map (match_args_positions_rec positions args) (function
      [] -> Dot
    | [t] -> t
    | l -> Tuple l)

let translate_position p =
  {role = Val p.ENIAMwalTypes.role;
   role_attr = Val p.ENIAMwalTypes.role_attr;
   selprefs = (match p.ENIAMwalTypes.sel_prefs with
      [] -> Dot
    | [s] -> Val s
    | l -> Tuple(Xlist.rev_map l (fun s -> Val s)));
   is_necessary = Xlist.fold p.ENIAMwalTypes.morfs true (fun b -> function LCG One -> false | _ -> b);
   morfs = Xlist.fold position.ENIAMwalTypes.morfs StringSet.empty (fun morfs morf ->
        StringSet.add morfs (string_of_morf morf))}

let rec assign_frames_rec tokens lex_sems tree arg_symbols visited = function
    Ref i ->
      if IntSet.mem visited i then Ref i,visited else
      let t,visited = assign_frames_rec tokens lex_sems tree arg_symbols (IntSet.add visited i) tree.(i) in
      tree.(i) <- t;
      Ref i,visited
  | Node t ->
      let args,visited = assign_frames_rec tokens lex_sems tree arg_symbols visited t.args in
      let t = {t with args=args} in
      let args = get_arg_symbols_tuple arg_symbols [] args in
      let s = ExtArray.get lex_sems t.id in
      let frames = Xlist.fold s.ENIAMlexSemanticsTypes.frames [] (fun frames (selectors,meanings,positions) ->
        print_endline ("selectors: " ^ ENIAMcategoriesPL.string_of_selectors selectors);
        try
          let attrs = apply_selectors t.attrs selectors in
          print_endline "passed";
          (attrs,meanings,Xlist.rev_map positions translate_position) :: frames
        with Not_found -> print_endline "rejected"; frames) in
      if frames = [] then Node t,visited else (* FIXME: tu powinien być rzucany wyjątek *)
      let e = ENIAM_LCGreductions.get_variant_label () in
      let l,_ = Xlist.fold frames ([],1) (fun (l,n) (attrs,meanings,positions) ->
        Xlist.fold meanings (l,n) (fun (l,n) (meaning,hipero,weight) ->
          Xlist.fold (match_args_positions args positions) (l,n) (fun (l,n) args ->
            (string_of_int n, Node{t with attrs=("meaning",Val meaning) :: t.attrs; args=args}) :: l,n+1))) in
      Variant(e,l),visited
  | Variant(e,l) ->
      let l,visited = Xlist.fold l ([],visited) (fun (l,visited) (i,t) ->
        let t,visited = assign_frames_rec tokens lex_sems tree arg_symbols visited t in
        (i,t) :: l, visited) in
      Variant(e,List.rev l),visited
  | Tuple l ->
      let l,visited = Xlist.fold l ([],visited) (fun (l,visited) t ->
        let t,visited = assign_frames_rec tokens lex_sems tree arg_symbols visited t in
        t :: l, visited) in
      Tuple(List.rev l),visited
  | Dot -> Dot,visited
  | t -> failwith ("assign_frames_rec: " ^ ENIAM_LCGstringOf.linear_term 0 t)

let string_of_arg_symbol = function
  | t -> failwith ("string_of_arg_symbol: " ^ ENIAM_LCGstringOf.linear_term 0 t)

let rec get_arg_symbols = function
    Node t -> string_of_arg_symbol t.arg_symbol
  | t -> failwith ("get_arg_symbols: " ^ ENIAM_LCGstringOf.linear_term 0 t)

let assign_frames tokens lex_sems tree =
  print_endline "assign_frames";
  let tree = Array.copy tree in
  let arg_symbols = Array.make (Array.length tree) "" in
  Int.iter 0 (Array.length tree - 1) (fun i ->
    arg_symbols.(i) <- get_arg_symbols tree.(i));
  let _ = assign_frames_rec tokens lex_sems tree arg_symbols IntSet.empty (Ref 0) in
  tree

let assign tokens lex_sems text =
  map_text Struct (fun mode -> function
      ENIAMSentence result ->
        if result.status <> Parsed then ENIAMSentence result else
        ENIAMSentence {result with dependency_tree6=assign_frames tokens lex_sems result.dependency_tree6}
    | t -> t) text