ENIAM_EdgeScore.ml 4.97 KB
open Xstd
open ENIAM_LCGtypes
open Yojson

module MST_Model : sig
  type mst_model = {
    typeAlphabet: int StringMap.t;
    dataAlphabet: int StringMap.t;
    parameters: float array}
  val read_model: string -> mst_model
  val empty: mst_model
  exception MalformedModelJson
end
= struct
  type mst_model = {
    typeAlphabet: int StringMap.t;
    dataAlphabet: int StringMap.t;
    parameters: float array}

  let empty = {typeAlphabet = StringMap.empty;
               dataAlphabet = StringMap.empty;
               parameters = Array.make 0 0.0}
  exception MalformedModelJson

  let construct_data_alphabet keys =
    let counter = ref 0 in
    let map = ref StringMap.empty in
    let length = Array.length keys in
    for i = 0 to length -1 do
      map := StringMap.add !map keys.(i) !counter;
      counter := !counter + 1;
    done;
    !map

  let construct_type_alphabet = construct_data_alphabet

  let read_model fname =
    let data = Basic.from_file fname in
    let open Yojson.Basic.Util in
    let unwrapList = function
        `List l -> l
      | _ -> raise MalformedModelJson in
    let dataA = data |> member "dataAlphabet"  |> unwrapList |> filter_string in
    let typeA = data |> member "typeAlphabet"  |> unwrapList |> filter_string in
    let params = data |> member "parameters" |> unwrapList |> filter_float in
    {typeAlphabet = Array.of_list typeA |> construct_type_alphabet;
     dataAlphabet = Array.of_list dataA |> construct_data_alphabet;
     parameters = Array.of_list params}
end
open MST_Model

let model = ref MST_Model.empty

let initialize () =
  model := MST_Model.read_model "dep.model.json";
  ()

exception UnsupportedLinearTerm of linear_term
exception EmptyVariant

let add_feature str (fv: IntSet.t) =
  if StringMap.mem !model.dataAlphabet str then
    IntSet.add fv (StringMap.find !model.dataAlphabet str)
  else
    fv

let score_fv (fv:IntSet.t) =
  IntSet.fold fv 0.0 (fun score i -> score +. !model.parameters.(i))

let apply_features features fv =
  List.fold_left (|>) fv features

let add_linear_features f_type (obs: string array) first second distStr fv =
  fv

let add_two_obs_features prefix item1F1 item1F2 item2F1 item2F2 distStr fv =
  let add_diststr str = [str; str^"*"^distStr] in
  let flist = List.map ((^) prefix)[
      "2FF1="^item1F1;
      "2FF1="^item1F1^" "^item1F2;
      "2FF1="^item1F1^" "^item1F2^" "^item2F2;
      "2FF1="^item1F1^" "^item1F2^" "^item2F2^" "^item2F1;
      "2FF2="^item1F1^" "^item2F1;
      "2FF3="^item1F1^" "^item2F2;
      "2FF4="^item1F2^" "^item2F1^" "^item2F2;
      "2FF5="^item1F2^" "^item2F2;
      "2FF6="^item2F1^" "^item2F2;
      "2FF7="^item1F2;
      "2FF8="^item2F1;
      "2FF9="^item2F2;
    ] in
  let funs = List.map (add_feature) (List.flatten (List.map add_diststr flist)) in
  apply_features funs fv

type disamb_info = {
  tree: linear_term array
}

let score_edge (data: disamb_info) (parent: node) (child: node) =
  let fv = IntSet.empty in
  let fv = add_two_obs_features "HC"
      parent.orth parent.pos child.orth child.pos "" fv in
  score_fv fv

let rec fill_dep_edges_array
    (data: disamb_info) parent (scores: float IntMap.t) =
  function
    Dot -> scores
  | Ref i -> (match data.tree.(i) with
        Node child -> IntMap.add scores i (score_edge data parent child)
      | _ as x -> raise (UnsupportedLinearTerm x))
  | Tuple l -> List.fold_left (fill_dep_edges_array data parent) scores l
  | Variant (_, l) -> List.fold_left
                        (fill_dep_edges_array data parent)
                        scores  (List.map snd l)
  | _ as x -> raise (UnsupportedLinearTerm x)

let rec disambiguate_args edge_scores =
  function
    Dot -> Dot, 0.0
  | Ref i -> Ref i, IntMap.find edge_scores i
  | Tuple l ->
    let (terms, scores) =
      List.map (disambiguate_args edge_scores) l |> List.split in
    let num = List.length scores |> float_of_int in
    Tuple terms, (List.fold_left (+.) 0.0 scores) /. num
  | Variant (lab, l) ->
    let (lbs, terms) = List.split l in
    let new_terms_scores = List.map (disambiguate_args edge_scores) terms in
    let select_best (term, score) (new_term, new_score) =
      if new_score > score then
        new_term, new_score
      else
        term, score in
    List.fold_left select_best (List.hd new_terms_scores) (List.tl new_terms_scores)
  | _ as x -> raise (UnsupportedLinearTerm x)

(* dezambiguacja argumentów pojedynczego wierzchołka algorytmem zachłannym *)
let disambiguate_node (data: disamb_info) parentI =
  let parent = match data.tree.(parentI) with
      Node node -> node
    | _ as x -> raise (UnsupportedLinearTerm x) in
  let edge_scores = fill_dep_edges_array
      data parent IntMap.empty (parent.args) in
  let (new_term, _) = disambiguate_args edge_scores (parent.args) in
  Node {parent with args = new_term}

let disambiguate_tree tree =
  let tree2 = Array.copy tree in
  let data : disamb_info = {tree = tree} in
  let update parentI _ =
    (let new_term = disambiguate_node data parentI in
     tree2.(parentI) <- new_term;) in
  Array.iteri update tree; tree2