ENIAMmstModel.ml 3.29 KB
open Yojson
open Xstd

module Freq_Model : sig
  val get_freq_score: string -> string -> string option -> float
  val initialize: string -> unit
end
= struct
  let frequencies : float StringMap.t ref = ref StringMap.empty

  let get_freq_score lemma pos case =
    let cat =
      match case with
        Some c -> pos ^ "_" ^ c
      | None -> pos in
    let lookup_str = lemma^ "\t" ^ cat in
    try
      StringMap.find !frequencies lookup_str
    with
      _ -> 0.0

  let load_frequencies filename map =
    let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in
    Xlist.fold l map (fun map line ->
        if String.length line = 0 then map else
          match Str.split_delim (Str.regexp "\t") line with
            [lemma; cat; count] -> StringMap.add map (lemma ^ "\t" ^ cat) (float_of_string count)
          | _ -> failwith ("ENIAMmstDisambiguation load_frequencies: " ^ line))

  let initialize fname =
    frequencies := load_frequencies fname !frequencies;
    ()
end

module MST_Model : sig
  type mst_model
  type feature_vector_t
  exception MalformedModelJson

  val read_model: string -> mst_model
  val initialize: string -> unit
  val add_feature: string -> feature_vector_t -> feature_vector_t
  val score_fv: feature_vector_t -> float
  val empty_fv: feature_vector_t
end
= struct
  type feature_vector_t = IntSet.t

  type mst_model = {
    typeAlphabet: int StringMap.t;
    dataAlphabet: int StringMap.t;
    parameters: float array}


  exception MalformedModelJson

  let model = ref {typeAlphabet = StringMap.empty;
                   dataAlphabet = StringMap.empty;
                   parameters = Array.make 0 0.0}

  let empty_fv = IntSet.empty

  let add_feature str (fv: feature_vector_t) =
    if StringMap.mem !model.dataAlphabet str then
      ((*prerr_string (str ^": " ^ (
           let i = StringMap.find !model.dataAlphabet str in string_of_float(!model.parameters.(i))
         ) ^ "\n");*)IntSet.add fv (StringMap.find !model.dataAlphabet str))
    else
      fv

  let score_fv (fv: feature_vector_t) =
    IntSet.fold fv 0.0 (fun score i -> score +. !model.parameters.(i))

  let construct_data_alphabet keys =
    let counter = ref 0 in
    let map = ref StringMap.empty in
    let length = Array.length keys in
    for i = 0 to length -1 do
      map := StringMap.add !map keys.(i) !counter;
      counter := !counter + 1;
    done;
    !map

  let construct_type_alphabet = construct_data_alphabet

  let read_zip fname =
    let file = Zip.open_in fname in
    let entry = file |> Zip.entries |> List.hd in
    Zip.read_entry file entry

  let read_model fname =
    let data = read_zip fname |> Basic.from_string in
    try
      let open Yojson.Basic.Util in
      let unwrapList = function
          `List l -> l
        | _ -> raise MalformedModelJson in
      let dataA = data |> member "dataAlphabet"  |> unwrapList |> filter_string in
      let typeA = data |> member "typeAlphabet"  |> unwrapList |> filter_string in
      let params = data |> member "parameters" |> unwrapList |> filter_float in
      {typeAlphabet = Array.of_list typeA |> construct_type_alphabet;
       dataAlphabet = Array.of_list dataA |> construct_data_alphabet;
       parameters = Array.of_list params}
    with
      _ -> raise MalformedModelJson

  let initialize fname =
    model := read_model fname;
    ()
end