ENIAMmstModel.ml
3.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
open Yojson
open Xstd
module Freq_Model : sig
val get_freq_score: string -> string -> string option -> float
val initialize: string -> unit
end
= struct
let frequencies : float StringMap.t ref = ref StringMap.empty
let get_freq_score lemma pos case =
let cat =
match case with
Some c -> pos ^ "_" ^ c
| None -> pos in
let lookup_str = lemma^ "\t" ^ cat in
try
StringMap.find !frequencies lookup_str
with
_ -> 0.0
let load_frequencies filename map =
let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in
Xlist.fold l map (fun map line ->
if String.length line = 0 then map else
match Str.split_delim (Str.regexp "\t") line with
[lemma; cat; count] -> StringMap.add map (lemma ^ "\t" ^ cat) (float_of_string count)
| _ -> failwith ("ENIAMmstDisambiguation load_frequencies: " ^ line))
let initialize fname =
frequencies := load_frequencies fname !frequencies;
()
end
module MST_Model : sig
type mst_model
type feature_vector_t
exception MalformedModelJson
val read_model: string -> mst_model
val initialize: string -> unit
val add_feature: string -> feature_vector_t -> feature_vector_t
val score_fv: feature_vector_t -> float
val empty_fv: feature_vector_t
end
= struct
type feature_vector_t = IntSet.t
type mst_model = {
typeAlphabet: int StringMap.t;
dataAlphabet: int StringMap.t;
parameters: float array}
exception MalformedModelJson
let model = ref {typeAlphabet = StringMap.empty;
dataAlphabet = StringMap.empty;
parameters = Array.make 0 0.0}
let empty_fv = IntSet.empty
let add_feature str (fv: feature_vector_t) =
if StringMap.mem !model.dataAlphabet str then
((*prerr_string (str ^": " ^ (
let i = StringMap.find !model.dataAlphabet str in string_of_float(!model.parameters.(i))
) ^ "\n");*)IntSet.add fv (StringMap.find !model.dataAlphabet str))
else
fv
let score_fv (fv: feature_vector_t) =
IntSet.fold fv 0.0 (fun score i -> score +. !model.parameters.(i))
let construct_data_alphabet keys =
let counter = ref 0 in
let map = ref StringMap.empty in
let length = Array.length keys in
for i = 0 to length -1 do
map := StringMap.add !map keys.(i) !counter;
counter := !counter + 1;
done;
!map
let construct_type_alphabet = construct_data_alphabet
let read_zip fname =
let file = Zip.open_in fname in
let entry = file |> Zip.entries |> List.hd in
Zip.read_entry file entry
let read_model fname =
let data = read_zip fname |> Basic.from_string in
try
let open Yojson.Basic.Util in
let unwrapList = function
`List l -> l
| _ -> raise MalformedModelJson in
let dataA = data |> member "dataAlphabet" |> unwrapList |> filter_string in
let typeA = data |> member "typeAlphabet" |> unwrapList |> filter_string in
let params = data |> member "parameters" |> unwrapList |> filter_float in
{typeAlphabet = Array.of_list typeA |> construct_type_alphabet;
dataAlphabet = Array.of_list dataA |> construct_data_alphabet;
parameters = Array.of_list params}
with
_ -> raise MalformedModelJson
let initialize fname =
model := read_model fname;
()
end