ENIAM_EdgeScore.ml
4.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
open Xstd
open ENIAM_LCGtypes
open Yojson
module MST_Model : sig
type mst_model = {
typeAlphabet: int StringMap.t;
dataAlphabet: int StringMap.t;
parameters: float array}
val read_model: string -> mst_model
val empty: mst_model
exception MalformedModelJson
end
= struct
type mst_model = {
typeAlphabet: int StringMap.t;
dataAlphabet: int StringMap.t;
parameters: float array}
let empty = {typeAlphabet = StringMap.empty;
dataAlphabet = StringMap.empty;
parameters = Array.make 0 0.0}
exception MalformedModelJson
let construct_data_alphabet keys =
let counter = ref 0 in
let map = ref StringMap.empty in
let length = Array.length keys in
for i = 0 to length -1 do
map := StringMap.add !map keys.(i) !counter;
counter := !counter + 1;
done;
!map
let construct_type_alphabet = construct_data_alphabet
let read_model fname =
let data = Basic.from_file fname in
let open Yojson.Basic.Util in
let unwrapList = function
`List l -> l
| _ -> raise MalformedModelJson in
let dataA = data |> member "dataAlphabet" |> unwrapList |> filter_string in
let typeA = data |> member "typeAlphabet" |> unwrapList |> filter_string in
let params = data |> member "parameters" |> unwrapList |> filter_float in
{typeAlphabet = Array.of_list typeA |> construct_type_alphabet;
dataAlphabet = Array.of_list dataA |> construct_data_alphabet;
parameters = Array.of_list params}
end
open MST_Model
let model = ref MST_Model.empty
let initialize () =
model := MST_Model.read_model "dep.model.json";
()
exception UnsupportedLinearTerm of linear_term
exception EmptyVariant
let add_feature str (fv: IntSet.t) =
if StringMap.mem !model.dataAlphabet str then
IntSet.add fv (StringMap.find !model.dataAlphabet str)
else
fv
let score_fv (fv:IntSet.t) =
IntSet.fold fv 0.0 (fun score i -> score +. !model.parameters.(i))
let apply_features features fv =
List.fold_left (|>) fv features
let add_linear_features f_type (obs: string array) first second distStr fv =
fv
let add_two_obs_features prefix item1F1 item1F2 item2F1 item2F2 distStr fv =
let add_diststr str = [str; str^"*"^distStr] in
let flist = List.map ((^) prefix)[
"2FF1="^item1F1;
"2FF1="^item1F1^" "^item1F2;
"2FF1="^item1F1^" "^item1F2^" "^item2F2;
"2FF1="^item1F1^" "^item1F2^" "^item2F2^" "^item2F1;
"2FF2="^item1F1^" "^item2F1;
"2FF3="^item1F1^" "^item2F2;
"2FF4="^item1F2^" "^item2F1^" "^item2F2;
"2FF5="^item1F2^" "^item2F2;
"2FF6="^item2F1^" "^item2F2;
"2FF7="^item1F2;
"2FF8="^item2F1;
"2FF9="^item2F2;
] in
let funs = List.map (add_feature) (List.flatten (List.map add_diststr flist)) in
apply_features funs fv
type disamb_info = {
tree: linear_term array
}
let score_edge (data: disamb_info) (parent: node) (child: node) =
let fv = IntSet.empty in
let fv = add_two_obs_features "HC"
parent.orth parent.pos child.orth child.pos "" fv in
score_fv fv
let rec fill_dep_edges_array
(data: disamb_info) parent (scores: float IntMap.t) =
function
Dot -> scores
| Ref i -> (match data.tree.(i) with
Node child -> IntMap.add scores i (score_edge data parent child)
| _ as x -> raise (UnsupportedLinearTerm x))
| Tuple l -> List.fold_left (fill_dep_edges_array data parent) scores l
| Variant (_, l) -> List.fold_left
(fill_dep_edges_array data parent)
scores (List.map snd l)
| _ as x -> raise (UnsupportedLinearTerm x)
let rec disambiguate_args edge_scores =
function
Dot -> Dot, 0.0
| Ref i -> Ref i, IntMap.find edge_scores i
| Tuple l ->
let (terms, scores) =
List.map (disambiguate_args edge_scores) l |> List.split in
let num = List.length scores |> float_of_int in
Tuple terms, (List.fold_left (+.) 0.0 scores) /. num
| Variant (lab, l) ->
let (lbs, terms) = List.split l in
let new_terms_scores = List.map (disambiguate_args edge_scores) terms in
let select_best (term, score) (new_term, new_score) =
if new_score > score then
new_term, new_score
else
term, score in
List.fold_left select_best (List.hd new_terms_scores) (List.tl new_terms_scores)
| _ as x -> raise (UnsupportedLinearTerm x)
(* dezambiguacja argumentów pojedynczego wierzchołka algorytmem zachłannym *)
let disambiguate_node (data: disamb_info) parentI =
let parent = match data.tree.(parentI) with
Node node -> node
| _ as x -> raise (UnsupportedLinearTerm x) in
let edge_scores = fill_dep_edges_array
data parent IntMap.empty (parent.args) in
let (new_term, _) = disambiguate_args edge_scores (parent.args) in
Node {parent with args = new_term}
let disambiguate_tree tree =
let tree2 = Array.copy tree in
let data : disamb_info = {tree = tree} in
let update parentI _ =
(let new_term = disambiguate_node data parentI in
tree2.(parentI) <- new_term;) in
Array.iteri update tree; tree2