LCGchart.ml
8.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
(*
* ENIAM: Categorial Syntactic-Semantic Parser for Polish
* Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
* Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*)
open Xstd
open LCGtypes
open Printf
let make size = Array.make_matrix (size+1) (size+1) ([],0)
let last_node chart = Array.length chart - 1
let add chart i j v layer =
chart.(i).(j) <- [v],layer;
chart
let add_list chart i j l layer =
chart.(i).(j) <- l,layer;
chart
let add_inc chart i j v layer =
let l,layer2 = chart.(i).(j) in
chart.(i).(j) <- v :: l, max layer layer2;
chart
let add_inc_list chart i j l layer =
let l2,layer2 = chart.(i).(j) in
chart.(i).(j) <- l @ l2, max layer layer2;
chart
let find chart i j = fst chart.(i).(j)
let layer chart i j = snd chart.(i).(j)
let fold chart s f =
Int.fold 0 (last_node chart) s (fun s i ->
Int.fold 0 (last_node chart) s (fun s j ->
let layer = layer chart i j in
Xlist.fold (find chart i j) s (fun s (symbol,sem) ->
f s (symbol,i,j,sem,layer))))
let rec find_paths_rec chart last i =
if i = last then [[]] else
Int.fold (i+1) last [] (fun paths j ->
if chart.(i).(j) = [] then paths else
let tails = find_paths_rec chart last j in
if Xlist.size tails > 1000000 then failwith "find_paths_rec: to many paths" else
Xlist.fold tails paths (fun paths tail -> (chart.(i).(j) :: tail) :: paths))
let find_paths chart =
let last = last_node chart - 1 in
find_paths_rec chart last 0
let get_no_entries chart =
Int.fold 0 (last_node chart) 0 (fun n i ->
Int.fold 0 (last_node chart) n (fun n j ->
n + (Xlist.size (find chart i j))))
(* Pod referencją 0 będzie trzymany korzeń termu *)
let lazify chart =
let new_chart = make (last_node chart) in
let references = ExtArray.make (2 * last_node chart) Dot in
let _ = ExtArray.add references Dot in (* to jest potrzebne by na pozycji 0 umieścić korzeń termu *)
let new_chart = fold chart new_chart (fun new_chart (symbol,i,j,sem,layer) ->
let n = ExtArray.add references sem in
add_inc new_chart i j (symbol,Ref n) layer) in
new_chart, references
let rec dep_lazify_rec references (DepNode(id,left,l,right)) =
(* Printf.printf "dep_lazify_rec %s\n" id; *)
let l = Xlist.rev_map l LCGrules.flatten_functor in
let l = Xlist.fold l [] (fun l (symbol,sem) ->
let n = ExtArray.add references sem in
(symbol,Ref n) :: l) in
let left = Xlist.map left (dep_lazify_rec references) in
let right = Xlist.map right (dep_lazify_rec references) in
DepNode(id,left,l,right)
let dep_lazify dep_graph =
let references = ExtArray.make 100 Dot in
let _ = ExtArray.add references Dot in (* to jest potrzebne by na pozycji 0 umieścić korzeń termu *)
dep_lazify_rec references dep_graph, references
let merge_sems l = (* FIXME: dodać warianty *)
let map = Xlist.fold l SymbolMap.empty (fun map (t,sem) -> SymbolMap.add_inc map t [sem] (fun l -> sem :: l)) in
SymbolMap.fold map [] (fun l t sems -> (t,LCGrules.make_variant sems) :: l)
let make_unique chart i j =
let l,layer = chart.(i).(j) in
let l = merge_sems l in
chart.(i).(j) <- l, layer;
chart
let parse chart references timeout time_fun =
(* print_endline "parse 1"; *)
(* LCGrules.references := refs;
LCGrules.next_reference := next_ref; *)
let start_time = time_fun () in
let last_node = last_node chart in
let chart = Int.fold 2 last_node chart (fun chart len ->
Int.fold 0 (last_node - len) chart (fun chart i ->
let k = i + len in
Int.fold 1 (len - 1) chart (fun chart d ->
let time = time_fun () in
if time -. start_time > timeout then raise (Timeout(time -. start_time)) else
let j = i + d in
let l,lay = Xlist.fold LCGrules.rules (find chart i k,layer chart i k) (fun (l,lay) rule ->
(rule references (find chart i j) (find chart j k)) @ l,
(* Xlist.fold (find chart i j) l (fun l a ->
Xlist.fold (find chart j k) l (fun l b ->
(rule (a,b)) @ l)),*)
max lay ((max (layer chart i j) (layer chart j k)) + 1)) in
(* print_int i; print_string " "; print_int j; print_string " "; print_int k; print_newline (); *)
(* let l = LCGreductions.merge_symbols l in *)
(* if Xlist.size l > 0 then Printf.printf "parse: %d-%d |l|=%d\n%!" i k (Xlist.size l); *)
make_unique (add_list chart i k l lay) i k))) in
(* print_endline "parse 2"; *)
chart
(* let rec dep_parse2 funct = function
larg :: left, rarg :: right ->
(dep_parse2 (LCGrules.forward_application funct rarg) (larg :: left, right)) @
(dep_parse2 (LCGrules.backward_application larg funct) (left, rarg :: right))
| larg :: left, [] -> dep_parse2 (LCGrules.backward_application larg funct) (left, [])
| [], rarg :: right -> dep_parse2 (LCGrules.forward_application funct rarg) ([], right)
| [], [] -> funct *)
let assign_not_parsed left right (t,sem) =
let sem = if left = [] then sem else (print_endline "assign_not_parsed: ni 1"; sem) in
let sem = if right = [] then sem else (print_endline "assign_not_parsed: ni 2"; sem) in
t, sem
let rec dep_parse_rec references timeout time_fun (DepNode(id,left,l,right)) =
let left = Xlist.map left (dep_parse_rec references timeout time_fun) in
let right = Xlist.map right (dep_parse_rec references timeout time_fun) in
(* printf "dep_parse_rec id=%s\n" id; *)
let l,left = Xlist.fold left (l,[]) (fun (funct,left) arg ->
(* printf "larg: %s\n" (LCGstringOf.symbol_sem_list arg);
printf "funct: %s\n" (LCGstringOf.symbol_sem_list funct); *)
match LCGrules.backward_application references arg funct with
[] -> (*printf "NOT PARSED\n";*) l, arg :: left
| l -> merge_sems l, left) in
let l,right = Xlist.fold right (l,[]) (fun (funct,right) arg ->
(* printf "funct: %s\n" (LCGstringOf.symbol_sem_list funct);
printf "rarg: %s\n" (LCGstringOf.symbol_sem_list arg); *)
match LCGrules.forward_application references funct arg with
[] -> (*printf "NOT PARSED\n";*) l, arg :: right
| l -> merge_sems l, right) in
Xlist.rev_map l (assign_not_parsed left right)
(* merge_sems (dep_parse2 l (left,right)) *)
let dep_parse dep_graph references timeout time_fun =
(* print_endline "dep_parse"; *)
(* LCGrules.references := refs;
LCGrules.next_reference := next_ref; *)
let start_time = time_fun () in
let dep_graph = dep_parse_rec references timeout time_fun dep_graph in
dep_graph
let is_parsed chart =
let n = last_node chart in
try
let _ = Xlist.assoc (find chart 0 n) (LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom (*"<query>"*)"<sentence>"])) in (* FIXME: !!! *)
true
with Not_found -> false
let is_dep_parsed = function
[] -> false
| [LCGtypes.Bracket(false,false,LCGtypes.Tensor[LCGtypes.Atom "<conll_root>"]),_] -> true
| _ -> failwith "is_dep_parsed"
let get_parsed_term chart =
let n = last_node chart in
let l = Xlist.fold (find chart 0 n) [] (fun l -> function
LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom (*"<query>"*)"<sentence>"]), sem -> (LCGtypes.Cut (LCGtypes.Tuple[sem])) :: l (* FIXME: !!! *)
| _ -> l) in
LCGtypes.Node{LCGrenderer.empty_node with
LCGtypes.pred="<root>";
LCGtypes.cat="interp";
LCGtypes.id=LCGrenderer.get_pro_id ();
LCGtypes.agf=WalTypes.NOSEM;
LCGtypes.args=LCGrules.make_variant l}
let get_dep_parsed_term = function
[LCGtypes.Bracket(false,false,LCGtypes.Tensor[LCGtypes.Atom "<conll_root>"]),sem] ->
let l = [LCGtypes.Cut (LCGtypes.Tuple[sem])] in
LCGtypes.Node{LCGrenderer.empty_node with
LCGtypes.pred="<root>";
LCGtypes.cat="interp";
LCGtypes.id=LCGrenderer.get_pro_id ();
LCGtypes.agf=WalTypes.NOSEM;
LCGtypes.args=LCGrules.make_variant l}
| _ -> failwith "get_dep_parsed_term"
(*FIXME: Bębni na maszynie do pisania.
Na myśl o czym brykasz?*)