|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
|
(*
* ENIAM_LCGparser, a parser for Logical Categorial Grammar formalism
* Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
* Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
*
* This library is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*)
open Xstd
open ENIAM_LCGtypes
open Printf
(* open ENIAMtokenizerTypes
open ENIAMlexSemanticsTypes *)
let make size = Array.make_matrix (size+1) (size+1) ([],0)
let last_node chart = Array.length chart - 1
let add chart i j v layer =
chart.(i).(j) <- [v],layer;
chart
let add_list chart i j l layer =
chart.(i).(j) <- l,layer;
chart
let add_inc chart i j v layer =
let l,layer2 = chart.(i).(j) in
chart.(i).(j) <- v :: l, max layer layer2;
chart
let add_inc_list chart i j l layer =
let l2,layer2 = chart.(i).(j) in
chart.(i).(j) <- l @ l2, max layer layer2;
chart
let find chart i j = fst chart.(i).(j)
let layer chart i j = snd chart.(i).(j)
let fold chart s f =
Int.fold 0 (last_node chart) s (fun s i ->
Int.fold 0 (last_node chart) s (fun s j ->
let layer = layer chart i j in
Xlist.fold (find chart i j) s (fun s (symbol,sem) ->
f s (symbol,i,j,sem,layer))))
let rec find_paths_rec chart last i =
if i = last then [[]] else
Int.fold (i+1) last [] (fun paths j ->
if chart.(i).(j) = [] then paths else
let tails = find_paths_rec chart last j in
if Xlist.size tails > 1000000 then failwith "find_paths_rec: to many paths" else
Xlist.fold tails paths (fun paths tail -> (chart.(i).(j) :: tail) :: paths))
let find_paths chart =
let last = last_node chart - 1 in
find_paths_rec chart last 0
let get_no_entries chart =
Int.fold 0 (last_node chart) 0 (fun n i ->
Int.fold 0 (last_node chart) n (fun n j ->
n + (Xlist.size (find chart i j))))
(* Pod referencją 0 będzie trzymany korzeń termu *)
let lazify chart =
let new_chart = make (last_node chart) in
let references = ExtArray.make (2 * last_node chart) Dot in
let _ = ExtArray.add references Dot in (* to jest potrzebne by na pozycji 0 umieścić korzeń termu *)
let new_chart = fold chart new_chart (fun new_chart (symbol,i,j,sem,layer) ->
let n = ExtArray.add references sem in
add_inc new_chart i j (symbol,Ref n) layer) in
new_chart, references
let rec dep_lazify_rec references (DepNode(id,left,l,right)) =
(* Printf.printf "dep_lazify_rec %s\n" id; *)
let l = Xlist.rev_map l ENIAM_LCGrules.flatten_functor in
let l = Xlist.fold l [] (fun l (symbol,sem) ->
let n = ExtArray.add references sem in
(symbol,Ref n) :: l) in
let left = Xlist.map left (dep_lazify_rec references) in
let right = Xlist.map right (dep_lazify_rec references) in
DepNode(id,left,l,right)
let dep_lazify dep_chart =
let references = ExtArray.make 100 Dot in
let _ = ExtArray.add references Dot in (* to jest potrzebne by na pozycji 0 umieścić korzeń termu *)
dep_lazify_rec references dep_chart, references
let merge_sems l = (* FIXME: dodać warianty *)
let map = Xlist.fold l SymbolMap.empty (fun map (t,sem) -> SymbolMap.add_inc map t [sem] (fun l -> sem :: l)) in
SymbolMap.fold map [] (fun l t sems -> (t,ENIAM_LCGrules.make_variant sems) :: l)
let make_unique chart i j =
let l,layer = chart.(i).(j) in
let l = merge_sems l in
chart.(i).(j) <- l, layer;
chart
let parse chart references timeout time_fun =
(* print_endline "parse 1"; *)
(* ENIAM_LCGrules.references := refs;
ENIAM_LCGrules.next_reference := next_ref; *)
let start_time = time_fun () in
let last_node = last_node chart in
let chart = Int.fold 2 last_node chart (fun chart len ->
Int.fold 0 (last_node - len) chart (fun chart i ->
let k = i + len in
Int.fold 1 (len - 1) chart (fun chart d ->
let time = time_fun () in
if time -. start_time > timeout then raise (Timeout(time -. start_time)) else
let j = i + d in
let l,lay = Xlist.fold ENIAM_LCGrules.rules (find chart i k,layer chart i k) (fun (l,lay) rule ->
(rule references (find chart i j) (find chart j k)) @ l,
(* Xlist.fold (find chart i j) l (fun l a ->
Xlist.fold (find chart j k) l (fun l b ->
(rule (a,b)) @ l)),*)
max lay ((max (layer chart i j) (layer chart j k)) + 1)) in
(* print_int i; print_string " "; print_int j; print_string " "; print_int k; print_newline (); *)
(* let l = LCGreductions.merge_symbols l in *)
(* if Xlist.size l > 0 then Printf.printf "parse: %d-%d |l|=%d\n%!" i k (Xlist.size l); *)
make_unique (add_list chart i k l lay) i k))) in
(* print_endline "parse 2"; *)
chart
let assign_not_parsed left right (t,sem) =
let sem = if left = [] then sem else (print_endline "assign_not_parsed: ni 1"; sem) in
let sem = if right = [] then sem else (print_endline "assign_not_parsed: ni 2"; sem) in
t, sem
let rec dep_parse_rec references start_time timeout time_fun (DepNode(id,left,funct,right)) =
(* printf "dep_parse_rec 1 id=%d\n%!" id; *)
let time = time_fun () in
if time -. start_time > timeout then raise (Timeout(time -. start_time)) else
let left = Xlist.map left (dep_parse_rec references start_time timeout time_fun) in
let right = Xlist.map right (dep_parse_rec references start_time timeout time_fun) in
(* printf "dep_parse_rec 2 id=%d\n%!" id; *)
let funct,left = Xlist.fold left (funct,[]) (fun (funct,left) arg ->
(* printf "larg: %s\n" (ENIAM_LCGstringOf.symbol_sem_list arg);
printf "funct: %s\n" (ENIAM_LCGstringOf.symbol_sem_list funct); *)
match ENIAM_LCGrules.backward_application_conll references arg funct with
[] -> (*printf "NOT PARSED\n";*) funct, arg :: left
| funct -> merge_sems funct, left) in
(* printf "dep_parse_rec 3 |right|=%d \n%!" (Xlist.size right); *)
let funct,right = Xlist.fold right (funct,[]) (fun (funct,right) arg ->
(* printf "funct: %s\n" (ENIAM_LCGstringOf.symbol_sem_list funct);
printf "rarg: %s\n" (ENIAM_LCGstringOf.symbol_sem_list arg); *)
match ENIAM_LCGrules.forward_application_conll references funct arg with
[] -> (*printf "NOT PARSED\n";*) funct, arg :: right
| funct -> merge_sems funct, right) in
(* printf "dep_parse_rec 4\n%!"; *)
if left = [] && right = [] then funct else (
let xleft = Xlist.rev_map left (fun arg -> Xlist.rev_map arg ENIAM_LCGrules.set_x_type) in
let xright = Xlist.rev_map right (fun arg -> Xlist.rev_map arg ENIAM_LCGrules.set_x_type) in
(* printf "dep_parse_rec 5\n%!"; *)
let xfunct,xleft = Xlist.fold xleft (funct,[]) (fun (xfunct,left) arg ->
(* printf "larg: %s\n" (ENIAM_LCGstringOf.symbol_sem_list arg);
printf "funct: %s\n" (ENIAM_LCGstringOf.symbol_sem_list xfunct); *)
match ENIAM_LCGrules.backward_application_conll references arg xfunct with
[] -> (*printf "NOT PARSED\n";*) xfunct, arg :: left
| xfunct -> merge_sems xfunct, left) in
(* printf "dep_parse_rec 6\n%!"; *)
let xfunct,xright = Xlist.fold xright (xfunct,[]) (fun (xfunct,right) arg ->
(* printf "funct: %s\n" (ENIAM_LCGstringOf.symbol_sem_list xfunct);
printf "rarg: %s\n" (ENIAM_LCGstringOf.symbol_sem_list arg); *)
match ENIAM_LCGrules.forward_application_conll references xfunct arg with
[] -> (*printf "NOT PARSED\n";*) xfunct, arg :: right
| xfunct -> merge_sems xfunct, right) in
(* printf "dep_parse_rec 7\n%!"; *)
if xleft = [] && xright = [] then xfunct else
raise (NotDepParsed(id,left,funct,right)))
let dep_parse dep_chart references timeout time_fun =
(* print_endline "dep_parse"; *)
let start_time = time_fun () in
let parsed_dep_chart = dep_parse_rec references start_time timeout time_fun dep_chart in
parsed_dep_chart
let is_parsed chart =
let n = last_node chart in
Xlist.fold (find chart 0 n) false (fun b -> function
ENIAM_LCGtypes.Bracket(true,true,ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom "<root>"]), _ -> true
(* | ENIAM_LCGtypes.Bracket(true,true,ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom "<ors-sentence>"]), _ -> true *)
| _ -> false)
(* try
let _ = Xlist.assoc (find chart 0 n) (ENIAM_LCGtypes.Bracket(true,true,ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom "<sentence>"])) in (* FIXME: !!! *)
true
with Not_found -> false *)
let is_dep_parsed = function
[] -> false
| [ENIAM_LCGtypes.Bracket(false,false,ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom "<conll_root>"]),_] -> true
| [ENIAM_LCGtypes.Bracket(false,false,ENIAM_LCGtypes.Imp(ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom("<conll_root>")],ENIAM_LCGtypes.Forward,ENIAM_LCGtypes.Maybe _)),sem]-> true
| [t,_] -> print_endline @@ ENIAM_LCGstringOf.grammar_symbol_prime t; failwith "is_dep_parsed"
| l -> failwith ("is_dep_parsed " ^ (string_of_int @@ List.length l))
|
|
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
|
(*
let get_parsed_term tokens lex_sems chart =
let n = last_node chart in
let l = Xlist.fold (find chart 0 n) [] (fun l -> function
ENIAM_LCGtypes.Bracket(true,true,ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom "<root>"]), sem -> (ENIAM_LCGtypes.Cut(ENIAM_LCGtypes.Tuple[sem])) :: l
| ENIAM_LCGtypes.Bracket(false,false,ENIAM_LCGtypes.Imp(ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom("<conll_root>")],ENIAM_LCGtypes.Forward,ENIAM_LCGtypes.Maybe _)) as t,sem->
let sem = List.hd (ENIAM_LCGrules.deduce_optarg sem t) in
(ENIAM_LCGtypes.Cut(ENIAM_LCGtypes.Tuple[sem])) :: l
(* | ENIAM_LCGtypes.Bracket(true,true,ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom "<ors-sentence>"]), sem -> (ENIAM_LCGtypes.Cut (ENIAM_LCGtypes.Tuple[sem])) :: l *)
| _ -> l) in
let id = ExtArray.add tokens {empty_token with token=Lemma("<root>","interp",[])} in
let _ = ExtArray.add lex_sems empty_lex_sem in
ENIAM_LCGtypes.Node{LCGrenderer.empty_node with
ENIAM_LCGtypes.pred="<root>";
ENIAM_LCGtypes.cat="interp";
ENIAM_LCGtypes.id=id;
ENIAM_LCGtypes.agf=ENIAMwalTypes.NOSEM;
ENIAM_LCGtypes.args=ENIAM_LCGrules.make_variant l}
let get_dep_parsed_term tokens lex_sems = function
[ENIAM_LCGtypes.Bracket(false,false,ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom "<conll_root>"]),sem] ->
let id = ExtArray.add tokens {empty_token with token=Lemma("<root>","interp",[])} in
let _ = ExtArray.add lex_sems empty_lex_sem in
let l = [ENIAM_LCGtypes.Cut (ENIAM_LCGtypes.Tuple[sem])] in
ENIAM_LCGtypes.Node{LCGrenderer.empty_node with
ENIAM_LCGtypes.pred="<root>";
ENIAM_LCGtypes.cat="interp";
ENIAM_LCGtypes.id=id;
ENIAM_LCGtypes.agf=ENIAMwalTypes.NOSEM;
ENIAM_LCGtypes.args=ENIAM_LCGrules.make_variant l}
| _ -> failwith "get_dep_parsed_term"
*)
(*FIXME: Bębni na maszynie do pisania.
Na myśl o czym brykasz?*)
|