Commit a731666f50d088b4a3451584a3add0fa9c3ae0b1
1 parent
240802de
leksykon kategorialny dla drzew zależnościowych
Showing
4 changed files
with
117 additions
and
49 deletions
parser/LCGchart.ml
... | ... | @@ -17,7 +17,7 @@ |
17 | 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 | *) |
19 | 19 | |
20 | -open Xstd | |
20 | +open Xstd | |
21 | 21 | open LCGtypes |
22 | 22 | open Printf |
23 | 23 | |
... | ... | @@ -25,30 +25,30 @@ let make size = Array.make_matrix (size+1) (size+1) ([],0) |
25 | 25 | |
26 | 26 | let last_node graph = Array.length graph - 1 |
27 | 27 | |
28 | -let add graph i j v layer = | |
28 | +let add graph i j v layer = | |
29 | 29 | graph.(i).(j) <- [v],layer; |
30 | 30 | graph |
31 | - | |
32 | -let add_list graph i j l layer = | |
31 | + | |
32 | +let add_list graph i j l layer = | |
33 | 33 | graph.(i).(j) <- l,layer; |
34 | 34 | graph |
35 | - | |
36 | -let add_inc graph i j v layer = | |
35 | + | |
36 | +let add_inc graph i j v layer = | |
37 | 37 | let l,layer2 = graph.(i).(j) in |
38 | 38 | graph.(i).(j) <- v :: l, max layer layer2; |
39 | 39 | graph |
40 | - | |
41 | -let add_inc_list graph i j l layer = | |
40 | + | |
41 | +let add_inc_list graph i j l layer = | |
42 | 42 | let l2,layer2 = graph.(i).(j) in |
43 | 43 | graph.(i).(j) <- l @ l2, max layer layer2; |
44 | 44 | graph |
45 | - | |
45 | + | |
46 | 46 | let find graph i j = fst graph.(i).(j) |
47 | 47 | let layer graph i j = snd graph.(i).(j) |
48 | - | |
48 | + | |
49 | 49 | let fold graph s f = |
50 | - Int.fold 0 (last_node graph) s (fun s i -> | |
51 | - Int.fold 0 (last_node graph) s (fun s j -> | |
50 | + Int.fold 0 (last_node graph) s (fun s i -> | |
51 | + Int.fold 0 (last_node graph) s (fun s j -> | |
52 | 52 | let layer = layer graph i j in |
53 | 53 | Xlist.fold (find graph i j) s (fun s (symbol,sem) -> |
54 | 54 | f s (symbol,i,j,sem,layer)))) |
... | ... | @@ -60,16 +60,16 @@ let rec find_paths_rec graph last i = |
60 | 60 | let tails = find_paths_rec graph last j in |
61 | 61 | if Xlist.size tails > 1000000 then failwith "find_paths_rec: to many paths" else |
62 | 62 | Xlist.fold tails paths (fun paths tail -> (graph.(i).(j) :: tail) :: paths)) |
63 | - | |
63 | + | |
64 | 64 | let find_paths graph = |
65 | 65 | let last = last_node graph - 1 in |
66 | 66 | find_paths_rec graph last 0 |
67 | - | |
67 | + | |
68 | 68 | let get_no_entries graph = |
69 | - Int.fold 0 (last_node graph) 0 (fun n i -> | |
70 | - Int.fold 0 (last_node graph) n (fun n j -> | |
69 | + Int.fold 0 (last_node graph) 0 (fun n i -> | |
70 | + Int.fold 0 (last_node graph) n (fun n j -> | |
71 | 71 | n + (Xlist.size (find graph i j)))) |
72 | - | |
72 | + | |
73 | 73 | (* Pod referencją 0 będzie trzymany korzeń termu *) |
74 | 74 | let lazify graph = |
75 | 75 | let new_graph = make (last_node graph) in |
... | ... | @@ -77,18 +77,33 @@ let lazify graph = |
77 | 77 | add_inc new_graph i j (symbol,Ref n) layer, |
78 | 78 | (n,sem) :: references, |
79 | 79 | n+1) |
80 | - | |
80 | + | |
81 | +let rec dep_lazify_rec references next_reference (DepNode(id,left,l,right)) = | |
82 | + let l,references,next_reference = Xlist.fold l ([],references,next_reference) (fun (l,references,next_reference) (symbol,sem) -> | |
83 | + (symbol,Ref next_reference) :: l, | |
84 | + (next_reference,sem) :: references, | |
85 | + next_reference + 1) in | |
86 | + let left,references,next_reference = Xlist.fold left ([],references,next_reference) (fun (left,references,next_reference) t -> | |
87 | + let t,references,next_reference = dep_lazify_rec references next_reference t in | |
88 | + t :: left,references,next_reference) in | |
89 | + let right,references,next_reference = Xlist.fold left ([],references,next_reference) (fun (right,references,next_reference) t -> | |
90 | + let t,references,next_reference = dep_lazify_rec references next_reference t in | |
91 | + t :: right,references,next_reference) in | |
92 | + DepNode(id,List.rev left,l,List.rev right),references,next_reference | |
93 | + | |
94 | +let dep_lazify dep_graph = dep_lazify_rec [] 1 dep_graph | |
95 | + | |
81 | 96 | let merge_sems l = (* FIXME: dodać warianty *) |
82 | 97 | let map = Xlist.fold l SymbolMap.empty (fun map (t,sem) -> SymbolMap.add_inc map t [sem] (fun l -> sem :: l)) in |
83 | 98 | SymbolMap.fold map [] (fun l t sems -> (t,LCGrules.make_variant sems) :: l) |
84 | 99 | |
85 | -let make_unique graph i j = | |
100 | +let make_unique graph i j = | |
86 | 101 | let l,layer = graph.(i).(j) in |
87 | 102 | let l = merge_sems l in |
88 | 103 | graph.(i).(j) <- l, layer; |
89 | 104 | graph |
90 | - | |
91 | -let parse timeout graph refs next_ref time_fun = | |
105 | + | |
106 | +let parse timeout graph refs next_ref time_fun = | |
92 | 107 | (* print_endline "parse"; *) |
93 | 108 | LCGrules.references := refs; |
94 | 109 | LCGrules.next_reference := next_ref; |
... | ... | @@ -106,32 +121,61 @@ let parse timeout graph refs next_ref time_fun = |
106 | 121 | (* Xlist.fold (find graph i j) l (fun l a -> |
107 | 122 | Xlist.fold (find graph j k) l (fun l b -> |
108 | 123 | (rule (a,b)) @ l)),*) |
109 | - max lay ((max (layer graph i j) (layer graph j k)) + 1)) in | |
124 | + max lay ((max (layer graph i j) (layer graph j k)) + 1)) in | |
110 | 125 | (* print_int i; print_string " "; print_int j; print_string " "; print_int k; print_newline (); *) |
111 | 126 | (* let l = LCGreductions.merge_symbols l in *) |
112 | 127 | (* if Xlist.size l > 0 then Printf.printf "parse: %d-%d |l|=%d\n%!" i k (Xlist.size l); *) |
113 | 128 | make_unique (add_list graph i k l lay) i k))) in |
114 | 129 | graph, !LCGrules.references, !LCGrules.next_reference |
115 | - | |
130 | + | |
131 | +let dep_parse_rec timeout time_fun (DepNode(id,left,l,right)) = | |
132 | +(*let last_node = last_node graph in | |
133 | +let graph = Int.fold 2 last_node graph (fun graph len -> | |
134 | + Int.fold 0 (last_node - len) graph (fun graph i -> | |
135 | + let k = i + len in | |
136 | + Int.fold 1 (len - 1) graph (fun graph d -> | |
137 | + let time = time_fun () in | |
138 | + if time -. start_time > timeout then raise (Timeout(time -. start_time)) else | |
139 | + let j = i + d in | |
140 | + let l,lay = Xlist.fold LCGrules.rules (find graph i k,layer graph i k) (fun (l,lay) rule -> | |
141 | + (rule (find graph i j) (find graph j k)) @ l, | |
142 | +(* Xlist.fold (find graph i j) l (fun l a -> | |
143 | + Xlist.fold (find graph j k) l (fun l b -> | |
144 | + (rule (a,b)) @ l)),*) | |
145 | + max lay ((max (layer graph i j) (layer graph j k)) + 1)) in | |
146 | + (* print_int i; print_string " "; print_int j; print_string " "; print_int k; print_newline (); *) | |
147 | +(* let l = LCGreductions.merge_symbols l in *) | |
148 | +(* if Xlist.size l > 0 then Printf.printf "parse: %d-%d |l|=%d\n%!" i k (Xlist.size l); *) | |
149 | + make_unique (add_list graph i k l lay) i k))) in*) | |
150 | + | |
151 | + | |
152 | +let dep_parse timeout dep_graph refs next_ref time_fun = | |
153 | +(* print_endline "dep_parse"; *) | |
154 | + LCGrules.references := refs; | |
155 | + LCGrules.next_reference := next_ref; | |
156 | + let start_time = time_fun () in | |
157 | + let dep_graph = dep_parse_rec timeout time_fun dep_graph | |
158 | + dep_graph, !LCGrules.references, !LCGrules.next_reference | |
159 | + | |
116 | 160 | let is_parsed graph = |
117 | 161 | let n = last_node graph in |
118 | 162 | try |
119 | - let _ = Xlist.assoc (find graph 0 n) (LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<query>"])) in | |
163 | + let _ = Xlist.assoc (find graph 0 n) (LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<query>"])) in | |
120 | 164 | true |
121 | 165 | with Not_found -> false |
122 | 166 | |
123 | 167 | let get_parsed_term graph = |
124 | 168 | let n = last_node graph in |
125 | - let l = Xlist.fold (find graph 0 n) [] (fun l -> function | |
169 | + let l = Xlist.fold (find graph 0 n) [] (fun l -> function | |
126 | 170 | LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<query>"]), sem -> (LCGtypes.Cut (LCGtypes.Tuple[sem])) :: l |
127 | 171 | | _ -> l) in |
128 | - LCGtypes.Node{LCGrenderer.empty_node with | |
129 | - LCGtypes.pred="<root>"; | |
130 | - LCGtypes.cat="interp"; | |
172 | + LCGtypes.Node{LCGrenderer.empty_node with | |
173 | + LCGtypes.pred="<root>"; | |
174 | + LCGtypes.cat="interp"; | |
131 | 175 | LCGtypes.id=LCGrenderer.get_pro_id (); |
132 | - LCGtypes.agf=WalTypes.NOSEM; | |
176 | + LCGtypes.agf=WalTypes.NOSEM; | |
133 | 177 | LCGtypes.args=LCGrules.make_variant l} |
134 | - | |
135 | - | |
136 | -(*FIXME: Bębni na maszynie do pisania. | |
178 | + | |
179 | + | |
180 | +(*FIXME: Bębni na maszynie do pisania. | |
137 | 181 | Na myśl o czym brykasz?*) |
... | ... |
parser/LCGlexicon.ml
... | ... | @@ -1264,3 +1264,21 @@ let create (*query*) (paths,last,next_id) = |
1264 | 1264 | let set = Xlist.fold paths IntIntSet.empty (fun set t -> IntIntSet.add set (t.lnode,t.rnode)) in |
1265 | 1265 | let graph = IntIntSet.fold set graph (fun graph (i,j) -> LCGchart.make_unique graph i j) in |
1266 | 1266 | graph, !LCGrenderer.pro_id_counter |
1267 | + | |
1268 | +let rec split_sons left id right = function | |
1269 | + [] -> List.rev (List.sort compare left), List.sort compare right | |
1270 | + | x :: l -> if x < id then split_sons (x :: left) id right l else split_sons left id (x :: right) l | |
1271 | + | |
1272 | +let rec dep_create_rec nodes sons id = | |
1273 | + let node = if id = "0" then [] else StringMap.find nodes id in | |
1274 | + let l = try StringMap.find sons id with Not_found -> [] in | |
1275 | + let left,right = split_sons [] id [] l in | |
1276 | + DepNode(id, Xlist.map left (dep_create_rec nodes sons), node, Xlist.map right (dep_create_rec nodes sons)) | |
1277 | + | |
1278 | +let dep_create paths next_id = | |
1279 | + uni_weight := 0.; | |
1280 | + LCGrenderer.pro_id_counter := next_id; | |
1281 | + let sons = Xlist.fold paths StringMap.empty (fun sons t -> | |
1282 | + StringMap.add_inc sons t.conll_super [t.conll_id] (fun l -> t.conll_id :: l)) in | |
1283 | + let nodes = Xlist.fold paths StringMap.empty (fun nodes t -> StringMap.add nodes t.conll_id (create_entries t t)) in | |
1284 | + dep_create_rec nodes sons "0", !LCGrenderer.pro_id_counter | |
... | ... |
parser/LCGtypes.ml
... | ... | @@ -16,7 +16,7 @@ |
16 | 16 | * You should have received a copy of the GNU General Public License |
17 | 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 | *) |
19 | - | |
19 | + | |
20 | 20 | open Xstd |
21 | 21 | |
22 | 22 | type direction = Forward | Backward | Both |
... | ... | @@ -28,7 +28,7 @@ type node = { |
28 | 28 | cat: string; |
29 | 29 | weight: float; |
30 | 30 | id: int; |
31 | - gs: linear_term; | |
31 | + gs: linear_term; | |
32 | 32 | agf: WalTypes.gf; |
33 | 33 | amorf: WalTypes.morf; |
34 | 34 | arole: string; |
... | ... | @@ -39,15 +39,15 @@ type node = { |
39 | 39 | position: WalTypes.schema_field; |
40 | 40 | attrs: (string * linear_term) list; |
41 | 41 | args: linear_term} |
42 | - | |
43 | -and concept = | |
42 | + | |
43 | +and concept = | |
44 | 44 | {c_sense: linear_term; c_name: linear_term; (*c_visible_var: bool;*) c_quant: linear_term; c_local_quant: bool; |
45 | 45 | (*c_modalities: (string * type_term) list;*) |
46 | 46 | c_relations: linear_term; c_variable: (string * string); c_pos: int} |
47 | - | |
48 | -and context = | |
47 | + | |
48 | +and context = | |
49 | 49 | {cx_sense: linear_term; cx_contents: linear_term; cx_relations: linear_term; cx_variable: (string * string); cx_pos: int} |
50 | - | |
50 | + | |
51 | 51 | and linear_term = |
52 | 52 | Var of linear_variable |
53 | 53 | | Tuple of linear_term list |
... | ... | @@ -85,15 +85,15 @@ and linear_term = |
85 | 85 | | SetContextName of string * linear_term |
86 | 86 | | Ref of int |
87 | 87 | | Cut of linear_term |
88 | - | |
88 | + | |
89 | 89 | type internal_grammar_symbol = |
90 | 90 | Atom of string |
91 | 91 | | AVar of string |
92 | 92 | | With of internal_grammar_symbol list |
93 | 93 | | Zero |
94 | 94 | | Top |
95 | - | |
96 | -type grammar_symbol = | |
95 | + | |
96 | +type grammar_symbol = | |
97 | 97 | Tensor of internal_grammar_symbol list |
98 | 98 | | Plus of grammar_symbol list |
99 | 99 | | Imp of grammar_symbol * direction * grammar_symbol |
... | ... | @@ -108,9 +108,9 @@ type grammar_symbol = |
108 | 108 | module OrderedSymbol = struct |
109 | 109 | |
110 | 110 | type t = grammar_symbol |
111 | - | |
111 | + | |
112 | 112 | let compare = compare |
113 | - | |
113 | + | |
114 | 114 | end |
115 | 115 | |
116 | 116 | module SymbolMap = Xmap.Make(OrderedSymbol) |
... | ... | @@ -118,9 +118,9 @@ module SymbolMap = Xmap.Make(OrderedSymbol) |
118 | 118 | module OrderedTerm = struct |
119 | 119 | |
120 | 120 | type t = linear_term |
121 | - | |
121 | + | |
122 | 122 | let compare = compare |
123 | - | |
123 | + | |
124 | 124 | end |
125 | 125 | |
126 | 126 | module TermMap = Xmap.Make(OrderedTerm) |
... | ... | @@ -129,15 +129,17 @@ module TermSet = Xset.Make(OrderedTerm) |
129 | 129 | module OrderedSymbolTerm = struct |
130 | 130 | |
131 | 131 | type t = grammar_symbol * linear_term |
132 | - | |
132 | + | |
133 | 133 | let compare = compare |
134 | - | |
134 | + | |
135 | 135 | end |
136 | 136 | |
137 | 137 | module SymbolTermSet = Xset.Make(OrderedSymbolTerm) |
138 | - | |
138 | + | |
139 | 139 | type graph = (SymbolTermSet.key list * int) array array |
140 | 140 | |
141 | 141 | exception Timeout of float |
142 | 142 | exception SemTooBig |
143 | 143 | |
144 | +type dep_tree = | |
145 | + DepNode of string * dep_tree list * (grammar_symbol * linear_term) list * dep_tree list (* conll_id * left_nodes * ... * right_nodes *) | |
... | ... |
parser/exec.ml
... | ... | @@ -198,6 +198,10 @@ let rec parse_sentence timeout test_only_flag mode next_id = function |
198 | 198 | | StructSentence(paths,last) -> |
199 | 199 | (match mode with |
200 | 200 | CONLL -> |
201 | + let dep_graph, next_id = LCGlexicon.dep_create paths next_id in | |
202 | + let dep_graph,references,next_reference = LCGchart.dep_lazify dep_graph in | |
203 | + let dep_graph,references,next_reference = LCGchart.dep_parse timeout dep_graph references next_reference time_fun in | |
204 | + (* FIXME: dodać dalsze przetwarzanie dep_graph *) | |
201 | 205 | let xml = DepTree.conll_to_xml paths in |
202 | 206 | let graph = Array.of_list (XmlPrinter.graph_of_xml xml) in (* FIXME: do poprawy *) |
203 | 207 | Visualization.print_graph "results/" "term_conll" graph; |
... | ... |