Commit a731666f50d088b4a3451584a3add0fa9c3ae0b1

Authored by Wojciech Jaworski
1 parent 240802de

leksykon kategorialny dla drzew zależnościowych

parser/LCGchart.ml
... ... @@ -17,7 +17,7 @@
17 17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 18 *)
19 19  
20   -open Xstd
  20 +open Xstd
21 21 open LCGtypes
22 22 open Printf
23 23  
... ... @@ -25,30 +25,30 @@ let make size = Array.make_matrix (size+1) (size+1) ([],0)
25 25  
26 26 let last_node graph = Array.length graph - 1
27 27  
28   -let add graph i j v layer =
  28 +let add graph i j v layer =
29 29 graph.(i).(j) <- [v],layer;
30 30 graph
31   -
32   -let add_list graph i j l layer =
  31 +
  32 +let add_list graph i j l layer =
33 33 graph.(i).(j) <- l,layer;
34 34 graph
35   -
36   -let add_inc graph i j v layer =
  35 +
  36 +let add_inc graph i j v layer =
37 37 let l,layer2 = graph.(i).(j) in
38 38 graph.(i).(j) <- v :: l, max layer layer2;
39 39 graph
40   -
41   -let add_inc_list graph i j l layer =
  40 +
  41 +let add_inc_list graph i j l layer =
42 42 let l2,layer2 = graph.(i).(j) in
43 43 graph.(i).(j) <- l @ l2, max layer layer2;
44 44 graph
45   -
  45 +
46 46 let find graph i j = fst graph.(i).(j)
47 47 let layer graph i j = snd graph.(i).(j)
48   -
  48 +
49 49 let fold graph s f =
50   - Int.fold 0 (last_node graph) s (fun s i ->
51   - Int.fold 0 (last_node graph) s (fun s j ->
  50 + Int.fold 0 (last_node graph) s (fun s i ->
  51 + Int.fold 0 (last_node graph) s (fun s j ->
52 52 let layer = layer graph i j in
53 53 Xlist.fold (find graph i j) s (fun s (symbol,sem) ->
54 54 f s (symbol,i,j,sem,layer))))
... ... @@ -60,16 +60,16 @@ let rec find_paths_rec graph last i =
60 60 let tails = find_paths_rec graph last j in
61 61 if Xlist.size tails > 1000000 then failwith "find_paths_rec: to many paths" else
62 62 Xlist.fold tails paths (fun paths tail -> (graph.(i).(j) :: tail) :: paths))
63   -
  63 +
64 64 let find_paths graph =
65 65 let last = last_node graph - 1 in
66 66 find_paths_rec graph last 0
67   -
  67 +
68 68 let get_no_entries graph =
69   - Int.fold 0 (last_node graph) 0 (fun n i ->
70   - Int.fold 0 (last_node graph) n (fun n j ->
  69 + Int.fold 0 (last_node graph) 0 (fun n i ->
  70 + Int.fold 0 (last_node graph) n (fun n j ->
71 71 n + (Xlist.size (find graph i j))))
72   -
  72 +
73 73 (* Pod referencją 0 będzie trzymany korzeń termu *)
74 74 let lazify graph =
75 75 let new_graph = make (last_node graph) in
... ... @@ -77,18 +77,33 @@ let lazify graph =
77 77 add_inc new_graph i j (symbol,Ref n) layer,
78 78 (n,sem) :: references,
79 79 n+1)
80   -
  80 +
  81 +let rec dep_lazify_rec references next_reference (DepNode(id,left,l,right)) =
  82 + let l,references,next_reference = Xlist.fold l ([],references,next_reference) (fun (l,references,next_reference) (symbol,sem) ->
  83 + (symbol,Ref next_reference) :: l,
  84 + (next_reference,sem) :: references,
  85 + next_reference + 1) in
  86 + let left,references,next_reference = Xlist.fold left ([],references,next_reference) (fun (left,references,next_reference) t ->
  87 + let t,references,next_reference = dep_lazify_rec references next_reference t in
  88 + t :: left,references,next_reference) in
  89 + let right,references,next_reference = Xlist.fold left ([],references,next_reference) (fun (right,references,next_reference) t ->
  90 + let t,references,next_reference = dep_lazify_rec references next_reference t in
  91 + t :: right,references,next_reference) in
  92 + DepNode(id,List.rev left,l,List.rev right),references,next_reference
  93 +
  94 +let dep_lazify dep_graph = dep_lazify_rec [] 1 dep_graph
  95 +
81 96 let merge_sems l = (* FIXME: dodać warianty *)
82 97 let map = Xlist.fold l SymbolMap.empty (fun map (t,sem) -> SymbolMap.add_inc map t [sem] (fun l -> sem :: l)) in
83 98 SymbolMap.fold map [] (fun l t sems -> (t,LCGrules.make_variant sems) :: l)
84 99  
85   -let make_unique graph i j =
  100 +let make_unique graph i j =
86 101 let l,layer = graph.(i).(j) in
87 102 let l = merge_sems l in
88 103 graph.(i).(j) <- l, layer;
89 104 graph
90   -
91   -let parse timeout graph refs next_ref time_fun =
  105 +
  106 +let parse timeout graph refs next_ref time_fun =
92 107 (* print_endline "parse"; *)
93 108 LCGrules.references := refs;
94 109 LCGrules.next_reference := next_ref;
... ... @@ -106,32 +121,61 @@ let parse timeout graph refs next_ref time_fun =
106 121 (* Xlist.fold (find graph i j) l (fun l a ->
107 122 Xlist.fold (find graph j k) l (fun l b ->
108 123 (rule (a,b)) @ l)),*)
109   - max lay ((max (layer graph i j) (layer graph j k)) + 1)) in
  124 + max lay ((max (layer graph i j) (layer graph j k)) + 1)) in
110 125 (* print_int i; print_string " "; print_int j; print_string " "; print_int k; print_newline (); *)
111 126 (* let l = LCGreductions.merge_symbols l in *)
112 127 (* if Xlist.size l > 0 then Printf.printf "parse: %d-%d |l|=%d\n%!" i k (Xlist.size l); *)
113 128 make_unique (add_list graph i k l lay) i k))) in
114 129 graph, !LCGrules.references, !LCGrules.next_reference
115   -
  130 +
  131 +let dep_parse_rec timeout time_fun (DepNode(id,left,l,right)) =
  132 +(*let last_node = last_node graph in
  133 +let graph = Int.fold 2 last_node graph (fun graph len ->
  134 + Int.fold 0 (last_node - len) graph (fun graph i ->
  135 + let k = i + len in
  136 + Int.fold 1 (len - 1) graph (fun graph d ->
  137 + let time = time_fun () in
  138 + if time -. start_time > timeout then raise (Timeout(time -. start_time)) else
  139 + let j = i + d in
  140 + let l,lay = Xlist.fold LCGrules.rules (find graph i k,layer graph i k) (fun (l,lay) rule ->
  141 + (rule (find graph i j) (find graph j k)) @ l,
  142 +(* Xlist.fold (find graph i j) l (fun l a ->
  143 + Xlist.fold (find graph j k) l (fun l b ->
  144 + (rule (a,b)) @ l)),*)
  145 + max lay ((max (layer graph i j) (layer graph j k)) + 1)) in
  146 + (* print_int i; print_string " "; print_int j; print_string " "; print_int k; print_newline (); *)
  147 +(* let l = LCGreductions.merge_symbols l in *)
  148 +(* if Xlist.size l > 0 then Printf.printf "parse: %d-%d |l|=%d\n%!" i k (Xlist.size l); *)
  149 + make_unique (add_list graph i k l lay) i k))) in*)
  150 +
  151 +
  152 +let dep_parse timeout dep_graph refs next_ref time_fun =
  153 +(* print_endline "dep_parse"; *)
  154 + LCGrules.references := refs;
  155 + LCGrules.next_reference := next_ref;
  156 + let start_time = time_fun () in
  157 + let dep_graph = dep_parse_rec timeout time_fun dep_graph
  158 + dep_graph, !LCGrules.references, !LCGrules.next_reference
  159 +
116 160 let is_parsed graph =
117 161 let n = last_node graph in
118 162 try
119   - let _ = Xlist.assoc (find graph 0 n) (LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<query>"])) in
  163 + let _ = Xlist.assoc (find graph 0 n) (LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<query>"])) in
120 164 true
121 165 with Not_found -> false
122 166  
123 167 let get_parsed_term graph =
124 168 let n = last_node graph in
125   - let l = Xlist.fold (find graph 0 n) [] (fun l -> function
  169 + let l = Xlist.fold (find graph 0 n) [] (fun l -> function
126 170 LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<query>"]), sem -> (LCGtypes.Cut (LCGtypes.Tuple[sem])) :: l
127 171 | _ -> l) in
128   - LCGtypes.Node{LCGrenderer.empty_node with
129   - LCGtypes.pred="<root>";
130   - LCGtypes.cat="interp";
  172 + LCGtypes.Node{LCGrenderer.empty_node with
  173 + LCGtypes.pred="<root>";
  174 + LCGtypes.cat="interp";
131 175 LCGtypes.id=LCGrenderer.get_pro_id ();
132   - LCGtypes.agf=WalTypes.NOSEM;
  176 + LCGtypes.agf=WalTypes.NOSEM;
133 177 LCGtypes.args=LCGrules.make_variant l}
134   -
135   -
136   -(*FIXME: Bębni na maszynie do pisania.
  178 +
  179 +
  180 +(*FIXME: Bębni na maszynie do pisania.
137 181 Na myśl o czym brykasz?*)
... ...
parser/LCGlexicon.ml
... ... @@ -1264,3 +1264,21 @@ let create (*query*) (paths,last,next_id) =
1264 1264 let set = Xlist.fold paths IntIntSet.empty (fun set t -> IntIntSet.add set (t.lnode,t.rnode)) in
1265 1265 let graph = IntIntSet.fold set graph (fun graph (i,j) -> LCGchart.make_unique graph i j) in
1266 1266 graph, !LCGrenderer.pro_id_counter
  1267 +
  1268 +let rec split_sons left id right = function
  1269 + [] -> List.rev (List.sort compare left), List.sort compare right
  1270 + | x :: l -> if x < id then split_sons (x :: left) id right l else split_sons left id (x :: right) l
  1271 +
  1272 +let rec dep_create_rec nodes sons id =
  1273 + let node = if id = "0" then [] else StringMap.find nodes id in
  1274 + let l = try StringMap.find sons id with Not_found -> [] in
  1275 + let left,right = split_sons [] id [] l in
  1276 + DepNode(id, Xlist.map left (dep_create_rec nodes sons), node, Xlist.map right (dep_create_rec nodes sons))
  1277 +
  1278 +let dep_create paths next_id =
  1279 + uni_weight := 0.;
  1280 + LCGrenderer.pro_id_counter := next_id;
  1281 + let sons = Xlist.fold paths StringMap.empty (fun sons t ->
  1282 + StringMap.add_inc sons t.conll_super [t.conll_id] (fun l -> t.conll_id :: l)) in
  1283 + let nodes = Xlist.fold paths StringMap.empty (fun nodes t -> StringMap.add nodes t.conll_id (create_entries t t)) in
  1284 + dep_create_rec nodes sons "0", !LCGrenderer.pro_id_counter
... ...
parser/LCGtypes.ml
... ... @@ -16,7 +16,7 @@
16 16 * You should have received a copy of the GNU General Public License
17 17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 18 *)
19   -
  19 +
20 20 open Xstd
21 21  
22 22 type direction = Forward | Backward | Both
... ... @@ -28,7 +28,7 @@ type node = {
28 28 cat: string;
29 29 weight: float;
30 30 id: int;
31   - gs: linear_term;
  31 + gs: linear_term;
32 32 agf: WalTypes.gf;
33 33 amorf: WalTypes.morf;
34 34 arole: string;
... ... @@ -39,15 +39,15 @@ type node = {
39 39 position: WalTypes.schema_field;
40 40 attrs: (string * linear_term) list;
41 41 args: linear_term}
42   -
43   -and concept =
  42 +
  43 +and concept =
44 44 {c_sense: linear_term; c_name: linear_term; (*c_visible_var: bool;*) c_quant: linear_term; c_local_quant: bool;
45 45 (*c_modalities: (string * type_term) list;*)
46 46 c_relations: linear_term; c_variable: (string * string); c_pos: int}
47   -
48   -and context =
  47 +
  48 +and context =
49 49 {cx_sense: linear_term; cx_contents: linear_term; cx_relations: linear_term; cx_variable: (string * string); cx_pos: int}
50   -
  50 +
51 51 and linear_term =
52 52 Var of linear_variable
53 53 | Tuple of linear_term list
... ... @@ -85,15 +85,15 @@ and linear_term =
85 85 | SetContextName of string * linear_term
86 86 | Ref of int
87 87 | Cut of linear_term
88   -
  88 +
89 89 type internal_grammar_symbol =
90 90 Atom of string
91 91 | AVar of string
92 92 | With of internal_grammar_symbol list
93 93 | Zero
94 94 | Top
95   -
96   -type grammar_symbol =
  95 +
  96 +type grammar_symbol =
97 97 Tensor of internal_grammar_symbol list
98 98 | Plus of grammar_symbol list
99 99 | Imp of grammar_symbol * direction * grammar_symbol
... ... @@ -108,9 +108,9 @@ type grammar_symbol =
108 108 module OrderedSymbol = struct
109 109  
110 110 type t = grammar_symbol
111   -
  111 +
112 112 let compare = compare
113   -
  113 +
114 114 end
115 115  
116 116 module SymbolMap = Xmap.Make(OrderedSymbol)
... ... @@ -118,9 +118,9 @@ module SymbolMap = Xmap.Make(OrderedSymbol)
118 118 module OrderedTerm = struct
119 119  
120 120 type t = linear_term
121   -
  121 +
122 122 let compare = compare
123   -
  123 +
124 124 end
125 125  
126 126 module TermMap = Xmap.Make(OrderedTerm)
... ... @@ -129,15 +129,17 @@ module TermSet = Xset.Make(OrderedTerm)
129 129 module OrderedSymbolTerm = struct
130 130  
131 131 type t = grammar_symbol * linear_term
132   -
  132 +
133 133 let compare = compare
134   -
  134 +
135 135 end
136 136  
137 137 module SymbolTermSet = Xset.Make(OrderedSymbolTerm)
138   -
  138 +
139 139 type graph = (SymbolTermSet.key list * int) array array
140 140  
141 141 exception Timeout of float
142 142 exception SemTooBig
143 143  
  144 +type dep_tree =
  145 + DepNode of string * dep_tree list * (grammar_symbol * linear_term) list * dep_tree list (* conll_id * left_nodes * ... * right_nodes *)
... ...
parser/exec.ml
... ... @@ -198,6 +198,10 @@ let rec parse_sentence timeout test_only_flag mode next_id = function
198 198 | StructSentence(paths,last) ->
199 199 (match mode with
200 200 CONLL ->
  201 + let dep_graph, next_id = LCGlexicon.dep_create paths next_id in
  202 + let dep_graph,references,next_reference = LCGchart.dep_lazify dep_graph in
  203 + let dep_graph,references,next_reference = LCGchart.dep_parse timeout dep_graph references next_reference time_fun in
  204 + (* FIXME: dodać dalsze przetwarzanie dep_graph *)
201 205 let xml = DepTree.conll_to_xml paths in
202 206 let graph = Array.of_list (XmlPrinter.graph_of_xml xml) in (* FIXME: do poprawy *)
203 207 Visualization.print_graph "results/" "term_conll" graph;
... ...