Commit 56fac264204becdde8146292d96ea1f2c4924d14
1 parent 1e9e955b
enable building and visualization of the results of parsing with the categorial parser
Showing 9 changed files with 296 additions and 126 deletions
parser/LCGchart.ml
... | ... | @@ -104,7 +104,7 @@ let make_unique graph i j = |
104 | 104 | graph |
105 | 105 | |
106 | 106 | let parse timeout graph refs next_ref time_fun = |
107 | -(* print_endline "parse"; *) | |
107 | + (* print_endline "parse 1"; *) | |
108 | 108 | LCGrules.references := refs; |
109 | 109 | LCGrules.next_reference := next_ref; |
110 | 110 | let start_time = time_fun () in |
... | ... | @@ -126,48 +126,56 @@ let parse timeout graph refs next_ref time_fun = |
126 | 126 | (* let l = LCGreductions.merge_symbols l in *) |
127 | 127 | (* if Xlist.size l > 0 then Printf.printf "parse: %d-%d |l|=%d\n%!" i k (Xlist.size l); *) |
128 | 128 | make_unique (add_list graph i k l lay) i k))) in |
129 | + (* print_endline "parse 2"; *) | |
129 | 130 | graph, !LCGrules.references, !LCGrules.next_reference |
130 | 131 | |
131 | -let dep_parse_rec timeout time_fun (DepNode(id,left,l,right)) = | |
132 | -(*let last_node = last_node graph in | |
133 | -let graph = Int.fold 2 last_node graph (fun graph len -> | |
134 | - Int.fold 0 (last_node - len) graph (fun graph i -> | |
135 | - let k = i + len in | |
136 | - Int.fold 1 (len - 1) graph (fun graph d -> | |
137 | - let time = time_fun () in | |
138 | - if time -. start_time > timeout then raise (Timeout(time -. start_time)) else | |
139 | - let j = i + d in | |
140 | - let l,lay = Xlist.fold LCGrules.rules (find graph i k,layer graph i k) (fun (l,lay) rule -> | |
141 | - (rule (find graph i j) (find graph j k)) @ l, | |
142 | -(* Xlist.fold (find graph i j) l (fun l a -> | |
143 | - Xlist.fold (find graph j k) l (fun l b -> | |
144 | - (rule (a,b)) @ l)),*) | |
145 | - max lay ((max (layer graph i j) (layer graph j k)) + 1)) in | |
146 | - (* print_int i; print_string " "; print_int j; print_string " "; print_int k; print_newline (); *) | |
147 | -(* let l = LCGreductions.merge_symbols l in *) | |
148 | -(* if Xlist.size l > 0 then Printf.printf "parse: %d-%d |l|=%d\n%!" i k (Xlist.size l); *) | |
149 | - make_unique (add_list graph i k l lay) i k))) in*) | |
150 | - | |
132 | +(* let rec dep_parse2 funct = function | |
133 | + larg :: left, rarg :: right -> | |
134 | + (dep_parse2 (LCGrules.forward_application funct rarg) (larg :: left, right)) @ | |
135 | + (dep_parse2 (LCGrules.backward_application larg funct) (left, rarg :: right)) | |
136 | + | larg :: left, [] -> dep_parse2 (LCGrules.backward_application larg funct) (left, []) | |
137 | + | [], rarg :: right -> dep_parse2 (LCGrules.forward_application funct rarg) ([], right) | |
138 | + | [], [] -> funct *) | |
139 | + | |
140 | +let assign_not_parsed left right (t,sem) = | |
141 | + let sem = if left = [] then sem else failwith "assign_not_parsed: ni 1" in | |
142 | + let sem = if right = [] then sem else failwith "assign_not_parsed: ni 2" in | |
143 | + t, sem | |
144 | + | |
145 | +let rec dep_parse_rec timeout time_fun (DepNode(id,left,l,right)) = | |
146 | + let left = Xlist.map left (dep_parse_rec timeout time_fun) in | |
147 | + let right = Xlist.map right (dep_parse_rec timeout time_fun) in | |
148 | + let l = Xlist.rev_map l LCGrules.flatten_functor in | |
149 | + let l,left = Xlist.fold left (l,[]) (fun (funct,left) arg -> | |
150 | + match LCGrules.backward_application arg funct with | |
151 | + [] -> l, arg :: left | |
152 | + | l -> merge_sems l, left) in | |
153 | + let l,right = Xlist.fold right (l,[]) (fun (funct,right) arg -> | |
154 | + match LCGrules.forward_application funct arg with | |
155 | + [] -> l, arg :: right | |
156 | + | l -> merge_sems l, right) in | |
157 | + Xlist.rev_map l (assign_not_parsed left right) | |
158 | + (* merge_sems (dep_parse2 l (left,right)) *) | |
151 | 159 | |
152 | 160 | let dep_parse timeout dep_graph refs next_ref time_fun = |
153 | 161 | (* print_endline "dep_parse"; *) |
154 | 162 | LCGrules.references := refs; |
155 | 163 | LCGrules.next_reference := next_ref; |
156 | 164 | let start_time = time_fun () in |
157 | - let dep_graph = dep_parse_rec timeout time_fun dep_graph | |
165 | + let dep_graph = dep_parse_rec timeout time_fun dep_graph in | |
158 | 166 | dep_graph, !LCGrules.references, !LCGrules.next_reference |
159 | 167 | |
160 | 168 | let is_parsed graph = |
161 | 169 | let n = last_node graph in |
162 | 170 | try |
163 | - let _ = Xlist.assoc (find graph 0 n) (LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<query>"])) in | |
171 | + let _ = Xlist.assoc (find graph (*0*)1 n) (LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom (*"<query>"*)"<sentence>"])) in (* FIXME: !!! *) | |
164 | 172 | true |
165 | 173 | with Not_found -> false |
166 | 174 | |
167 | 175 | let get_parsed_term graph = |
168 | 176 | let n = last_node graph in |
169 | - let l = Xlist.fold (find graph 0 n) [] (fun l -> function | |
170 | - LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<query>"]), sem -> (LCGtypes.Cut (LCGtypes.Tuple[sem])) :: l | |
177 | + let l = Xlist.fold (find graph (*0*)1 n) [] (fun l -> function | |
178 | + LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom (*"<query>"*)"<sentence>"]), sem -> (LCGtypes.Cut (LCGtypes.Tuple[sem])) :: l (* FIXME: !!! *) | |
171 | 179 | | _ -> l) in |
172 | 180 | LCGtypes.Node{LCGrenderer.empty_node with |
173 | 181 | LCGtypes.pred="<root>"; |
... | ... |
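The new dep_parse_rec parses each dependency node bottom-up: left dependents are consumed with LCGrules.backward_application and right dependents with LCGrules.forward_application, and dependents that fail to combine are set aside and then rejected by assign_not_parsed. The sketch below is a minimal, self-contained model of that traversal; the dep_node type and the backward/forward parameters are hypothetical stand-ins for DepNode and the LCGrules application rules, not the project's actual signatures.

(* Hypothetical simplified node type: a list of analyses plus left and right dependents. *)
type 'a dep_node = DepNode of 'a list * 'a dep_node list * 'a dep_node list

let rec parse_node ~backward ~forward (DepNode (analyses, left, right)) =
  (* parse the dependents bottom-up first *)
  let left = List.map (parse_node ~backward ~forward) left in
  let right = List.map (parse_node ~backward ~forward) right in
  (* try to consume a dependent; an empty result means it did not combine *)
  let consume apply (funct, unused) arg =
    match apply arg funct with
    | [] -> funct, arg :: unused
    | res -> res, unused in
  let analyses, unused_left = List.fold_left (consume backward) (analyses, []) left in
  let analyses, unused_right =
    List.fold_left (consume (fun arg funct -> forward funct arg)) (analyses, []) right in
  (* mirrors assign_not_parsed: unattached dependents are treated as an error for now *)
  if unused_left <> [] || unused_right <> [] then failwith "parse_node: unattached dependents"
  else analyses

The commented-out dep_parse2 in the diff explores an alternative in which left and right dependents are interleaved in every possible order instead of being folded separately.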
parser/LCGlexicon.ml
... | ... | @@ -1160,6 +1160,7 @@ let rec process_interp (d:PreTypes.token_record) = function (* FIXME: rozpoznawa |
1160 | 1160 | let batrs = make_node lemma "unk" d.id d.weight 0 ["number"; "case"; "gender"; "person"] in |
1161 | 1161 | [LCGrenderer.make_frame_simple quant t d ( batrs)] |
1162 | 1162 | | _,"xxx",[] -> [] (* FIXME *) |
1163 | + | ".","interp",[] -> [LCGrenderer.make_frame_simple [] ["dot"] d (make_node "." "interp" d.id d.weight 0 [])] (* FIXME: this is needed for CONLL *) |
1163 | 1164 | | x,c,l -> failwith ("process_interp: " ^ (String.concat ":" (Xlist.map ([x] :: [c] :: l) (String.concat ".")))) |
1164 | 1165 | |
1165 | 1166 | let process_bracket_lemma (d:PreTypes.token_record) = function |
... | ... |
parser/LCGrules.ml
... | ... | @@ -19,7 +19,7 @@ |
19 | 19 | |
20 | 20 | open Xstd |
21 | 21 | open LCGtypes |
22 | - | |
22 | + | |
23 | 23 | let references = ref [0,Ref 0] |
24 | 24 | let next_reference = ref 0 |
25 | 25 | |
... | ... | @@ -28,7 +28,7 @@ let make_reference sem = |
28 | 28 | references := (r,sem) :: !references; |
29 | 29 | incr next_reference; |
30 | 30 | r |
31 | - | |
31 | + | |
32 | 32 | let new_variable_ref = ref 0 |
33 | 33 | |
34 | 34 | let get_new_variable () = |
... | ... | @@ -44,7 +44,7 @@ let rec unify v1 v2 = function |
44 | 44 | | t,AVar a -> failwith ("unify AVar: " ^ v1 ^ "=" ^ (LCGstringOf.internal_grammar_symbol 0 t) ^ " " ^ v2 ^ "=" ^ (LCGstringOf.internal_grammar_symbol 0 (AVar a))) |
45 | 45 | | Zero, t -> t |
46 | 46 | | t, Zero -> t |
47 | - | With ls, With lt -> | |
47 | + | With ls, With lt -> | |
48 | 48 | let ls = Xlist.map ls (function Atom s -> s | _ -> failwith "unify: With") in |
49 | 49 | let lt = Xlist.map lt (function Atom s -> s | _ -> failwith "unify: With") in |
50 | 50 | let set = StringSet.of_list lt in |
... | ... | @@ -56,12 +56,12 @@ let rec unify v1 v2 = function |
56 | 56 | | Atom s, t -> unify v1 v2 (With[Atom s],t) |
57 | 57 | | s, Atom t -> unify v1 v2 (s,With[Atom t]) |
58 | 58 | | _,_ -> failwith "unify" |
59 | - | |
60 | -(*let unify_fv afv bfv = | |
59 | + | |
60 | +(*let unify_fv afv bfv = | |
61 | 61 | StringMap.fold afv bfv (fun bfv v g -> |
62 | 62 | let g2 = try StringMap.find bfv v with Not_found -> Zero in |
63 | 63 | StringMap.add bfv v (unify v v (g,g2))) *) |
64 | - | |
64 | + | |
65 | 65 | let find_fv fv v = try StringMap.find fv v with Not_found -> failwith ("find_fv: "^ v) |
66 | 66 | let add_fv = StringMap.add |
67 | 67 | let remove_fv = StringMap.remove |
... | ... | @@ -69,7 +69,7 @@ let is_empty_fv = StringMap.is_empty |
69 | 69 | let empty_fv = StringMap.empty |
70 | 70 | let fold_fv = StringMap.fold |
71 | 71 | |
72 | -let string_of_fv fv = | |
72 | +let string_of_fv fv = | |
73 | 73 | let l = StringMap.fold fv [] (fun l v (t,e) -> (e ^ ": " ^ v ^ ":=" ^ LCGstringOf.internal_grammar_symbol 0 t) :: l) in |
74 | 74 | String.concat "," (List.sort compare l) |
75 | 75 | |
... | ... | @@ -83,16 +83,16 @@ let rec infer s = function |
83 | 83 | let make_variant = function |
84 | 84 | [] -> failwith "make_variant" |
85 | 85 | | [t] -> t |
86 | - | l -> | |
86 | + | l -> | |
87 | 87 | (* let e = get_variant_label () in *) |
88 | 88 | let l,_ = Xlist.fold l ([],1) (fun (l,i) -> function |
89 | 89 | t -> (string_of_int i,t) :: l, i+1) in |
90 | 90 | Variant("",l) |
91 | - | |
91 | + | |
92 | 92 | let make_subst e = function |
93 | 93 | Zero -> Dot |
94 | 94 | | Atom t -> Val t |
95 | - | With l -> | |
95 | + | With l -> | |
96 | 96 | (* let e = get_variant_label () in *) |
97 | 97 | let l,_ = Xlist.fold l ([],1) (fun (l,i) -> function |
98 | 98 | Atom t -> (string_of_int i,Val t) :: l, i+1 |
... | ... | @@ -100,71 +100,71 @@ let make_subst e = function |
100 | 100 | Variant(e,List.rev l) |
101 | 101 | | AVar a -> SubstVar a |
102 | 102 | | _ -> failwith "make_subst 2" |
103 | - | |
103 | + | |
104 | 104 | let internal_deduce_matching afv bfv sem = function (* matched term * functor argument *) |
105 | 105 | Atom s, Atom t -> if s = t then [afv,bfv,sem] else [] |
106 | 106 | | Atom s, Top -> [afv,bfv,sem] |
107 | - | Zero, Atom t -> [afv,bfv,sem] | |
108 | - | Zero, Top -> [afv,bfv,sem] | |
109 | - | AVar a, Atom t -> | |
107 | + | Zero, Atom t -> [afv,bfv,sem] | |
108 | + | Zero, Top -> [afv,bfv,sem] | |
109 | + | AVar a, Atom t -> | |
110 | 110 | let g,e = find_fv afv a in |
111 | 111 | let l = if infer t g then [add_fv afv a (Atom t,e),bfv,sem] else [] in |
112 | 112 | (* Printf.printf "AVar,Atom: [%s] '%s' [%s] '%s' -> %d\n%!" (string_of_fv afv) (LCGstringOf.internal_grammar_symbol 1 (AVar a)) (string_of_fv bfv) (LCGstringOf.internal_grammar_symbol 1 (Atom t)) (Xlist.size l); *) |
113 | 113 | l |
114 | 114 | | AVar a, Top -> [afv,bfv,sem] |
115 | - | Zero, AVar b -> (*print_endline "idm";*)[afv,bfv,sem] | |
116 | - | Atom s, AVar b -> | |
115 | + | Zero, AVar b -> (*print_endline "idm";*)[afv,bfv,sem] | |
116 | + | Atom s, AVar b -> | |
117 | 117 | let g,e = find_fv bfv b in |
118 | 118 | if infer s g then [afv, add_fv bfv b (Atom s,e),sem] else [] |
119 | - | AVar a, AVar b -> | |
119 | + | AVar a, AVar b -> | |
120 | 120 | let ga,ea = find_fv afv a in |
121 | 121 | let gb,eb = find_fv bfv b in |
122 | 122 | (try let subst = (*print_endline "internal_deduce_matching";*)unify a b (ga,gb) in [add_fv afv a (AVar b,eb), add_fv bfv b (subst,eb),sem] with Not_found -> []) |
123 | - | Top, Top -> [afv,bfv,sem] | |
124 | - | Top, _ -> [] | |
123 | + | Top, Top -> [afv,bfv,sem] | |
124 | + | Top, _ -> [] | |
125 | 125 | | s,t -> failwith ("internal_deduce_matching pattern: " ^ LCGstringOf.internal_grammar_symbol 1 s ^ " " ^ LCGstringOf.internal_grammar_symbol 1 t) |
126 | 126 | |
127 | -let rec imp_selector s dir fv in_sem d = function | |
128 | - Maybe t -> | |
129 | - if d = Both || d = dir then | |
127 | +let rec imp_selector s dir fv in_sem d = function | |
128 | + Maybe t -> | |
129 | + if d = Both || d = dir then | |
130 | 130 | let x = get_new_variable () in |
131 | 131 | let y = get_new_variable () in |
132 | 132 | [fv,Imp(s,dir,Maybe t),t,Lambda(x,Lambda(y,App(in_sem,Insert(Apply(Var x),Var y))))] else [] |
133 | 133 | | t -> if d = Both || d = dir then [fv,s,t,in_sem] else [] |
134 | - | |
134 | + | |
135 | 135 | let rec impset_selector s dir fv in_sem rev = function |
136 | 136 | [],_ -> [] |
137 | - | (d,Maybe t) :: l,i -> | |
138 | - (if d = Both || d = dir then | |
137 | + | (d,Maybe t) :: l,i -> | |
138 | + (if d = Both || d = dir then | |
139 | 139 | let x = get_new_variable () in |
140 | 140 | let y = get_new_variable () in |
141 | 141 | let s = if rev = [] && l = [] then s else ImpSet(s,List.rev rev @ l) in |
142 | - [fv,Imp(s,dir,Maybe t),t,Lambda(x,Lambda(y,App(LambdaRot(i,in_sem),Insert(Apply(Var x),Var y))))] | |
143 | - else []) @ | |
142 | + [fv,Imp(s,dir,Maybe t),t,Lambda(x,Lambda(y,App(LambdaRot(i,in_sem),Insert(Apply(Var x),Var y))))] | |
143 | + else []) @ | |
144 | 144 | (impset_selector s dir fv in_sem ((d,Maybe t) :: rev) (l,i+1)) |
145 | - | (d,t) :: l,i -> | |
146 | - (if d = Both || d = dir then | |
145 | + | (d,t) :: l,i -> | |
146 | + (if d = Both || d = dir then | |
147 | 147 | let s = if rev = [] && l = [] then s else ImpSet(s,List.rev rev @ l) in |
148 | - [fv,s,t,LambdaRot(i,in_sem)] | |
149 | - else []) @ | |
148 | + [fv,s,t,LambdaRot(i,in_sem)] | |
149 | + else []) @ | |
150 | 150 | (impset_selector s dir fv in_sem ((d,t) :: rev) (l,i+1)) |
151 | - | |
151 | + | |
152 | 152 | let rec deduce_tensor afv bfv rev_sems = function |
153 | 153 | [] -> [afv,bfv,List.rev rev_sems] |
154 | - | (s,(t,v)) :: tensor_elems -> | |
154 | + | (s,(t,v)) :: tensor_elems -> | |
155 | 155 | let l = internal_deduce_matching afv bfv v (s,t) in |
156 | 156 | (* Printf.printf "deduce_tensor: [%s] '%s' [%s] '%s' -> %d\n%!" (string_of_fv afv) (LCGstringOf.internal_grammar_symbol_prime s) (string_of_fv bfv) (LCGstringOf.internal_grammar_symbol_prime t) (Xlist.size l); *) |
157 | 157 | Xlist.fold l [] (fun found (afv,bfv,sem) -> |
158 | 158 | (deduce_tensor afv bfv (sem :: rev_sems) tensor_elems) @ found) |
159 | - | |
159 | + | |
160 | 160 | let rec deduce_matching afv bfv in_sem = function (* matched term * functor argument *) |
161 | 161 | (* | Plus l, t -> (* assuming afv is empty *) |
162 | 162 | let x = get_new_variable () in |
163 | 163 | let found = Xlist.multiply_list (Xlist.map l (fun s -> |
164 | - Xlist.map (deduce_matching afv bfv (Var x) (s,t)) (fun (afv,bfv,sem) -> | |
164 | + Xlist.map (deduce_matching afv bfv (Var x) (s,t)) (fun (afv,bfv,sem) -> | |
165 | 165 | if not (is_empty_fv afv) then failwith "deduce_matching: is_empty_fv afv" else |
166 | 166 | bfv,sem))) in |
167 | - Xlist.fold found [] (fun found l -> | |
167 | + Xlist.fold found [] (fun found l -> | |
168 | 168 | try |
169 | 169 | let bfv = Xlist.fold (List.tl l) (fst (List.hd l)) (fun bfv (frame_bfv,_) -> unify_fv bfv frame_bfv) in |
170 | 170 | let sem = Case(in_sem,(Xlist.map l (fun (_,sem) -> x,sem))) in |
... | ... | @@ -172,13 +172,13 @@ let rec deduce_matching afv bfv in_sem = function (* maczowany term * argument f |
172 | 172 | with Not_found -> found)*) |
173 | 173 | | s, Plus l -> (* it matters that the right Plus comes after the left one *) |
174 | 174 | fst (Xlist.fold l ([],1) (fun (found,i) t -> Xlist.map (deduce_matching afv bfv in_sem (s,t)) (fun (afv,bfv,sem) -> afv,bfv,Inj(i,sem)) @ found, i+1)) |
175 | -(* | Star s, Star t -> | |
175 | +(* | Star s, Star t -> | |
176 | 176 | let x = get_new_variable () in |
177 | 177 | Xlist.map (deduce_matching afv bfv (Var x) (s,t)) (fun (afv,bfv,sem) -> afv,bfv,Map(in_sem,Lambda(x,sem)))*) |
178 | 178 | | Star _, _ -> [] |
179 | 179 | | _, Star _ -> [] |
180 | - | WithVar(v,g,e,s),t -> | |
181 | - Xlist.map (deduce_matching (add_fv afv v (g,e)) bfv (ProjVar(v,in_sem)) (s,t)) (fun (afv,bfv,sem) -> | |
180 | + | WithVar(v,g,e,s),t -> | |
181 | + Xlist.map (deduce_matching (add_fv afv v (g,e)) bfv (ProjVar(v,in_sem)) (s,t)) (fun (afv,bfv,sem) -> | |
182 | 182 | let g,e = find_fv afv v in |
183 | 183 | remove_fv afv v,bfv,Subst(sem,v,make_subst e g)) |
184 | 184 | | One, Maybe _ -> [afv,bfv,Empty in_sem] |
... | ... | @@ -186,28 +186,28 @@ let rec deduce_matching afv bfv in_sem = function (* maczowany term * argument f |
186 | 186 | | One, _ -> [] |
187 | 187 | | _, One -> [] |
188 | 188 | | _, Maybe _ -> [] |
189 | - | Imp(psi,d,phi), Imp(tau,dir,sigma) -> | |
189 | + | Imp(psi,d,phi), Imp(tau,dir,sigma) -> | |
190 | 190 | (List.flatten (Xlist.map (deduce_optarg in_sem phi) (fun sem -> deduce_matching afv bfv sem (psi,Imp(tau,dir,sigma))))) @ |
191 | 191 | let l = imp_selector psi dir afv in_sem d phi in |
192 | 192 | List.flatten (Xlist.map l (fun (afv,psi,phi,sem) -> |
193 | 193 | let x = get_new_variable () in |
194 | - let l = List.flatten (Xlist.map (deduce_matching bfv afv (Var x) (sigma,phi)) (fun (bfv,afv,p) -> | |
194 | + let l = List.flatten (Xlist.map (deduce_matching bfv afv (Var x) (sigma,phi)) (fun (bfv,afv,p) -> | |
195 | 195 | deduce_matching afv bfv (App(in_sem,p)) (psi,tau))) in |
196 | 196 | Xlist.map l (fun (afv,bfv,sem) -> afv,bfv,Lambda(x,sem)))) |
197 | - | ImpSet(psi,phi_list), Imp(tau,dir,sigma) -> | |
197 | + | ImpSet(psi,phi_list), Imp(tau,dir,sigma) -> | |
198 | 198 | (List.flatten (Xlist.map (deduce_optargs in_sem phi_list) (fun sem -> deduce_matching afv bfv sem (psi,Imp(tau,dir,sigma))))) @ |
199 | 199 | let l = impset_selector psi dir afv in_sem [] (phi_list,1) in |
200 | 200 | List.flatten (Xlist.map l (fun (afv,psi,phi,sem) -> |
201 | 201 | let x = get_new_variable () in |
202 | - let l = List.flatten (Xlist.map (deduce_matching bfv afv (Var x) (sigma,phi)) (fun (bfv,afv,p) -> | |
202 | + let l = List.flatten (Xlist.map (deduce_matching bfv afv (Var x) (sigma,phi)) (fun (bfv,afv,p) -> | |
203 | 203 | deduce_matching afv bfv (App(sem,p)) (psi,tau))) in |
204 | 204 | Xlist.map l (fun (afv,bfv,sem) -> afv,bfv,Lambda(x,sem)))) |
205 | - | Imp(s,d,s2), t -> | |
205 | + | Imp(s,d,s2), t -> | |
206 | 206 | List.flatten (Xlist.map (deduce_optarg in_sem s2) (fun sem -> deduce_matching afv bfv sem (s,t))) |
207 | - | ImpSet(s,l), t -> | |
207 | + | ImpSet(s,l), t -> | |
208 | 208 | List.flatten (Xlist.map (deduce_optargs in_sem l) (fun sem -> deduce_matching afv bfv sem (s,t))) |
209 | 209 | | _, Imp(s,d,s2) -> [] |
210 | - | Tensor l1, Tensor l2 -> | |
210 | + | Tensor l1, Tensor l2 -> | |
211 | 211 | (* Printf.printf "Tensor: [%s] '%s' [%s] '%s'\n%!" (string_of_fv afv) (LCGstringOf.grammar_symbol 1 (Tensor l1)) (string_of_fv bfv) (LCGstringOf.grammar_symbol 1 (Tensor l2)); *) |
212 | 212 | if Xlist.size l1 <> Xlist.size l2 then [] else ( |
213 | 213 | let dots = Xlist.map (List.tl l1) (fun _ -> Dot) in |
... | ... | @@ -216,22 +216,22 @@ let rec deduce_matching afv bfv in_sem = function (* maczowany term * argument f |
216 | 216 | let sem_substs_list = deduce_tensor afv bfv [] (List.combine l1 (List.combine l2 (in_sem :: dots)(*variables2*))) in |
217 | 217 | let l = Xlist.map sem_substs_list (fun (afv,bfv,sems) -> |
218 | 218 | let sem = List.hd sems(*LetIn(variables,in_sem,Tuple sems)*) in |
219 | - afv,bfv,sem) in | |
219 | + afv,bfv,sem) in | |
220 | 220 | l) |
221 | 221 | | Tensor _, _ -> [] |
222 | 222 | | _, Tensor _ -> [] |
223 | 223 | | s,t -> failwith ("deduce_matching: " ^ LCGstringOf.grammar_symbol 1 s ^ " " ^ LCGstringOf.grammar_symbol 1 t) |
224 | 224 | |
225 | -and deduce_optarg in_sem t = | |
225 | +and deduce_optarg in_sem t = | |
226 | 226 | let l = deduce_matching empty_fv empty_fv (Dot(*Triple(Dot,Dot,Dot)*)) (One,t) in |
227 | 227 | match l with |
228 | - [] -> [] | |
228 | + [] -> [] | |
229 | 229 | | [_,_,sem] -> [App(in_sem, sem)] |
230 | - | l -> (*print_endline ("deduce_optarg: " ^ LCGstringOf.grammar_symbol 0 t ^ " " ^ | |
230 | + | l -> (*print_endline ("deduce_optarg: " ^ LCGstringOf.grammar_symbol 0 t ^ " " ^ | |
231 | 231 | String.concat " " (Xlist.map l (fun (_,_,sem) -> LCGstringOf.linear_term 0 sem)));*) failwith "deduce_optarg" |
232 | 232 | |
233 | 233 | and deduce_optargs sem l = |
234 | - let b,sems = Xlist.fold (List.rev l) (true,[]) (fun (b,sems) (_,t) -> | |
234 | + let b,sems = Xlist.fold (List.rev l) (true,[]) (fun (b,sems) (_,t) -> | |
235 | 235 | if not b then b,[] else |
236 | 236 | let l = deduce_matching empty_fv empty_fv (Dot(*Triple(Dot,Dot,Dot)*)) (One,t) in |
237 | 237 | if l = [] then false,[] else |
... | ... | @@ -244,100 +244,107 @@ let make_forward sem l = |
244 | 244 | let l,sem,_ = Xlist.fold l ([],sem,1) (fun (l,sem,i) -> function |
245 | 245 | Forward,t -> (Forward,t) :: l,sem,i+1 |
246 | 246 | | Both,t -> (Forward,t) :: l,sem,i+1 |
247 | - | Backward,t -> | |
247 | + | Backward,t -> | |
248 | 248 | let res = deduce_matching empty_fv empty_fv Dot (One,t) in |
249 | 249 | if res = [] then raise Not_found else |
250 | 250 | let _,_,res = List.hd res in |
251 | 251 | l, App(LambdaRot(i,sem),res), i) in |
252 | 252 | List.rev l, sem |
253 | - | |
253 | + | |
254 | 254 | let rec deduce_imp dir afv in_sem = function |
255 | 255 | Tensor _ -> [] |
256 | 256 | | Star _ -> [] |
257 | 257 | | Plus _ -> [] |
258 | 258 | | WithVar(v,g,e,s) -> deduce_imp dir (add_fv afv v (g,e)) (ProjVar(v,in_sem)) s |
259 | - | Imp(s,d,t) -> | |
260 | - (List.flatten (Xlist.map (deduce_optarg in_sem t) (fun sem -> deduce_imp dir afv sem s))) @ | |
259 | + | Imp(s,d,t) -> | |
260 | + (List.flatten (Xlist.map (deduce_optarg in_sem t) (fun sem -> deduce_imp dir afv sem s))) @ | |
261 | 261 | (imp_selector s dir afv in_sem d t) |
262 | - | ImpSet(s,l) -> | |
262 | + | ImpSet(s,l) -> | |
263 | 263 | let l2,in_sem2 = if dir = Backward then l,in_sem else make_forward in_sem l in |
264 | - (List.flatten (Xlist.map (deduce_optargs in_sem l) (fun sem -> deduce_imp dir afv sem s))) @ | |
264 | + (List.flatten (Xlist.map (deduce_optargs in_sem l) (fun sem -> deduce_imp dir afv sem s))) @ | |
265 | 265 | (impset_selector s dir afv in_sem2 [] (l2,1)) |
266 | 266 | | s -> failwith ("deduce_imp: " ^ LCGstringOf.grammar_symbol 1 s) |
267 | 267 | |
268 | -let rec deduce_app dir (funct,funct_sem) args = | |
269 | -(* Printf.printf "deduce_app: '%s' [%s]\n%!" (LCGstringOf.grammar_symbol 1 funct) | |
268 | +let rec deduce_app dir (funct,funct_sem) args = | |
269 | +(* Printf.printf "deduce_app: '%s' [%s]\n%!" (LCGstringOf.grammar_symbol 1 funct) | |
270 | 270 | (String.concat "; " (Xlist.map args (fun (arg,_) -> "'" ^ LCGstringOf.grammar_symbol 1 arg ^ "'"))); *) |
271 | - List.flatten (Xlist.map (deduce_imp dir empty_fv funct_sem funct) (fun (fv,psi,phi,funct_sem) -> | |
272 | - let l = Xlist.fold args [] (fun l (arg,arg_sem) -> | |
271 | + List.flatten (Xlist.map (deduce_imp dir empty_fv funct_sem funct) (fun (fv,psi,phi,funct_sem) -> | |
272 | + let l = Xlist.fold args [] (fun l (arg,arg_sem) -> | |
273 | 273 | let res = deduce_matching empty_fv fv arg_sem (arg,phi) in |
274 | 274 | (* Printf.printf "deduce_matching: '%s' '%s' -> %d\n%!" (LCGstringOf.grammar_symbol 1 arg) (LCGstringOf.grammar_symbol 1 phi) (Xlist.size res); *) |
275 | 275 | res @ l) in |
276 | - let map = Xlist.fold l StringMap.empty (fun map (afv,bfv,sem) -> | |
276 | + let map = Xlist.fold l StringMap.empty (fun map (afv,bfv,sem) -> | |
277 | 277 | if not (is_empty_fv afv) then failwith "deduce_app" else |
278 | 278 | StringMap.add_inc map (string_of_fv bfv) (bfv,[sem]) (fun (fv,sems) -> fv, sem :: sems)) in |
279 | - StringMap.fold map [] (fun l _ (bfv,sems) -> | |
279 | + StringMap.fold map [] (fun l _ (bfv,sems) -> | |
280 | 280 | let reference = make_reference (App(funct_sem,make_variant sems)) in |
281 | 281 | (fold_fv bfv (psi,Ref reference) (fun (t,sem) v (g,e) -> WithVar(v,g,e,t), VariantVar(v,sem))) :: l))) |
282 | 282 | |
283 | -(*let rec forward_application = function | |
283 | +(*let rec forward_application = function | |
284 | 284 | (Bracket(lf,false,funct),sem), (Bracket(false,rf,arg),arg_sem) -> Xlist.map (deduce_app Forward (funct,sem) (arg,arg_sem)) (fun (t,sem) -> Bracket(lf,rf,t), LCGreductions.linear_term_beta_reduction2 sem) |
285 | 285 | | (Bracket(lf,true,funct),sem), (Bracket(true,true,arg),arg_sem) -> Xlist.map (deduce_app Forward (funct,sem) (arg,arg_sem)) (fun (t,sem) -> Bracket(lf,true,t), LCGreductions.linear_term_beta_reduction2 sem) |
286 | 286 | | (BracketSet(Forward),_), (Bracket(false,rf,arg),arg_sem) -> [Bracket(true,rf,arg),arg_sem] |
287 | 287 | | ((x,_),(y,_)) -> (*Printf.printf "forward_application: '%s' '%s'\n%!" (LCGstringOf.grammar_symbol_prime x) (LCGstringOf.grammar_symbol_prime y);*) [] |
288 | - | |
289 | -let rec backward_application = function | |
288 | + | |
289 | +let rec backward_application = function | |
290 | 290 | (Bracket(lf,false,arg),arg_sem), (Bracket(false,rf,funct),sem) -> Xlist.map (deduce_app Backward (funct,sem) (arg,arg_sem)) (fun (t,sem) -> Bracket(lf,rf,t), LCGreductions.linear_term_beta_reduction2 sem) |
291 | 291 | | (Bracket(true,true,arg),arg_sem), (Bracket(true,rf,funct),sem) -> Xlist.map (deduce_app Backward (funct,sem) (arg,arg_sem)) (fun (t,sem) -> Bracket(true,rf,t), LCGreductions.linear_term_beta_reduction2 sem) |
292 | 292 | | (Bracket(lf,false,arg),arg_sem), (BracketSet(Backward),_) -> [Bracket(lf,true,arg),arg_sem] |
293 | 293 | | _ -> []*) |
294 | - | |
294 | + | |
295 | 295 | let forward_application functs args = |
296 | 296 | Xlist.fold functs [] (fun l -> function |
297 | - Bracket(lf,false,funct),sem -> | |
298 | - let argst,argsf = Xlist.fold args ([],[]) (fun (argst,argsf) -> function | |
297 | + Bracket(lf,false,funct),sem -> | |
298 | + let argst,argsf = Xlist.fold args ([],[]) (fun (argst,argsf) -> function | |
299 | 299 | Bracket(false,true,arg),arg_sem -> (arg,arg_sem) :: argst, argsf |
300 | 300 | | Bracket(false,false,arg),arg_sem -> argst, (arg,arg_sem) :: argsf |
301 | 301 | | _ -> argst,argsf) in |
302 | - let l = Xlist.fold (deduce_app Forward (funct,sem) argst) l (fun l (t,sem) -> | |
302 | + let l = Xlist.fold (deduce_app Forward (funct,sem) argst) l (fun l (t,sem) -> | |
303 | 303 | (Bracket(lf,true,t), (*LCGreductions.linear_term_beta_reduction2*) sem) :: l) in |
304 | - Xlist.fold (deduce_app Forward (funct,sem) argsf) l (fun l (t,sem) -> | |
304 | + Xlist.fold (deduce_app Forward (funct,sem) argsf) l (fun l (t,sem) -> | |
305 | 305 | (Bracket(lf,false,t), (*LCGreductions.linear_term_beta_reduction2*) sem) :: l) |
306 | - | Bracket(lf,true,funct),sem -> | |
306 | + | Bracket(lf,true,funct),sem -> | |
307 | 307 | let args = Xlist.fold args [] (fun args -> function Bracket(true,true,arg),arg_sem -> (arg,arg_sem) :: args | _ -> args) in |
308 | - Xlist.fold (deduce_app Forward (funct,sem) args) l (fun l (t,sem) -> | |
308 | + Xlist.fold (deduce_app Forward (funct,sem) args) l (fun l (t,sem) -> | |
309 | 309 | (Bracket(lf,true,t), (*LCGreductions.linear_term_beta_reduction2*) sem) :: l) |
310 | 310 | | BracketSet(Forward),_ -> Xlist.fold args l (fun l -> function Bracket(false,rf,arg),arg_sem -> (Bracket(true,rf,arg),arg_sem) :: l | _ -> l) |
311 | 311 | | _ -> l) |
312 | - | |
312 | + | |
313 | 313 | let backward_application args functs = |
314 | -(* Printf.printf "backward_application: [%s] [%s]\n%!" | |
314 | +(* Printf.printf "backward_application: [%s] [%s]\n%!" | |
315 | 315 | (String.concat "; " (Xlist.map args (fun (arg,_) -> "'" ^ LCGstringOf.grammar_symbol 1 arg ^ "'"))) |
316 | 316 | (String.concat "; " (Xlist.map functs (fun (arg,_) -> "'" ^ LCGstringOf.grammar_symbol 1 arg ^ "'"))); *) |
317 | 317 | Xlist.fold functs [] (fun l -> function |
318 | - Bracket(false,rf,funct),sem -> | |
319 | - let argst,argsf = Xlist.fold args ([],[]) (fun (argst,argsf) -> function | |
318 | + Bracket(false,rf,funct),sem -> | |
319 | + let argst,argsf = Xlist.fold args ([],[]) (fun (argst,argsf) -> function | |
320 | 320 | Bracket(true,false,arg),arg_sem -> (arg,arg_sem) :: argst, argsf |
321 | 321 | | Bracket(false,false,arg),arg_sem -> argst, (arg,arg_sem) :: argsf |
322 | 322 | | _ -> argst,argsf) in |
323 | - let l = Xlist.fold (deduce_app Backward (funct,sem) argst) l (fun l (t,sem) -> | |
323 | + let l = Xlist.fold (deduce_app Backward (funct,sem) argst) l (fun l (t,sem) -> | |
324 | 324 | (Bracket(true,rf,t), (*LCGreductions.linear_term_beta_reduction2*) sem) :: l) in |
325 | - Xlist.fold (deduce_app Backward (funct,sem) argsf) l (fun l (t,sem) -> | |
325 | + Xlist.fold (deduce_app Backward (funct,sem) argsf) l (fun l (t,sem) -> | |
326 | 326 | (Bracket(false,rf,t), (*LCGreductions.linear_term_beta_reduction2*) sem) :: l) |
327 | - | Bracket(true,rf,funct),sem -> | |
327 | + | Bracket(true,rf,funct),sem -> | |
328 | 328 | let args = Xlist.fold args [] (fun args -> function Bracket(true,true,arg),arg_sem -> (arg,arg_sem) :: args | _ -> args) in |
329 | - Xlist.fold (deduce_app Backward (funct,sem) args) l (fun l (t,sem) -> | |
329 | + Xlist.fold (deduce_app Backward (funct,sem) args) l (fun l (t,sem) -> | |
330 | 330 | (Bracket(true,rf,t), (*LCGreductions.linear_term_beta_reduction2*) sem) :: l) |
331 | 331 | | BracketSet(Backward),_ -> (*print_endline "tt";*) Xlist.fold args l (fun l -> function Bracket(lf,false,arg),arg_sem -> (Bracket(lf,true,arg),arg_sem) :: l | _ -> l) |
332 | 332 | | _ -> l) |
333 | - | |
334 | - | |
335 | - | |
333 | + | |
334 | + | |
335 | + | |
336 | 336 | (* FIXME: error during reduction of "Jan chce iść spać" *) |
337 | - | |
337 | + | |
338 | 338 | let rules = [ |
339 | - backward_application; | |
339 | + backward_application; | |
340 | 340 | forward_application; |
341 | 341 | ] |
342 | 342 | |
343 | - | |
344 | 343 | \ No newline at end of file |
344 | +let rec flatten_functor2 l seml = function | |
345 | + Imp(s,d,t),Lambda(v,sem) -> flatten_functor2 ((d,t) :: l) (v :: seml) (s,sem) | |
346 | + | ImpSet(s,l2),LambdaSet(vl,sem) -> flatten_functor2 (l2 @ l) (vl @ seml) (s,sem) | |
347 | + | s,sem -> ImpSet(s,l),LambdaSet(seml,sem) | |
348 | + | |
349 | +let rec flatten_functor = function | |
350 | + WithVar(v,g,e,s), VariantVar(x,t) -> let s,t = flatten_functor (s,t) in WithVar(v,g,e,s), VariantVar(x,t) | |
351 | + | t -> flatten_functor2 [] [] t | |
... | ... |
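flatten_functor, added at the end of LCGrules.ml, rewrites a functor so that a chain of single-argument implications (Imp) paired with nested Lambdas collapses into one ImpSet/LambdaSet, exposing all argument slots of a dependency head at once; WithVar/VariantVar wrappers are passed through unchanged. A self-contained sketch of the same idea, using simplified hypothetical constructors rather than the real LCGtypes:

(* Simplified stand-ins for the LCGtypes constructors used by flatten_functor2. *)
type dir = Forward | Backward | Both

type cat =
  | Atom of string
  | Imp of cat * dir * cat              (* functor expecting a single argument *)
  | ImpSet of cat * (dir * cat) list    (* functor with a flat list of argument slots *)

type term =
  | Var of string
  | Lambda of string * term
  | LambdaSet of string list * term

(* Accumulate argument slots and bound variables until the result category is reached. *)
let rec flatten args vars = function
  | Imp (s, d, t), Lambda (v, sem) -> flatten ((d, t) :: args) (v :: vars) (s, sem)
  | ImpSet (s, l), LambdaSet (vl, sem) -> flatten (l @ args) (vl @ vars) (s, sem)
  | s, sem -> ImpSet (s, args), LambdaSet (vars, sem)

(* Example: (s \ np) / pp with semantics fun x -> fun y -> body flattens to
   ImpSet (Atom "s", [(Backward, Atom "np"); (Forward, Atom "pp")]) paired with
   LambdaSet (["y"; "x"], Var "body"). *)
let _ =
  flatten [] []
    (Imp (Imp (Atom "s", Backward, Atom "np"), Forward, Atom "pp"),
     Lambda ("x", Lambda ("y", Var "body")))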
parser/exec.ml
... | ... | @@ -61,6 +61,22 @@ let empty_eniam_parse_result = { |
61 | 61 | paths=[| |]; |
62 | 62 | } |
63 | 63 | |
64 | +let empty_conll_parse_result = { | |
65 | + status=Idle; | |
66 | + msg=""; | |
67 | + lex_time=0.; | |
68 | + parse_time=0.; | |
69 | + reduction_time=0.; | |
70 | + sem_time=0.; | |
71 | + (* paths_size=0; | |
72 | + graph_size=0; | |
73 | + term_size=0; *) | |
74 | + dep_graph=DepNode("-100",[],[],[]); | |
75 | + dep_graph_parsed=[]; | |
76 | + (* term=[| |]; | |
77 | + paths=[| |]; *) | |
78 | + } | |
79 | + | |
64 | 80 | let empty_sum_result = { |
65 | 81 | no_queries=0; |
66 | 82 | no_pre_error=0; |
... | ... | @@ -144,10 +160,13 @@ let eniam_parse_sentence timeout test_only_flag paths last next_id = |
144 | 160 | try |
145 | 161 | let graph, next_id = LCGlexicon.create (paths,last,next_id) in |
146 | 162 | let graph,references,next_reference = LCGchart.lazify graph in |
163 | + let result = if test_only_flag then result else {result with graph=graph} in | |
147 | 164 | let time3 = time_fun () in |
148 | 165 | let result = {result with lex_time=time3 -. time2} in |
149 | 166 | try |
167 | + (* print_endline "eniam_parse_sentence 1"; *) | |
150 | 168 | let graph,references,next_reference = LCGchart.parse timeout graph references next_reference time_fun in |
169 | + (* print_endline "eniam_parse_sentence 2"; *) | |
151 | 170 | let time4 = time_fun () in |
152 | 171 | let result = if test_only_flag then result else {result with graph=graph} in |
153 | 172 | let result = {result with parse_time=time4 -. time3; graph_size=LCGchart.get_no_entries graph} in |
... | ... | @@ -160,10 +179,14 @@ let eniam_parse_sentence timeout test_only_flag paths last next_id = |
160 | 179 | let result = {result with reduction_time=time5 -. time4; term_size=Array.length references} in |
161 | 180 | if LCGreductions.is_reduced_references references then |
162 | 181 | try |
182 | + print_endline "eniam_parse_sentence 3"; | |
163 | 183 | LCGreductions.assign_labels references; (* note: implicit imperative modification of result *) |
184 | + print_endline "eniam_parse_sentence 4"; | |
164 | 185 | LCGreductions.remove_cuts references; (* note: implicit imperative modification of result *) |
165 | 186 | (* if Array.length references < 10000 then print_xml_graph "results/trees/" id references; *) |
166 | - let paths_array = extend_paths_array paths_array next_id references in | |
187 | + print_endline "eniam_parse_sentence 5"; | |
188 | + let paths_array = extend_paths_array paths_array !LCGrenderer.pro_id_counter references in | |
189 | + print_endline "eniam_parse_sentence 6"; | |
167 | 190 | let result = if test_only_flag then result else {result with paths=paths_array} in |
168 | 191 | let time6 = time_fun () in |
169 | 192 | {result with status=Parsed; sem_time=time6 -. time5}, next_id |
... | ... | @@ -191,6 +214,37 @@ let eniam_parse_sentence timeout test_only_flag paths last next_id = |
191 | 214 | let time3 = time_fun () in |
192 | 215 | {result with status=LexiconError; msg=Printexc.to_string e; lex_time=time3 -. time2}, next_id |
193 | 216 | |
217 | +let conll_parse_sentence timeout test_only_flag paths last next_id = | |
218 | + let result = empty_conll_parse_result in | |
219 | + let time2 = time_fun () in | |
220 | + let paths_array = make_paths_array (paths,last,next_id) in | |
221 | + try | |
222 | + let dep_graph, next_id = LCGlexicon.dep_create paths next_id in | |
223 | + let dep_graph,references,next_reference = LCGchart.dep_lazify dep_graph in | |
224 | + let result = if test_only_flag then result else {result with dep_graph=dep_graph} in | |
225 | + let time3 = time_fun () in | |
226 | + let result = {result with lex_time=time3 -. time2} in | |
227 | + try | |
228 | + (* print_endline "conll_parse_sentence 1"; *) | |
229 | + let dep_graph,references,next_reference = LCGchart.dep_parse timeout dep_graph references next_reference time_fun in | |
230 | + (* print_endline "conll_parse_sentence 2"; *) | |
231 | + let time4 = time_fun () in | |
232 | + let result = if test_only_flag then result else {result with dep_graph_parsed=dep_graph} in | |
233 | + let result = {result with parse_time=time4 -. time3} in | |
234 | + (* FIXME: add further processing of dep_graph *) |
235 | + {result with status=NotParsed}, next_id | |
236 | + with | |
237 | + Timeout t -> | |
238 | + let time4 = time_fun () in | |
239 | + {result with status=ParseTimeout; msg=Printf.sprintf "%f" t; parse_time=time4 -. time3}, next_id | |
240 | + | e -> | |
241 | + let time4 = time_fun () in | |
242 | + {result with status=ParseError; msg=Printexc.to_string e; parse_time=time4 -. time3}, next_id | |
243 | + with e -> | |
244 | + let time3 = time_fun () in | |
245 | + {result with status=LexiconError; msg=Printexc.to_string e; lex_time=time3 -. time2}, next_id | |
246 | + | |
247 | + | |
194 | 248 | let mate_in, mate_out = Unix.open_process "java -jar ../dependencyParser/basic/mate-tools/dist/anna-3.5.jar -model ../dependencyParser/basic/mate-tools/examples/160622_Polish_MateParser.mdl -test" |
195 | 249 | |
196 | 250 | let rec parse_sentence timeout test_only_flag mode next_id = function |
... | ... | @@ -198,19 +252,18 @@ let rec parse_sentence timeout test_only_flag mode next_id = function |
198 | 252 | | StructSentence(paths,last) -> |
199 | 253 | (match mode with |
200 | 254 | CONLL -> |
201 | - let dep_graph, next_id = LCGlexicon.dep_create paths next_id in | |
255 | + let result, next_id = conll_parse_sentence timeout test_only_flag paths last next_id in | |
256 | + CONLLSentence result, next_id | |
257 | + (* let dep_graph, next_id = LCGlexicon.dep_create paths next_id in | |
202 | 258 | let dep_graph,references,next_reference = LCGchart.dep_lazify dep_graph in |
203 | - let dep_graph,references,next_reference = LCGchart.dep_parse timeout dep_graph references next_reference time_fun in | |
204 | - (* FIXME: add further processing of dep_graph *) |
205 | - let xml = DepTree.conll_to_xml paths in | |
259 | + let dep_graph,references,next_reference = LCGchart.dep_parse timeout dep_graph references next_reference time_fun in *) | |
260 | + (* let xml = DepTree.conll_to_xml paths in | |
206 | 261 | let graph = XmlPrinter.graph_of_xml xml in (* FIXME: to be fixed *) |
207 | 262 | Visualization.print_graph "results/" "term_conll" graph; |
208 | 263 | let result = {empty_eniam_parse_result with status=Parsed; term=graph} in |
209 | - ENIAMSentence result, next_id | |
210 | - (* StructSentence(paths,last), next_id *) | |
264 | + ENIAMSentence result, next_id *) | |
211 | 265 | | ENIAM -> |
212 | 266 | let result, next_id = eniam_parse_sentence timeout test_only_flag paths last next_id in |
213 | - Visualization.print_graph "results/" "term_eniam" result.term; | |
214 | 267 | ENIAMSentence result, next_id |
215 | 268 | | Mate -> |
216 | 269 | (*print_endline "parse_sentence 1"; |
... | ... |
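conll_parse_sentence mirrors the staging and error-reporting pattern of eniam_parse_sentence: each stage is timed with time_fun and every failure is mapped onto a status (LexiconError while building and lazifying the lexicon graph, ParseTimeout or ParseError while parsing). A minimal sketch of that pattern, assuming simplified hypothetical types in place of the records defined in execTypes.ml:

(* Hypothetical simplified status and result types; Timeout mimics the exception
   raised by LCGchart.parse / dep_parse when the time limit is exceeded. *)
exception Timeout of float

type status = Idle | LexiconError | ParseError | ParseTimeout | NotParsed

type result = { status : status; msg : string; lex_time : float; parse_time : float }

let empty = { status = Idle; msg = ""; lex_time = 0.; parse_time = 0. }

let staged_parse ~build ~parse time_fun =
  let t0 = time_fun () in
  try
    let graph = build () in                 (* lexicon creation and lazification *)
    let t1 = time_fun () in
    let result = { empty with lex_time = t1 -. t0 } in
    (try
       let _parsed = parse graph in         (* chart or dependency parsing *)
       { result with status = NotParsed; parse_time = time_fun () -. t1 }
     with
     | Timeout t ->
         { result with status = ParseTimeout; msg = Printf.sprintf "%f" t;
                       parse_time = time_fun () -. t1 }
     | e ->
         { result with status = ParseError; msg = Printexc.to_string e;
                       parse_time = time_fun () -. t1 })
  with e ->
    { empty with status = LexiconError; msg = Printexc.to_string e;
                 lex_time = time_fun () -. t0 }

As in the diff, a successful run still ends with status NotParsed, because further processing of the parsed dependency graph is left as a FIXME.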
parser/execTypes.ml
... | ... | @@ -34,6 +34,22 @@ type eniam_parse_result = { |
34 | 34 | paths: PreTypes.token_record array; |
35 | 35 | } |
36 | 36 | |
37 | +type conll_parse_result = { | |
38 | + status: status; | |
39 | + msg: string; | |
40 | + lex_time: float; | |
41 | + parse_time: float; | |
42 | + reduction_time: float; | |
43 | + sem_time: float; | |
44 | + (* paths_size: int; | |
45 | + graph_size: int; | |
46 | + term_size: int; *) | |
47 | + dep_graph: LCGtypes.dep_tree; | |
48 | + dep_graph_parsed: (LCGtypes.SymbolMap.key * LCGtypes.linear_term) list; | |
49 | + (* term: LCGtypes.linear_term array; | |
50 | + paths: PreTypes.token_record array; *) | |
51 | + } | |
52 | + | |
37 | 53 | type mode = |
38 | 54 | Raw | Struct | CONLL | ENIAM | Mate |
39 | 55 | |
... | ... | @@ -46,6 +62,7 @@ type sentence = |
46 | 62 | (* | Skladnica of skladnica_tree *) |
47 | 63 | | AltSentence of (mode * sentence) list (* string = label, e.g. raw, nkjp, krzaki *) |
48 | 64 | | ENIAMSentence of eniam_parse_result |
65 | + | CONLLSentence of conll_parse_result | |
49 | 66 | |
50 | 67 | and paragraph_record = {pid: string; pbeg: int; plen: int; psentence: sentence} (* beg and len counted in unicode characters ( * 100 ???) *) |
51 | 68 | |
... | ... |
parser/pipe.ml
... | ... | @@ -162,9 +162,10 @@ let process_conll_corpus filename = |
162 | 162 | (* Xlist.iter corpus (fun sentence -> print_endline (CONLL.string_of_sentence sentence)); *) |
163 | 163 | let ic,oc = Unix.open_connection (get_sock_addr Paths.pre_host Paths.pre_port) in |
164 | 164 | Xlist.iter corpus (fun query -> |
165 | - let result = Exec.process_query ic oc 3000. false "x" query 10 in | |
165 | + let result = Exec.process_query ic oc 30. false "x" query 10 in | |
166 | 166 | Visualization.print_html_text "results/" "input_text" result.input_text; |
167 | 167 | Visualization.print_html_text "results/" "pre_text" result.pre_text; |
168 | + Visualization.print_html_text "results/" "parsed_text" result.parsed_text; | |
168 | 169 | (* printf "input_text:\n%s\n" (Visualization.string_of_text result.input_text); |
169 | 170 | printf "pre_text:\n%s\n" (Visualization.string_of_text result.pre_text); *) |
170 | 171 | Exec.print_result stdout result; |
... | ... | @@ -176,7 +177,8 @@ let process_conll_corpus filename = |
176 | 177 | () |
177 | 178 | |
178 | 179 | let _ = |
179 | - process_conll_corpus "../../NLP resources/Skladnica-zaleznosciowa-mod_130121.conll"; | |
180 | + (* process_conll_corpus "../../NLP resources/Skladnica-zaleznosciowa-mod_130121.conll"; *) | |
181 | + process_conll_corpus "../testy/skladnica-proste-zdania.conll"; | |
180 | 182 | () |
181 | 183 | |
182 | 184 | (* TO DO: |
... | ... | @@ -188,6 +190,10 @@ let _ = |
188 | 190 | - matching tokens across different tokenization versions |
189 | 191 | - matching paragraphs and sentences with each other |
190 | 192 | - cleaning up labels: labels, last, etc. |
193 | + 2016.10.15 |
194 | + - assigning vertices and last to sentences |
195 | + - parsing to <query> or <sentence>, and ranges |
196 | + - surplus pro nodes after parsing |
191 | 197 | *) |
192 | 198 | |
193 | 199 | |
... | ... |
parser/visualization.ml
... | ... | @@ -70,6 +70,20 @@ let string_of_token_record1 t = |
70 | 70 | let string_of_paths1 paths = |
71 | 71 | String.concat "\n " (Xlist.map paths string_of_token_record1) |
72 | 72 | |
73 | +let string_of_status = function | |
74 | + ExecTypes.Idle -> "idle" | |
75 | + | ExecTypes.PreprocessingError -> "error_pre" | |
76 | + | ExecTypes.LexiconError -> "error_lex" | |
77 | + | ExecTypes.ParseError -> "error_parse" | |
78 | + | ExecTypes.ParseTimeout -> "timeout" | |
79 | + | ExecTypes.NotParsed -> "not_parsed" | |
80 | + | ExecTypes.ReductionError -> "error_reduction" | |
81 | + | ExecTypes.TooManyNodes -> "too_many_nodes" |
82 | + | ExecTypes.NotReduced -> "not_reduced" | |
83 | + | ExecTypes.SemError -> "error_sem" | |
84 | + | ExecTypes.NotTranslated -> "not_translated" | |
85 | + | ExecTypes.Parsed -> "parsed" | |
86 | + | |
73 | 87 | let rec xml_of_graph = function |
74 | 88 | Node t -> Xml.Element("node",["pred",t.pred;"cat",t.cat;"weight",string_of_float t.weight;"id",string_of_int t.id],[ |
75 | 89 | Xml.Element("gs",[],[xml_of_graph t.gs]); |
... | ... | @@ -682,19 +696,83 @@ let html_of_struct_sentence paths last = |
682 | 696 | t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (escape_html (string_of_token t.PreTypes.token)) |
683 | 697 | t.PreTypes.id t.PreTypes.lnode t.PreTypes.rnode t.PreTypes.conll_id t.PreTypes.conll_super t.PreTypes.conll_label |
684 | 698 | (String.concat "; " t.PreTypes.attrs))) ^ |
685 | - sprintf "<tr><td></td><td>%d</td><td></td><td></td><td></td><td></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>" last ^ | |
699 | + sprintf "<tr><td></td><td></td><td></td><td></td><td></td><td></td><td>%d</td><td></td><td></td><td></td><td></td><td></td></tr>" last ^ | |
686 | 700 | "</table>" |
687 | 701 | |
702 | +let create_latex_graph path name graph = | |
703 | + LatexMain.latex_file_out path name "a1" false (fun file -> | |
704 | + Printf.fprintf file "%s\n" (LCGlatexOf.graph graph)); | |
705 | + LatexMain.latex_compile_and_clean path name | |
706 | + | |
707 | +let html_of_eniam_sentence (result : eniam_parse_result) = | |
708 | + match result.status with | |
709 | + Idle -> "idle\n" | |
710 | + (* | PreprocessingError -> "error_pre: %s\n" result.msg *) | |
711 | + | LexiconError -> sprintf "error_lex: %s\n" result.msg | |
712 | + | ParseError -> | |
713 | + create_latex_graph "results/" "chart" result.graph; | |
714 | + sprintf "error_parse: %s\n" result.msg | |
715 | + | ParseTimeout -> | |
716 | + create_latex_graph "results/" "chart" result.graph; | |
717 | + sprintf "timeout: %s\n" result.msg | |
718 | + | NotParsed -> | |
719 | + create_latex_graph "results/" "chart" result.graph; | |
720 | + sprintf "not_parsed: paths_size=%d graph_size=%d\n" result.paths_size result.graph_size | |
721 | + | ReductionError -> sprintf "error_reduction: %s\n" result.msg | |
722 | + | TooManyNodes -> sprintf "too_many_nodes: paths_size=%d graph_size=%d\n" result.paths_size result.graph_size |
723 | + | NotReduced -> sprintf "not_reduced: paths_size=%d graph_size=%d\n" result.paths_size result.graph_size | |
724 | + | SemError -> sprintf "error_sem: %s term_size=%d\n" result.msg result.term_size | |
725 | + (* | NotTranslated -> "not_translated: \n" *) | |
726 | + | Parsed -> | |
727 | + print_tree "results/" "tree" result.paths result.term; | |
728 | + print_graph "results/" "term" result.term; | |
729 | + LCGlatexOf.print_references "references" result.term; | |
730 | + sprintf "parsed: paths_size=%d graph_size=%d term_size=%d\n" result.paths_size result.graph_size result.term_size | |
731 | + | _ -> failwith "html_of_eniam_sentence" | |
732 | + | |
733 | +let html_of_conll_sentence (result : conll_parse_result) = | |
734 | + match result.status with | |
735 | + Idle -> "idle\n" | |
736 | + (* | PreprocessingError -> "error_pre: %s\n" result.msg *) | |
737 | + | LexiconError -> sprintf "error_lex: %s\n" result.msg | |
738 | + | ParseError -> | |
739 | + (* create_latex_graph "results/" "chart" result.graph; *) | |
740 | + sprintf "error_parse: %s\n" result.msg | |
741 | + | ParseTimeout -> | |
742 | + (* create_latex_graph "results/" "chart" result.graph; *) | |
743 | + sprintf "timeout: %s\n" result.msg | |
744 | + | NotParsed -> | |
745 | + (* create_latex_graph "results/" "chart" result.graph; *) | |
746 | + sprintf "not_parsed\n" | |
747 | + | ReductionError -> sprintf "error_reduction: %s\n" result.msg | |
748 | + | TooManyNodes -> sprintf "too_many_nodes\n" |
749 | + | NotReduced -> sprintf "not_reduced\n" | |
750 | + | SemError -> sprintf "error_sem: %s\n" result.msg | |
751 | + (* | NotTranslated -> "not_translated: \n" *) | |
752 | + | Parsed -> | |
753 | + (* print_tree "results/" "tree" result.paths result.term; | |
754 | + print_graph "results/" "term" result.term; | |
755 | + LCGlatexOf.print_references "references" result.term; *) | |
756 | + sprintf "parsed\n" | |
757 | + | _ -> failwith "html_of_eniam_sentence" | |
758 | + | |
759 | + | |
688 | 760 | let rec html_of_sentence = function |
689 | 761 | RawSentence s -> s |
690 | 762 | | StructSentence(paths,last) -> html_of_struct_sentence paths last |
763 | + | ENIAMSentence result -> html_of_eniam_sentence result | |
764 | + (* print_endline "html_of_sentence 1"; | |
765 | + print_tree "results/" "tree_eniam" result.paths result.term; | |
766 | + print_endline "html_of_sentence 2"; | |
767 | + string_of_status result.status *) | |
768 | + | CONLLSentence result -> html_of_conll_sentence result | |
691 | 769 | | ORSentence _ -> failwith "html_of_sentence: ni" |
692 | 770 | | AltSentence l -> |
693 | 771 | "<table border=1>" ^ |
694 | 772 | String.concat "\n" (Xlist.map l (fun (mode,sentence) -> |
695 | 773 | sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_sentence sentence))) ^ |
696 | 774 | "</table>" |
697 | - | _ -> failwith "html_of_sentence: ni" | |
775 | + (* | _ -> failwith "html_of_sentence: ni" *) | |
698 | 776 | |
699 | 777 | let rec html_of_paragraph = function |
700 | 778 | RawParagraph s -> s |
... | ... |
pre/preProcessing.ml
... | ... | @@ -235,7 +235,7 @@ let select_tokens (paths,last) = |
235 | 235 | | Interp orth -> t :: paths |
236 | 236 | (* | Dig(value,cat) -> t :: paths *) |
237 | 237 | | Other2 orth -> t :: paths |
238 | - | Lemma(lemma,cat,interp) -> t :: paths | |
238 | + | Lemma(lemma,cat,interp) -> if cat = "brev" then paths else t :: paths | |
239 | 239 | | Proper _ -> failwith "select_tokens" |
240 | 240 | (* | Compound _ -> t :: paths *) |
241 | 241 | | _ -> paths)), last |
... | ... |
pre/preSentences.ml
... | ... | @@ -101,7 +101,7 @@ let extract_sentences par (paths,last) = |
101 | 101 | let paths,last = get_sentence t in |
102 | 102 | {pid=string_of_int t.id; pbeg=t.beg; plen=t.len; |
103 | 103 | psentence=AltSentence[Raw,RawSentence (get_raw_sentence par t.beg t.len); |
104 | - ENIAM,StructSentence(paths,last)]} :: sentences else sentences) :: pars) in | |
104 | + ENIAM,StructSentence(paths,(*last*)10)]} :: sentences else sentences) :: pars) in (* FIXME: (*last*)10 !!!! *) | |
105 | 105 | match pars with |
106 | 106 | [sentences] -> sentences |
107 | 107 | | _ -> failwith "extract_sentences" |
... | ... |