Commit f9158b5a130c5a3597ae60d004fcc33614e12575
1 parent
c0772f92
wstępne uruchamianie MateParser
Showing 2 changed files with 22 additions and 10 deletions
corpora/CONLL.ml
... | ... | @@ -18,6 +18,15 @@ let string_of_token mode token = match mode with |
18 | 18 | String.concat "\t" [token.conll_id; |
19 | 19 | token.orth; lemma; cat; cat; interp; "_"; "_"; |
20 | 20 | string_of_int token.beg; string_of_int token.len] |
21 | + | Mate -> let lemma,cat,interp = match token.token with | |
22 | + | Lemma(a,b,c) -> a,b,if c = [[]] | |
23 | + then "_" | |
24 | + else String.concat "][" @@ Xlist.map c (fun x -> | |
25 | + String.concat "|" @@ Xlist.map x ( fun y -> | |
26 | + String.concat "." y)) | |
27 | + | _ -> failwith ("string_of_token: not Lemma") in | |
28 | + String.concat "\t" [token.conll_id; | |
29 | + token.orth; lemma; cat; cat; interp; "_"; "_"] | |
21 | 30 | | _ -> failwith "string_of_token: ni" |
22 | 31 | |
23 | 32 | let rec string_of_sentence mode = function |
... | ... |
parser/exec.ml
... | ... | @@ -221,8 +221,7 @@ let eniam_parse_sentence timeout test_only_flag paths last next_id = |
221 | 221 | let time3 = time_fun () in |
222 | 222 | {result with status=LexiconError; msg=Printexc.to_string e; lex_time=time3 -. time2}, next_id |
223 | 223 | |
224 | - | |
225 | - | |
224 | +let mate_in, mate_out = Unix.open_process "java -jar ../dependencyParser/basic/mate-tools/dist/anna-3.5.jar -model ../dependencyParser/basic/mate-tools/examples/160622_Polish_MateParser.mdl -test" | |
226 | 225 | |
227 | 226 | let rec parse_sentence timeout test_only_flag mode next_id = function |
228 | 227 | RawSentence s -> RawSentence s, next_id |
... | ... | @@ -233,10 +232,14 @@ let rec parse_sentence timeout test_only_flag mode next_id = function |
233 | 232 | let result, next_id = eniam_parse_sentence timeout test_only_flag paths last next_id in |
234 | 233 | ENIAMSentence result, next_id |
235 | 234 | | Mate -> |
236 | - (*let conll = CONLL.string_of paths in | |
237 | - Printf.fprintf mate_out "%s\n%!" conll; | |
238 | - let conll = CONLL.read_conll mate_in in | |
239 | - konwersja na strukturę danych*) | |
235 | + print_endline "parse_sentence 1"; | |
236 | + let conll = CONLL.string_of_sentence PreTypes.Mate (PreTypes.StructSentence(paths,last)) in | |
237 | + print_endline "parse_sentence 2"; | |
238 | + Printf.fprintf mate_out "%s\n\n%!" conll; | |
239 | + print_endline "parse_sentence 3"; | |
240 | + let conll = CONLL.load_sentence mate_in in | |
241 | + print_endline "parse_sentence 4"; | |
242 | + (*konwersja na strukturę danych*) | |
240 | 243 | (* FIXME: tu trzeba wstawić konwersję na tekstowy format CONLL, |
241 | 244 | uruchomienie MateParser i |
242 | 245 | powtórną konwersję wyniku. *) |
... | ... | @@ -278,15 +281,15 @@ let rec extract_query_text = function |
278 | 281 | | AltText l -> (try extract_query_text (Xlist.assoc l Raw) with Not_found -> failwith "extract_query_text") |
279 | 282 | | _ -> failwith "extract_query_text" |
280 | 283 | |
281 | -let process_query ic oc timeout test_only_flag id full_query max_n = | |
284 | +let process_query pre_in pre_out timeout test_only_flag id full_query max_n = | |
282 | 285 | print_endline "process_query 0"; |
283 | 286 | let result = {empty_result with input_text=translate_text full_query} in |
284 | 287 | let time1 = time_fun () in |
285 | 288 | print_endline "process_query 1"; |
286 | - Marshal.to_channel oc full_query []; | |
287 | - flush oc; | |
289 | + Marshal.to_channel pre_out full_query []; | |
290 | + flush pre_out; | |
288 | 291 | print_endline "process_query 2"; |
289 | - let pre_text,msg,pre_time1 = (Marshal.from_channel ic : PreTypes.text * string * float) in | |
292 | + let pre_text,msg,pre_time1 = (Marshal.from_channel pre_in : PreTypes.text * string * float) in | |
290 | 293 | let time2 = time_fun () in |
291 | 294 | let result = if test_only_flag then result else {result with pre_text=translate_text pre_text} in |
292 | 295 | let result = {result with pre_time1=pre_time1; pre_time2=time2 -. time1} in |
... | ... |