Commit f9158b5a130c5a3597ae60d004fcc33614e12575

Authored by Wojciech Jaworski
1 parent c0772f92

wstępne uruchamianie MateParser

corpora/CONLL.ml
... ... @@ -18,6 +18,15 @@ let string_of_token mode token = match mode with
18 18 String.concat "\t" [token.conll_id;
19 19 token.orth; lemma; cat; cat; interp; "_"; "_";
20 20 string_of_int token.beg; string_of_int token.len]
  21 + | Mate -> let lemma,cat,interp = match token.token with
  22 + | Lemma(a,b,c) -> a,b,if c = [[]]
  23 + then "_"
  24 + else String.concat "][" @@ Xlist.map c (fun x ->
  25 + String.concat "|" @@ Xlist.map x ( fun y ->
  26 + String.concat "." y))
  27 + | _ -> failwith ("string_of_token: not Lemma") in
  28 + String.concat "\t" [token.conll_id;
  29 + token.orth; lemma; cat; cat; interp; "_"; "_"]
21 30 | _ -> failwith "string_of_token: ni"
22 31  
23 32 let rec string_of_sentence mode = function
... ...
parser/exec.ml
... ... @@ -221,8 +221,7 @@ let eniam_parse_sentence timeout test_only_flag paths last next_id =
221 221 let time3 = time_fun () in
222 222 {result with status=LexiconError; msg=Printexc.to_string e; lex_time=time3 -. time2}, next_id
223 223  
224   -
225   -
  224 +let mate_in, mate_out = Unix.open_process "java -jar ../dependencyParser/basic/mate-tools/dist/anna-3.5.jar -model ../dependencyParser/basic/mate-tools/examples/160622_Polish_MateParser.mdl -test" (* Launches the MateParser java command as a child process when this module is initialised; per Unix.open_process, [mate_in] reads the child's standard output and [mate_out] writes to its standard input. The jar and model paths are relative, so they resolve against the working directory the program is started from. *)
226 225  
227 226 let rec parse_sentence timeout test_only_flag mode next_id = function
228 227 RawSentence s -> RawSentence s, next_id
... ... @@ -233,10 +232,14 @@ let rec parse_sentence timeout test_only_flag mode next_id = function
233 232 let result, next_id = eniam_parse_sentence timeout test_only_flag paths last next_id in
234 233 ENIAMSentence result, next_id
235 234 | Mate ->
236   - (*let conll = CONLL.string_of paths in
237   - Printf.fprintf mate_out "%s\n%!" conll;
238   - let conll = CONLL.read_conll mate_in in
239   - konwersja na strukturę danych*)
  235 + print_endline "parse_sentence 1";
  236 + let conll = CONLL.string_of_sentence PreTypes.Mate (PreTypes.StructSentence(paths,last)) in
  237 + print_endline "parse_sentence 2";
  238 + Printf.fprintf mate_out "%s\n\n%!" conll;
  239 + print_endline "parse_sentence 3";
  240 + let conll = CONLL.load_sentence mate_in in
  241 + print_endline "parse_sentence 4";
  242 + (*konwersja na strukturę danych*)
240 243 (* FIXME: tu trzeba wstawić konwersję na tekstowy format CONLL,
241 244 uruchomienie MateParser i
242 245 powtórną konwersję wyniku. *)
... ... @@ -278,15 +281,15 @@ let rec extract_query_text = function
278 281 | AltText l -> (try extract_query_text (Xlist.assoc l Raw) with Not_found -> failwith "extract_query_text")
279 282 | _ -> failwith "extract_query_text"
280 283  
281   -let process_query ic oc timeout test_only_flag id full_query max_n =
  284 +let process_query pre_in pre_out timeout test_only_flag id full_query max_n =
282 285 print_endline "process_query 0";
283 286 let result = {empty_result with input_text=translate_text full_query} in
284 287 let time1 = time_fun () in
285 288 print_endline "process_query 1";
286   - Marshal.to_channel oc full_query [];
287   - flush oc;
  289 + Marshal.to_channel pre_out full_query [];
  290 + flush pre_out;
288 291 print_endline "process_query 2";
289   - let pre_text,msg,pre_time1 = (Marshal.from_channel ic : PreTypes.text * string * float) in
  292 + let pre_text,msg,pre_time1 = (Marshal.from_channel pre_in : PreTypes.text * string * float) in
290 293 let time2 = time_fun () in
291 294 let result = if test_only_flag then result else {result with pre_text=translate_text pre_text} in
292 295 let result = {result with pre_time1=pre_time1; pre_time2=time2 -. time1} in
... ...