Commit 0eea9135b9d3dab14d24fcd9bd9db8f5927c3c0e

Authored by Wojciech Jaworski
1 parent 24689c97

Scalenie z konwerterem świgrowego xmla

integration/ENIAMpreIntegration.ml
... ... @@ -197,8 +197,14 @@ let print_swigra_xml dir =
197 197  
(* [parse_swigra_sentence s] runs [curl_swigra s], parses the XML file
   [swigra_path ^ "/httpd/forest-disamb.xml"] with [Xml.parse_file], converts
   that XML to a CoNLL string with [SkladnicaXmlToConll.parse], calls
   [print_swigra_xml swigra_path], prints the CoNLL string to stdout and
   finally returns [RawSentence s].
   The CoNLL string is only printed; it is not part of the returned value.
   NOTE(review): the "s1".."s4" prints look like temporary progress markers
   for debugging - confirm whether they are meant to stay.
   NOTE(review): presumably [curl_swigra] is what produces
   forest-disamb.xml - verify against its definition. *)
198 198 let parse_swigra_sentence s =
199 199 curl_swigra s;
  200 + print_endline "s1";
200 201 let xml = Xml.parse_file (swigra_path ^ "/httpd/forest-disamb.xml") in
  202 + print_endline "s2";
  203 + let conll = SkladnicaXmlToConll.parse xml in
  204 + print_endline "s3";
201 205 print_swigra_xml swigra_path;
  206 + print_endline "s4";
  207 + print_endline conll;
202 208 RawSentence s
203 209  
204 210 let compare_mode (x,_) (y,_) = compare_mode x y
... ...
XmlToConll/SkladnicaTreeFinder.ml renamed to integration/SkladnicaTreeFinder.ml
XmlToConll/SkladnicaTypes.ml renamed to integration/SkladnicaTypes.ml
XmlToConll/SkladnicaXmlToConll.ml renamed to integration/SkladnicaXmlToConll.ml
... ... @@ -48,7 +48,13 @@ let rec tree_to_conll super = function
48 48 | TreeNotFound -> "empty_tree"
49 49 | _ -> failwith "tree_to_conll"
50 50  
51   -let _ =
  (* [parse xml] converts an already-parsed XML value into a CoNLL string.
     It builds a forest with [SkladnicaXmlToOcaml.to_ocaml_forest], selects a
     tree with [SkladnicaTreeFinder.get_tree] and renders it with
     [tree_to_conll 0].
     The result is the string "empty_tree" when [tree_to_conll] receives
     [TreeNotFound]; [tree_to_conll] fails with "tree_to_conll" on inputs it
     does not handle.
     The first argument of [get_tree] is the empty string here, whereas the
     commented-out batch driver in this file passed the file name.
     NOTE(review): presumably that argument is only a label used for
     reporting - confirm in SkladnicaTreeFinder. *)
  51 +let parse xml =
  52 + let forest = SkladnicaXmlToOcaml.to_ocaml_forest xml in
  53 + (* print_endline (SkladnicaTreeFinder.text_of_forest forest); *)
  54 + let tree = SkladnicaTreeFinder.get_tree "" forest in
  55 + tree_to_conll 0 tree
  56 +
  57 +(* let _ =
52 58 let path =
53 59 try
54 60 Sys.argv.(1)
... ... @@ -56,11 +62,12 @@ let _ =
56 62 | _ -> failwith ("Usage: " ^ Sys.argv.(0) ^ " corpus_name\n") in
57 63 List.iteri (fun i filename ->
58 64 (* print_string (string_of_int i ^ " "); flush stdout; *)
59   - let forest = SkladnicaXmlToOcaml.to_ocaml_forest (Xml.parse_file filename) in
  65 + (* let forest = SkladnicaXmlToOcaml.to_ocaml_forest (Xml.parse_file filename) in
60 66 (* print_endline (SkladnicaTreeFinder.text_of_forest forest); *)
61 67 let tree = SkladnicaTreeFinder.get_tree filename forest in
62   - let str_tree = tree_to_conll 0 tree in
  68 + let str_tree = tree_to_conll 0 tree in *)
  69 + let str_tree = parse (Xml.parse_file filename) in
63 70 if str_tree <> "empty_tree"
64 71 then print_endline (str_tree ^ "\n");
65 72 ) (get_filenames path)
66   - (* StringSet.iter print_endline !m *)
  73 + (* StringSet.iter print_endline !m *) *)
... ...
XmlToConll/SkladnicaXmlToOcaml.ml renamed to integration/SkladnicaXmlToOcaml.ml
integration/makefile
... ... @@ -6,7 +6,7 @@ OCAMLFLAGS=$(INCLUDES) -g
6 6 OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-integration.cmxa
7 7 INSTALLDIR=`ocamlc -where`/eniam
8 8  
9   -SOURCES= ENIAM_CONLL.ml CONLL_adapter.ml ENIAMpreIntegration.ml
  9 +SOURCES= ENIAM_CONLL.ml CONLL_adapter.ml SkladnicaTypes.ml SkladnicaXmlToOcaml.ml SkladnicaTreeFinder.ml SkladnicaXmlToConll.ml ENIAMpreIntegration.ml
10 10  
11 11 all: eniam-integration.cma eniam-integration.cmxa
12 12  
... ...