Commit 0eea9135b9d3dab14d24fcd9bd9db8f5927c3c0e
1 parent 24689c97
Scalenie z konwerterem świgrowego xmla
Showing 6 changed files with 18 additions and 5 deletions
integration/ENIAMpreIntegration.ml
... | ... | @@ -197,8 +197,14 @@ let print_swigra_xml dir = |
197 | 197 | |
198 | 198 | let parse_swigra_sentence s = |
199 | 199 | curl_swigra s; |
200 | + print_endline "s1"; | |
200 | 201 | let xml = Xml.parse_file (swigra_path ^ "/httpd/forest-disamb.xml") in |
202 | + print_endline "s2"; | |
203 | + let conll = SkladnicaXmlToConll.parse xml in | |
204 | + print_endline "s3"; | |
201 | 205 | print_swigra_xml swigra_path; |
206 | + print_endline "s4"; | |
207 | + print_endline conll; | |
202 | 208 | RawSentence s |
203 | 209 | |
204 | 210 | let compare_mode (x,_) (y,_) = compare_mode x y |
... | ... |
XmlToConll/SkladnicaTreeFinder.ml renamed to integration/SkladnicaTreeFinder.ml
XmlToConll/SkladnicaTypes.ml renamed to integration/SkladnicaTypes.ml
XmlToConll/SkladnicaXmlToConll.ml renamed to integration/SkladnicaXmlToConll.ml
... | ... | @@ -48,7 +48,13 @@ let rec tree_to_conll super = function |
48 | 48 | | TreeNotFound -> "empty_tree" |
49 | 49 | | _ -> failwith "tree_to_conll" |
50 | 50 | |
51 | -let _ = | |
51 | +let parse xml = | |
52 | + let forest = SkladnicaXmlToOcaml.to_ocaml_forest xml in | |
53 | + (* print_endline (SkladnicaTreeFinder.text_of_forest forest); *) | |
54 | + let tree = SkladnicaTreeFinder.get_tree "" forest in | |
55 | + tree_to_conll 0 tree | |
56 | + | |
57 | +(* let _ = | |
52 | 58 | let path = |
53 | 59 | try |
54 | 60 | Sys.argv.(1) |
... | ... | @@ -56,11 +62,12 @@ let _ = |
56 | 62 | | _ -> failwith ("Usage: " ^ Sys.argv.(0) ^ " corpus_name\n") in |
57 | 63 | List.iteri (fun i filename -> |
58 | 64 | (* print_string (string_of_int i ^ " "); flush stdout; *) |
59 | - let forest = SkladnicaXmlToOcaml.to_ocaml_forest (Xml.parse_file filename) in | |
65 | + (* let forest = SkladnicaXmlToOcaml.to_ocaml_forest (Xml.parse_file filename) in | |
60 | 66 | (* print_endline (SkladnicaTreeFinder.text_of_forest forest); *) |
61 | 67 | let tree = SkladnicaTreeFinder.get_tree filename forest in |
62 | - let str_tree = tree_to_conll 0 tree in | |
68 | + let str_tree = tree_to_conll 0 tree in *) | |
69 | + let str_tree = parse (Xml.parse_file filename) in | |
63 | 70 | if str_tree <> "empty_tree" |
64 | 71 | then print_endline (str_tree ^ "\n"); |
65 | 72 | ) (get_filenames path) |
66 | - (* StringSet.iter print_endline !m *) | |
73 | + (* StringSet.iter print_endline !m *) *) | |
... | ... |
XmlToConll/SkladnicaXmlToOcaml.ml renamed to integration/SkladnicaXmlToOcaml.ml
integration/makefile
... | ... | @@ -6,7 +6,7 @@ OCAMLFLAGS=$(INCLUDES) -g |
6 | 6 | OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-integration.cmxa |
7 | 7 | INSTALLDIR=`ocamlc -where`/eniam |
8 | 8 | |
9 | -SOURCES= ENIAM_CONLL.ml CONLL_adapter.ml ENIAMpreIntegration.ml | |
9 | +SOURCES= ENIAM_CONLL.ml CONLL_adapter.ml SkladnicaTypes.ml SkladnicaXmlToOcaml.ml SkladnicaTreeFinder.ml SkladnicaXmlToConll.ml ENIAMpreIntegration.ml | |
10 | 10 | |
11 | 11 | all: eniam-integration.cma eniam-integration.cmxa |
12 | 12 | |
... | ... |