depTree.ml
3.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
open Xstd
open PreTypes
let tuple_it taglist =
match List.length taglist with
0 -> Xml.Element("dot",[],[])
| 1 -> List.hd taglist
| _ -> Xml.Element("tuple",[],taglist)
let get_amorf_basic token_r = "empty" (* FIXME *)
let get_amorf token_r = "empty" (* FIXME *)
let get_vals token_r cat interp = get_amorf_basic token_r ::
match cat with
"subst" -> List.rev ("ter" :: (List.rev interp))
| _ -> interp (* FIXME *)
let get_basic_attrs token_r = ["A","a";"B","b"] (* FIXME *)
let get_attrs token_r =
let attrs = get_basic_attrs token_r in
List.map (fun (label, value) ->
Xml.Element("attr",["label",label],[
Xml.Element("val",[],[Xml.PCData value])])) attrs
let xml_of_gs token_r cat interp =
let vals = get_vals token_r cat interp in (** **)
let vals = List.map (fun x -> Xml.Element("val",[],[Xml.PCData x])) vals in
Xml.Element("gs",[],[tuple_it vals])
let xml_of_agf token_r = Xml.Element("agf",[],[Xml.PCData token_r.conll_label])
let xml_of_amorf token_r = Xml.Element("amorf",[],[Xml.PCData (get_amorf token_r)])
let xml_of_attrs token_r = Xml.Element("attrs",[],get_attrs token_r) (* FIXME *)
let xml_of_args token_rs token_r =
let children = List.filter (fun pom -> pom.conll_super = token_r.conll_id) token_rs in
let children_to_graph = List.map (fun pom ->
Xml.Element("ref",["id", pom.conll_id],[])) children in
Xml.Element("args",[],[tuple_it children_to_graph])
let xml_of_token_r token_rs token_r =
let pred, cat, interp = match token_r.token with
| Lemma(a,b,c) -> a, b, Xlist.map (List.hd c) (fun x -> List.hd x)
| _ -> failwith ("xml_of_token_r: not Lemma") in
Xml.Element("graph_node",["id", token_r.conll_id],[
Xml.Element("node",["pred",pred;"cat",cat;"weight","0";"id", token_r.conll_id],
(xml_of_gs token_r cat interp) :: (** **)
(xml_of_agf token_r) ::
(xml_of_amorf token_r) :: (** **)
(xml_of_attrs token_r) :: (** **)
[xml_of_args token_rs token_r]
) ])
let conll_to_xml token_rs =
Xml.Element("graph",[],List.map (xml_of_token_r token_rs) token_rs)
(***************************************************************************************************)
let get_info i = function
AltText[Raw,RawText text1;CONLL,StructText([StructParagraph[
{pid = id; pbeg = beg; plen = len; psentence =
AltSentence[Raw, RawSentence text2; CONLL, StructSentence(token_rs,-1)]}]],-1)] -> token_rs, id
| StructText([StructParagraph[{pid = id; pbeg = -1; plen = -1; psentence =
StructSentence(token_rs,-1)}]],-1) -> token_rs, "id_not_found" ^ (string_of_int i)
| _ -> failwith "get_info"
let print_corpus filename =
let corpus = File.file_in filename (fun file -> CONLL.match_corpus (CONLL.load_corpus file)) in
List.mapi (fun i x ->
let token_rs, id = get_info i x in
let xml = conll_to_xml token_rs in
let id = Str.global_replace (Str.regexp "/") "_" id in
let oc = open_out ("xml_test/"^id^".xml") in
output_string oc (Xml.to_string_fmt xml);
flush oc;
XmlPrinter.print_xml "xml_test/" id xml) corpus
(*let _ =
print_corpus "xml_test/sentence1.conll"*)