depTree.ml 3.09 KB
open Xstd
open PreTypes

let tuple_it taglist =
  match List.length taglist with
    0 -> Xml.Element("dot",[],[])
  | 1 -> List.hd taglist
  | _ -> Xml.Element("tuple",[],taglist)

let get_amorf_basic token_r = "empty" (* FIXME *)

let get_amorf token_r = "empty" (* FIXME *)

let get_vals token_r cat interp = get_amorf_basic token_r ::
  match cat with
    "subst" -> List.rev ("ter" :: (List.rev interp))
  | _ -> interp (* FIXME *)

let get_basic_attrs token_r = ["A","a";"B","b"] (* FIXME *)

let get_attrs token_r =
  let attrs = get_basic_attrs token_r in
  List.map (fun (label, value) ->
      Xml.Element("attr",["label",label],[
        Xml.Element("val",[],[Xml.PCData value])])) attrs

let xml_of_gs token_r cat interp =
  let vals = get_vals token_r cat interp in (** **)
  let vals = List.map (fun x -> Xml.Element("val",[],[Xml.PCData x])) vals in
  Xml.Element("gs",[],[tuple_it vals])

let xml_of_agf token_r = Xml.Element("agf",[],[Xml.PCData token_r.conll_label])

let xml_of_amorf token_r = Xml.Element("amorf",[],[Xml.PCData (get_amorf token_r)])

let xml_of_attrs token_r = Xml.Element("attrs",[],get_attrs token_r) (* FIXME *)

let xml_of_args token_rs token_r =
  let children = List.filter (fun pom -> pom.conll_super = token_r.conll_id) token_rs in
  let children_to_graph = List.map (fun pom ->
    Xml.Element("ref",["id", pom.conll_id],[])) children in
  Xml.Element("args",[],[tuple_it children_to_graph])

let xml_of_token_r token_rs token_r =
  let pred, cat, interp = match token_r.token with
      | Lemma(a,b,c) -> a, b, Xlist.map (List.hd c) (fun x -> List.hd x)
      | _ -> failwith ("xml_of_token_r: not Lemma") in
  Xml.Element("graph_node",["id", token_r.conll_id],[
    Xml.Element("node",["pred",pred;"cat",cat;"weight","0";"id", token_r.conll_id],
      (xml_of_gs token_r cat interp) ::    (** **)
      (xml_of_agf token_r) ::
      (xml_of_amorf token_r) :: (** **)
      (xml_of_attrs token_r) :: (** **)
      [xml_of_args token_rs token_r]
      ) ])

let conll_to_xml token_rs =
  Xml.Element("graph",[],List.map (xml_of_token_r token_rs) token_rs)


(***************************************************************************************************)

let get_info i = function
    AltText[Raw,RawText text1;CONLL,StructText([StructParagraph[
     {pid = id; pbeg = beg; plen = len; psentence =
       AltSentence[Raw, RawSentence text2; CONLL, StructSentence(_,token_rs,-1)]}]],-1)] -> token_rs, id
  | StructText([StructParagraph[{pid = id; pbeg = -1; plen = -1; psentence =
      StructSentence(_,token_rs,-1)}]],-1) -> token_rs, "id_not_found" ^ (string_of_int i)
  | _ -> failwith "get_info"

let print_corpus filename =
  let corpus = File.file_in filename (fun file -> CONLL.match_corpus (CONLL.load_corpus file)) in
  List.mapi (fun i x ->
    let token_rs, id = get_info i x in
    let xml = conll_to_xml token_rs in
    let id = Str.global_replace (Str.regexp "/") "_" id in
    let oc = open_out ("xml_test/"^id^".xml") in
      output_string oc (Xml.to_string_fmt xml);
      flush oc;
      XmlPrinter.print_xml "xml_test/" id xml) corpus

(*let _ =
  print_corpus "xml_test/sentence1.conll"*)