(* connections.ml, 1.27 KB *)
open Xstd
open PreTypes
(* Global accumulator ("wynik" is Polish for "result").
   [get_connection_part] prepends one newline-terminated string per recorded
   connection; [process_conll_corpus_for_connections] sorts the contents and
   writes them to the output file. Nothing in this file ever clears it. *)
let wynik = ref []
(** [get_paths sentence] returns the path array carried by a [DepSentence]
    as a list, in array order.
    @raise Failure with message ["get_paths"] for any other constructor. *)
let get_paths sentence =
  match sentence with
  | DepSentence path_array -> Array.to_list path_array
  | _ -> failwith "get_paths"
(** [lemma_string token] renders a token as a single string.

    - [Lemma (l, c, i)] becomes [l], a space, [c], a space, and a rendering
      of [i] (presumably lemma, category and interpretation -- confirm
      against [PreTypes]).
    - [Interp t] becomes [t] unchanged.

    @raise Failure with message ["lemma_string"] for any other constructor. *)
let lemma_string token =
  match token with
  | Lemma (base, cat, interp) ->
    (* Innermost string lists are joined with ".", the lists of those with
       "|", and the outermost list with "][". *)
    let render_inner parts = String.concat "." parts in
    let render_group group = String.concat "|" (Xlist.map group render_inner) in
    let interp_text =
      (* A single empty group is printed as the placeholder "_". *)
      if interp = [[]] then "_"
      else String.concat "][" (Xlist.map interp render_group)
    in
    base ^ " " ^ cat ^ " " ^ interp_text
  | Interp text -> text
  | _ -> failwith "lemma_string"
(** [get_connection_part paths tokens i id super label] records one
    connection in [wynik] when [super > 0]; otherwise it does nothing.

    The recorded string is the rendering (via [lemma_string]) of the token
    whose id is the first component of the [super]-th element of [paths],
    then the rendering of the token [id], then [label], separated by single
    spaces and terminated by a newline. Presumably [super] is the index of
    the syntactic head of [id] -- confirm against the CONLL loader.

    [i] is accepted but not used.

    May raise whatever [List.nth], [ExtArray.get] or [lemma_string] raise
    (e.g. [Failure "lemma_string"] for an unsupported token). *)
let get_connection_part paths tokens i id super label =
  if super <= 0 then ()
  else begin
    (* Only the id of the referenced entry is needed. *)
    let head_id, _, _ = List.nth paths super in
    let dependent_text = lemma_string (ExtArray.get tokens id).token in
    let head_text = lemma_string (ExtArray.get tokens head_id).token in
    let line = head_text ^ " " ^ dependent_text ^ " " ^ label ^ "\n" in
    wynik := line :: !wynik
  end
(** [parse_for_connections paths tokens] calls [get_connection_part] once
    for every [(id, super, label)] element of [paths], in list order, passing
    the element position as the index argument. Results are accumulated in
    [wynik] by [get_connection_part]. *)
let parse_for_connections paths tokens =
  let record_entry position (id, super, label) =
    get_connection_part paths tokens position id super label
  in
  List.iteri record_entry paths
(** [process_conll_corpus_for_connections filename] loads the corpus stored
    in [filename] with [CONLL.load_corpus], feeds the paths of every sentence
    to [parse_for_connections], then writes the accumulated contents of
    [wynik], sorted with [compare], to
    ["../miscellaneous/connections_skladnica.txt"].

    The output file is opened (and therefore truncated) before the corpus is
    read. The channel is closed on every exit path: normally after writing,
    and with [close_out_noerr] if anything raises, in which case the
    exception is re-raised to the caller.

    [wynik] is not cleared, so entries recorded by earlier calls are written
    again. *)
let process_conll_corpus_for_connections filename =
  let oc = open_out "../miscellaneous/connections_skladnica.txt" in
  try
    let corpus = File.file_in filename (fun file -> CONLL.load_corpus file) in
    Xlist.iter corpus (fun (p_record, tokens) ->
      parse_for_connections (get_paths p_record.psentence) tokens);
    Xlist.iter (List.sort compare !wynik) (output_string oc);
    (* close_out flushes pending output before releasing the descriptor. *)
    close_out oc
  with e ->
    (* Release the descriptor without masking the original failure. *)
    close_out_noerr oc;
    raise e