(* plWordnet.ml 4.28 KB *)
(********************************************************)
(*                                                      *)
(*  Copyright 2014 Wojciech Jaworski.                   *)
(*                                                      *)
(*  All rights reserved.                                *)
(*                                                      *)
(********************************************************)

open Xstd


(** Directory holding the NLP resources, given relative to the current
    working directory. *)
let zasoby_path = "../../NLP resources/"

(** Path of the plwordnet-3.0 XML file, located under [zasoby_path]. *)
let plwordnet_filename =
  String.concat "" [zasoby_path; "Słowosieć/plwordnet-3.0.xml"]

(** [select_pos synmap pos] folds over [synmap] and returns the [IntSet] of
    ids whose synset has a [syn_pos] field equal to [pos]. Synsets with any
    other [syn_pos] are left out of the result. *)
let select_pos synmap pos =
  (* Fold step: extend the accumulated id set only for matching synsets. *)
  let keep_matching acc synset_id synset =
    if synset.syn_pos = pos then IntSet.add acc synset_id else acc in
  IntMap.fold synmap IntSet.empty keep_matching

(** [select_big_synsets synmap threshold] folds over [synmap] and returns the
    [IntSet] of ids whose synset has [syn_no_hipo >= threshold].
    NOTE(review): [syn_no_hipo] is presumably a hyponym count; confirm against
    the synset type definition. *)
let select_big_synsets synmap threshold =
  (* Fold step: extend the accumulated id set only for large-enough synsets. *)
  let keep_large acc synset_id synset =
    if synset.syn_no_hipo >= threshold then IntSet.add acc synset_id
    else acc in
  IntMap.fold synmap IntSet.empty keep_large


(**************************************************)


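(* Disabled legacy code, kept for reference only. It contains
   semicolon-separated dumps of lexical units, synsets and relations,
   filtering of the pwn parts of speech, and Graphviz (dot) rendering of
   hipo graphs. It is written against StringMap/StringSet-keyed data, whereas
   the live selectors above use IntMap/IntSet, so it presumably needs porting
   before it can be re-enabled.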
let string_of_units units =
  String.concat " " (Xlist.map units fst)

let string_of_lu lu =
  Printf.sprintf "\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\"" lu.lu_name lu.lu_pos lu.lu_tagcount lu.lu_domain
    lu.lu_desc lu.lu_workstate lu.lu_source lu.lu_variant

let string_of_syn syn =
  Printf.sprintf "\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\"" syn.syn_workstate syn.syn_split
    syn.syn_owner syn.syn_definition syn.syn_desc syn.syn_abstract (string_of_units syn.syn_units)

let lu_names = ["name"; "pos"; "tagcount"; "domain"; "desc"; "workstate"; "source"; "variant"]
let syn_names = ["workstate"; "split"; "owner"; "definition"; "desc"; "abstract"; "units"]
let rel_names = ["parent"; "child"; "valid"; "owner"]

let print_lu_map filename lumap =
  File.file_out filename (fun file ->
    Printf.fprintf file "id;%s\n" (String.concat ";" lu_names);
    StringMap.iter lumap (fun id lu ->
      Printf.fprintf file "%s;%s\n" id (string_of_lu lu)))

let print_syn_map filename synmap =
  File.file_out filename (fun file ->
    Printf.fprintf file "id;%s\n" (String.concat ";" syn_names);
    StringMap.iter synmap (fun id syn ->
      Printf.fprintf file "%s;%s\n" id (string_of_syn syn)))

let print_rels filename rel_id rels =
  File.file_out filename (fun file ->
    Printf.fprintf file "%s\n" (String.concat ";" rel_names);
    Xlist.iter rels (fun r ->
      if r.r_relation = rel_id then
        Printf.fprintf file "%s;%s;%s;%s\n" r.r_parent r.r_child r.r_valid r.r_owner))




let pwn_pos = ["czasownik pwn"; "przymiotnik pwn"; "przysłówek pwn"; "rzeczownik pwn"]

let remove_pwn synmap =
  StringMap.fold synmap StringMap.empty (fun synmap id syn ->
    if Xlist.mem pwn_pos syn.syn_pos then synmap else StringMap.add synmap id syn)

(*let get_maximal_not_isolated_synsets synmap hipero hipo =
  let set = get_maximal_synsets synmap hipero in
  let set = StringSet.fold set StringSet.empty (fun set id ->
    if StringMap.mem hipo id then StringSet.add set id else set) in
  set*)


let has_syn_above_threshold synmap threshold conn =
  StringSet.fold conn false (fun b id ->
    if (StringMap.find synmap id).syn_no_hipo >= threshold then true else b)

let remove_conn l id =
  Xlist.fold l [] (fun l conn ->
    if StringSet.mem conn id then l else conn :: l)

let select_conn l id =
  Xlist.fold l [] (fun l conn ->
    if StringSet.mem conn id then conn :: l else l)

let print_hipo_graph path name threshold synmap hipo conn =
  ignore (Xlist.fold conn 1 (fun n conn ->
    let name = name ^ "_" ^ string_of_int n in
    if has_syn_above_threshold synmap threshold conn then (
    File.file_out (path ^ name ^ ".gv") (fun file ->
      Printf.fprintf file "digraph G {\n  node [shape=box]\n";(*  "rankdir = LR\n";*)
      StringMap.iter synmap (fun id syn ->
        if StringSet.mem conn id && syn.syn_no_hipo >= threshold then
          Printf.fprintf file "  %s [label=\"%s\\n%d\"]\n" id (syn_name_single syn) syn.syn_no_hipo);
      StringMap.iter hipo (fun id1 l ->
        if StringSet.mem conn id1 && (StringMap.find synmap id1).syn_no_hipo >= threshold then
          Xlist.iter l (fun id2 ->
            if (StringMap.find synmap id2).syn_no_hipo >= threshold then
          Printf.fprintf file "  %s -> %s\n" id1 id2));
      Printf.fprintf file "}\n");
    Sys.chdir path;
    ignore (Sys.command ("dot -Tpng " ^ name ^ ".gv -o " ^ name ^ ".png"));
    Sys.chdir "..";
    n+1) else n))


*)