(* plWordnet.ml *)
(********************************************************)
(* *)
(* Copyright 2014 Wojciech Jaworski. *)
(* *)
(* All rights reserved. *)
(* *)
(********************************************************)
open Xstd
let zasoby_path = "../../NLP resources/"
let plwordnet_filename = zasoby_path ^ "Słowosieć/plwordnet-3.0.xml"
let select_pos synmap pos =
IntMap.fold synmap IntSet.empty (fun selected id syn ->
if syn.syn_pos = pos then IntSet.add selected id else selected)
let select_big_synsets synmap threshold =
IntMap.fold synmap IntSet.empty (fun selected id syn ->
if syn.syn_no_hipo >= threshold then IntSet.add selected id else selected)
(**************************************************)
(*
let string_of_units units =
String.concat " " (Xlist.map units fst)
let string_of_lu lu =
Printf.sprintf "\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\"" lu.lu_name lu.lu_pos lu.lu_tagcount lu.lu_domain
lu.lu_desc lu.lu_workstate lu.lu_source lu.lu_variant
let string_of_syn syn =
Printf.sprintf "\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\"" syn.syn_workstate syn.syn_split
syn.syn_owner syn.syn_definition syn.syn_desc syn.syn_abstract (string_of_units syn.syn_units)
let lu_names = ["name"; "pos"; "tagcount"; "domain"; "desc"; "workstate"; "source"; "variant"]
let syn_names = ["workstate"; "split"; "owner"; "definition"; "desc"; "abstract"; "units"]
let rel_names = ["parent"; "child"; "valid"; "owner"]
let print_lu_map filename lumap =
File.file_out filename (fun file ->
Printf.fprintf file "id;%s\n" (String.concat ";" lu_names);
StringMap.iter lumap (fun id lu ->
Printf.fprintf file "%s;%s\n" id (string_of_lu lu)))
let print_syn_map filename synmap =
File.file_out filename (fun file ->
Printf.fprintf file "id;%s\n" (String.concat ";" syn_names);
StringMap.iter synmap (fun id syn ->
Printf.fprintf file "%s;%s\n" id (string_of_syn syn)))
let print_rels filename rel_id rels =
File.file_out filename (fun file ->
Printf.fprintf file "%s\n" (String.concat ";" rel_names);
Xlist.iter rels (fun r ->
if r.r_relation = rel_id then
Printf.fprintf file "%s;%s;%s;%s\n" r.r_parent r.r_child r.r_valid r.r_owner))
let pwn_pos = ["czasownik pwn"; "przymiotnik pwn"; "przysłówek pwn"; "rzeczownik pwn"]
let remove_pwn synmap =
StringMap.fold synmap StringMap.empty (fun synmap id syn ->
if Xlist.mem pwn_pos syn.syn_pos then synmap else StringMap.add synmap id syn)
(*let get_maximal_not_isolated_synsets synmap hipero hipo =
let set = get_maximal_synsets synmap hipero in
let set = StringSet.fold set StringSet.empty (fun set id ->
if StringMap.mem hipo id then StringSet.add set id else set) in
set*)
let has_syn_above_threshold synmap threshold conn =
StringSet.fold conn false (fun b id ->
if (StringMap.find synmap id).syn_no_hipo >= threshold then true else b)
let remove_conn l id =
Xlist.fold l [] (fun l conn ->
if StringSet.mem conn id then l else conn :: l)
let select_conn l id =
Xlist.fold l [] (fun l conn ->
if StringSet.mem conn id then conn :: l else l)
let print_hipo_graph path name threshold synmap hipo conn =
ignore (Xlist.fold conn 1 (fun n conn ->
let name = name ^ "_" ^ string_of_int n in
if has_syn_above_threshold synmap threshold conn then (
File.file_out (path ^ name ^ ".gv") (fun file ->
Printf.fprintf file "digraph G {\n node [shape=box]\n";(* "rankdir = LR\n";*)
StringMap.iter synmap (fun id syn ->
if StringSet.mem conn id && syn.syn_no_hipo >= threshold then
Printf.fprintf file " %s [label=\"%s\\n%d\"]\n" id (syn_name_single syn) syn.syn_no_hipo);
StringMap.iter hipo (fun id1 l ->
if StringSet.mem conn id1 && (StringMap.find synmap id1).syn_no_hipo >= threshold then
Xlist.iter l (fun id2 ->
if (StringMap.find synmap id2).syn_no_hipo >= threshold then
Printf.fprintf file " %s -> %s\n" id1 id2));
Printf.fprintf file "}\n");
Sys.chdir path;
ignore (Sys.command ("dot -Tpng " ^ name ^ ".gv -o " ^ name ^ ".png"));
Sys.chdir "..";
n+1) else n))
*)