plWordnet.ml 21.6 KB
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494
(********************************************************)
(*                                                      *)
(*  Copyright 2014 Wojciech Jaworski.                   *)
(*                                                      *)
(*  All rights reserved.                                *)
(*                                                      *)
(********************************************************)

open Xstd

(*let nexus_path = "/home/yacheu/Dokumenty/Badania/Jezyk i Umysl/Przetwarzanie Języka Naturalnego/zasoby/"
let toshiba_ub_path = "/home/wjaworski/Dokumenty/zasoby/"

let get_host_name () =
  let chan = Unix.open_process_in "uname -n" in
  input_line chan

let zasoby_path =
  match get_host_name () with
    "nexus" -> nexus_path
  | "toshiba-UB" -> toshiba_ub_path
(*   | "mozart" -> "." *)
  | s -> failwith ("unknown host: " ^ s)*)

let zasoby_path = "../../NLP resources/"

let plwordnet_filename = zasoby_path ^ "Słowosieć/plwordnet-3.0.xml"

let create_relation_map rel_id rels =
  Xlist.fold rels Graph.empty (fun graph r ->
    if r.r_relation = rel_id then
      Graph.add graph r.r_parent r.r_child
    else graph)

let assign_no_hipo synmap hipo =
  IntMap.mapi synmap (fun id syn ->
    {syn with syn_no_hipo=IntSet.size (Graph.get_children_ids hipo IntSet.empty id)})

let select_pos synmap pos =
  IntMap.fold synmap IntSet.empty (fun selected id syn ->
    if syn.syn_pos = pos then IntSet.add selected id else selected)

let select_big_synsets synmap threshold =
  IntMap.fold synmap IntSet.empty (fun selected id syn ->
    if syn.syn_no_hipo >= threshold then IntSet.add selected id else selected)

let lu_name lu =
  lu.lu_name ^ "-" ^ lu.lu_variant

let syn_name syn =
  String.concat ", " (Xlist.map syn.syn_units (fun (_,lu) -> lu_name lu))

let syn_name_single syn =
  if syn.syn_units = [] then "empty" else
  lu_name (snd (List.hd syn.syn_units))

let print_synset_names filename synmap selected =
  File.file_out filename (fun file ->
    IntSet.iter selected (fun id ->
      let syn = IntMap.find synmap id in
      let names = syn_name syn in
      Printf.fprintf file "%d\t%d\t%s\t%s\n" syn.syn_no_hipo id syn.syn_pos names))


(**************************************************)


(*
let string_of_tests tests =
  String.concat " " (Xlist.map tests (fun (t,p) -> "(" ^ t ^ "," ^ p ^ ")"))

let string_of_units units =
  String.concat " " (Xlist.map units fst)

let string_of_lu lu =
  Printf.sprintf "\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\"" lu.lu_name lu.lu_pos lu.lu_tagcount lu.lu_domain
    lu.lu_desc lu.lu_workstate lu.lu_source lu.lu_variant

let string_of_syn syn =
  Printf.sprintf "\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\"" syn.syn_workstate syn.syn_split
    syn.syn_owner syn.syn_definition syn.syn_desc syn.syn_abstract (string_of_units syn.syn_units)

let string_of_rt rt =
  Printf.sprintf "\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\"" rt.rt_type rt.rt_reverse rt.rt_name rt.rt_description rt.rt_posstr
    rt.rt_display rt.rt_shortcut rt.rt_autoreverse rt.rt_pwn (string_of_tests rt.rt_tests)

let lu_names = ["name"; "pos"; "tagcount"; "domain"; "desc"; "workstate"; "source"; "variant"]
let syn_names = ["workstate"; "split"; "owner"; "definition"; "desc"; "abstract"; "units"]
let rt_names = ["type"; "reverse"; "name"; "description"; "posstr"; "display"; "shortcut"; "autoreverse"; "pwn"; "tests"]
let rel_names = ["parent"; "child"; "valid"; "owner"]

let print_lu_map filename lumap =
  File.file_out filename (fun file ->
    Printf.fprintf file "id;%s\n" (String.concat ";" lu_names);
    StringMap.iter lumap (fun id lu ->
      Printf.fprintf file "%s;%s\n" id (string_of_lu lu)))

let print_syn_map filename synmap =
  File.file_out filename (fun file ->
    Printf.fprintf file "id;%s\n" (String.concat ";" syn_names);
    StringMap.iter synmap (fun id syn ->
      Printf.fprintf file "%s;%s\n" id (string_of_syn syn)))

let print_rt_map filename rel_count rtmap =
  File.file_out filename (fun file ->
    Printf.fprintf file "id;quantity;%s\n" (String.concat ";" rt_names);
    StringMap.iter rtmap (fun id rt ->
      Printf.fprintf file "%s;%d;%s\n" id (try StringQMap.find rel_count id with Not_found -> 0) (string_of_rt rt)))

let print_rels filename rel_id rels =
  File.file_out filename (fun file ->
    Printf.fprintf file "%s\n" (String.concat ";" rel_names);
    Xlist.iter rels (fun r ->
      if r.r_relation = rel_id then
        Printf.fprintf file "%s;%s;%s;%s\n" r.r_parent r.r_child r.r_valid r.r_owner))




let count_relations qmap rtmap rels =
  Xlist.fold rels qmap (fun qmap rel ->
    if not (StringMap.mem rtmap rel.r_relation) then print_endline ("unknown relation: " ^ rel.r_relation);
    StringQMap.add qmap rel.r_relation)

let print_lu_relation filename lumap attr =
  File.file_out filename (fun file ->
    StringMap.iter attr (fun id1 l ->
      Xlist.iter l (fun id2 ->
        let lu1 = StringMap.find lumap id1 in
        let lu2 = StringMap.find lumap id2 in
        Printf.fprintf file "%s\t%s\n" (lu_name lu1) (lu_name lu2))))

let print_syn_relation filename synmap attr =
  File.file_out filename (fun file ->
    StringMap.iter attr (fun id1 l ->
      Xlist.iter l (fun id2 ->
        let syn1 = StringMap.find synmap id1 in
        let syn2 = StringMap.find synmap id2 in
        Printf.fprintf file "%s\t%s\n" (syn_name syn1) (syn_name syn2))))

let print_lu_relations path lumap lr =
  let relations = Xlist.fold lr StringMap.empty (fun relations r ->
    let map = try StringMap.find relations r.r_relation with Not_found -> StringMap.empty in
    let map = StringMap.add_inc map r.r_parent [r.r_child] (fun l -> r.r_child :: l) in
    StringMap.add relations r.r_relation map) in
  StringMap.iter relations (fun id relation ->
    print_lu_relation (path ^ id ^ ".tab") lumap relation)

let print_syn_relations path synmap sr =
  let relations = Xlist.fold sr StringMap.empty (fun relations r ->
    let map = try StringMap.find relations r.r_relation with Not_found -> StringMap.empty in
    let map = StringMap.add_inc map r.r_parent [r.r_child] (fun l -> r.r_child :: l) in
    StringMap.add relations r.r_relation map) in
  StringMap.iter relations (fun id relation ->
    print_syn_relation (path ^ id ^ ".tab") synmap relation)

let create_relation_map_lex lu_syn rel_id rels =
  Xlist.fold rels Graph.empty (fun graph r ->
    if r.r_relation = rel_id then
      let parent = StringMap.find lu_syn r.r_parent in
      let child = StringMap.find lu_syn r.r_child in
      Graph.add graph (int_of_string r.r_parent) (int_of_string r.r_child)
    else graph)

let test_reverse_rel ra rb =
  StringMap.iter ra (fun ida l ->
    Xlist.iter l (fun idb ->
      try
        let l = StringMap.find rb idb in
        if Xlist.mem l ida then ()
        else print_endline ("test_reverse_rel a: " ^ ida)
      with Not_found -> print_endline ("test_reverse_rel b: " ^ idb)))

let pwn_pos = ["czasownik pwn"; "przymiotnik pwn"; "przysłówek pwn"; "rzeczownik pwn"]

let remove_pwn synmap =
  StringMap.fold synmap StringMap.empty (fun synmap id syn ->
    if Xlist.mem pwn_pos syn.syn_pos then synmap else StringMap.add synmap id syn)

(*let get_maximal_not_isolated_synsets synmap hipero hipo =
  let set = get_maximal_synsets synmap hipero in
  let set = StringSet.fold set StringSet.empty (fun set id ->
    if StringMap.mem hipo id then StringSet.add set id else set) in
  set*)

let rev_relation map =
  StringMap.fold map StringMap.empty (fun map id l ->
    Xlist.fold l map (fun map id2 ->
      StringMap.add_inc map id2 [id] (fun l -> id :: l)))

let sum_relation map map2 =
  StringMap.fold map2 map (fun map id l ->
    StringMap.add_inc map id l (fun l2 -> l @ l2))


    let synset_name synmap lumap id =
      let syn = try StringMap.find synmap id with Not_found -> failwith "synset_name" in
      let names = Xlist.map syn.syn_units (fun (luid,_) ->
        let lu = try StringMap.find lumap luid with Not_found -> failwith "synset_name" in
        lu.lu_name ^ " " ^ lu.lu_variant) in
      String.concat ", " names

(*    let synset_name_with_args walenty synmap lumap id =
      let syn = try StringMap.find synmap id with Not_found -> failwith "synset_name_with_args" in
      let names = Xlist.map syn.syn_units (fun luid ->
        let lu = try StringMap.find lumap luid with Not_found -> failwith "synset_name_with_args" in
        lu.lu_name, lu.lu_variant) in
      String.concat ", " (Xlist.map names (fun (name,variant) ->
        let args = try StringSet.to_list (StringMap.find walenty name) with Not_found -> [] in
        if args = [] then name ^ " " ^ variant else
        name ^ " " ^ variant ^ " <B>" ^ String.concat " " args ^ "</B>"))
*)
    let is_synset_abstract synmap id =
      let syn = try StringMap.find synmap id with Not_found -> failwith "is_synset_abstract" in
      match syn.syn_abstract with
        "true" -> true
      | "false" -> false
      | _ -> failwith "is_synset_abstract"

(*
let count_relation_map rmap =
  let qmap = StringMap.fold rmap IntQMap.empty (fun qmap _ l ->
    IntQMap.add qmap (Xlist.size l)) in
  IntQMap.iter qmap (fun len q ->
    Printf.printf "%6d %2d\n" q len)
*)
let rec create_spaces n =
  if n = 0 then "" else " " ^ (create_spaces (n - 1))

let rec is_synset_abstract2_rec tree synmap id visited =
  if is_synset_abstract synmap id then true,visited else
  if StringSet.mem visited id then false,visited else
  let visited = StringSet.add visited id in
  Xlist.fold (try StringMap.find tree id with Not_found -> []) (false,visited) (fun (b,visited) id2 ->
    let v,visited = is_synset_abstract2_rec tree synmap id2 visited in
    v || b, visited)

let is_synset_abstract2 tree synmap id =
  fst (is_synset_abstract2_rec tree synmap id StringSet.empty)

let rec print_subtree_rec file treshold tree synmap lumap level abs_parent id visited =
  if StringSet.mem visited id then (
    if is_synset_abstract2 tree synmap id || StringSet.size (get_subtree tree id StringSet.empty) >= treshold || abs_parent then
      Printf.fprintf file "%s%d VISITED %s%s\n" (create_spaces level)
        (StringSet.size (get_subtree tree id StringSet.empty))
        (if is_synset_abstract synmap id then "*" else "")
        (synset_name synmap lumap id);
    visited
  ) else (
    if is_synset_abstract2 tree synmap id || StringSet.size (get_subtree tree id StringSet.empty) >= treshold || abs_parent then
      Printf.fprintf file "%s%d %s%s\n" (create_spaces level)
        (StringSet.size (get_subtree tree id StringSet.empty))
        (if is_synset_abstract synmap id then "*" else "")
        (synset_name synmap lumap id);
    Xlist.fold (try StringMap.find tree id with Not_found -> []) (StringSet.add visited id) (fun visited id2 ->
      print_subtree_rec file treshold tree synmap lumap (level+2) (is_synset_abstract synmap id) id2 visited))

let print_subtree filename treshold tree synmap lumap id =
  File.file_out filename (fun file ->
    ignore(print_subtree_rec file treshold tree synmap lumap 0 true id StringSet.empty))

let rec create_subtree_xml tree synmap lumap id visited =
  let name = synset_name synmap lumap id in
  let size = StringSet.size (get_subtree tree id StringSet.empty) in
  let abstract = if is_synset_abstract synmap id then ["abstract","true"] else [] in
  if StringSet.mem visited id then
    Xml.Element("node",("name",name) :: ("visited","true") :: ("size",string_of_int size) :: abstract,[]), visited
  else
    let l,visited = Xlist.fold (try StringMap.find tree id with Not_found -> []) ([],StringSet.add visited id) (fun (l,visited) id2 ->
      let xml,visited = create_subtree_xml tree synmap lumap id2 visited in
      xml :: l, visited) in
    Xml.Element("node",("name",name) :: ("size",string_of_int size) :: abstract,l),visited

let print_subtree_xml filename tree synmap lumap id =
  File.file_out filename (fun file ->
    let xml,_ = create_subtree_xml tree synmap lumap id StringSet.empty in
    Printf.fprintf file "%s" (Xml.to_string_fmt xml))

let rec find_connected_components_rec rel threshold synmap set conn id =
  (* if id = "28358" then print_endline "find_connected_components_rec 1"; *)
  if StringSet.mem set id || (StringMap.find synmap id).syn_no_hipo < threshold then conn,set else (
  (* if id = "28358" then print_endline "find_connected_components_rec 2"; *)
  let conn = StringSet.add conn id in
  let set = StringSet.add set id in
  let l = try StringMap.find rel id with Not_found -> [] in
  (* if id = "28358" then Printf.printf "find_connected_components_rec 3: |conn|=%d\n%!" (StringSet.size conn); *)
  Xlist.fold l (conn,set) (fun (conn,set) id ->
    find_connected_components_rec rel threshold synmap set conn id))

let find_connected_components rel threshold synmap =
  let l,_ = StringMap.fold rel ([],StringSet.empty) (fun (l,set) id _ ->
    if StringSet.mem set id then l,set else
    let conn,set = find_connected_components_rec rel threshold synmap set StringSet.empty id in
    (* if StringSet.mem conn "28358" then print_endline "find_connected_components 1"; *)
    conn :: l, set) in
  l

let has_syn_above_threshold synmap threshold conn =
  StringSet.fold conn false (fun b id ->
    if (StringMap.find synmap id).syn_no_hipo >= threshold then true else b)

let remove_conn l id =
  Xlist.fold l [] (fun l conn ->
    if StringSet.mem conn id then l else conn :: l)

let select_conn l id =
  Xlist.fold l [] (fun l conn ->
    if StringSet.mem conn id then conn :: l else l)

let print_hipo_graph path name threshold synmap hipo conn =
  ignore (Xlist.fold conn 1 (fun n conn ->
    let name = name ^ "_" ^ string_of_int n in
    if has_syn_above_threshold synmap threshold conn then (
    File.file_out (path ^ name ^ ".gv") (fun file ->
      Printf.fprintf file "digraph G {\n  node [shape=box]\n";(*  "rankdir = LR\n";*)
      StringMap.iter synmap (fun id syn ->
        if StringSet.mem conn id && syn.syn_no_hipo >= threshold then
          Printf.fprintf file "  %s [label=\"%s\\n%d\"]\n" id (syn_name_single syn) syn.syn_no_hipo);
      StringMap.iter hipo (fun id1 l ->
        if StringSet.mem conn id1 && (StringMap.find synmap id1).syn_no_hipo >= threshold then
          Xlist.iter l (fun id2 ->
            if (StringMap.find synmap id2).syn_no_hipo >= threshold then
          Printf.fprintf file "  %s -> %s\n" id1 id2));
      Printf.fprintf file "}\n");
    Sys.chdir path;
    ignore (Sys.command ("dot -Tpng " ^ name ^ ".gv -o " ^ name ^ ".png"));
    Sys.chdir "..";
    n+1) else n))



(*
let rec create_spaces_html n =
  if n = 0 then "" else "&nbsp;&nbsp;&nbsp;" ^ (create_spaces_html (n - 1))

let create_args_map walenty synmap lumap =
  StringMap.fold synmap StringMap.empty (fun map id syn ->
    let names = Xlist.map syn.syn_units (fun luid ->
      let lu = try StringMap.find lumap luid with Not_found -> failwith "find_args" in
      lu.lu_name) in
    let args = Xlist.fold names StringSet.empty (fun set name ->
      try
        StringSet.union set (StringMap.find walenty name)
      with Not_found -> set) in
    if StringSet.is_empty args then map else
    StringMap.add map id args)

let rec create_args_tree_map_rec tree argmap id map =
  if StringMap.mem map id then map,StringMap.find map id else
  let args = try StringMap.find argmap id with Not_found -> StringSet.empty in
  let map,args = Xlist.fold (try StringMap.find tree id with Not_found -> []) (map,args) (fun (map,args) id2 ->
    let map2,args2 = create_args_tree_map_rec tree argmap id2 map in
    map2, StringSet.union args args2) in
  StringMap.add map id args, args

let create_args_tree_map tree argmap id =
  fst (create_args_tree_map_rec tree argmap id StringMap.empty)

let string_of_args argmap argtreemap id =
  let args = StringSet.to_list (try StringMap.find argtreemap id with Not_found -> StringSet.empty) in
  let local_args = try StringMap.find argmap id with Not_found -> StringSet.empty in
  let args = Xlist.map args (fun arg ->
    if StringSet.mem local_args arg then "<B>" ^ arg ^ "</B>"
    else arg) in
  String.concat " " args

let string_of_args2 argmap id =
  let args = StringSet.to_list (try StringMap.find argmap id with Not_found -> StringSet.empty) in
  "<B>" ^ String.concat " " args ^ "</B>"

(*let find_args walenty tree synmap lumap id =
  let syn = try StringMap.find synmap id with Not_found -> failwith "find_args" in
  let names = Xlist.map syn.syn_units (fun luid ->
    let lu = try StringMap.find lumap luid with Not_found -> failwith "find_args" in
    lu.lu_name) in
  let args = Xlist.fold names StringSet.empty (fun set name ->
    try
      StringSet.union set (StringMap.find walenty name)
    with Not_found -> set) in
  String.concat ", " (StringSet.to_list args)  *)

let rec print_arg_subtree_rec file argmap argtreemap tree synmap lumap level abs_parent id visited =
  if StringSet.mem visited id then (
    if is_synset_abstract2 tree synmap id || StringSet.size (get_subtree tree id StringSet.empty) >= 100 || abs_parent then
      Printf.fprintf file "%s<FONT SIZE=2>%s</FONT> %d VISITED %s%s %s<BR>\n" (create_spaces_html level) id
        (StringSet.size (get_subtree tree id StringSet.empty))
        (if is_synset_abstract synmap id then "*" else "")
        (synset_name synmap lumap id)
        (string_of_args argmap argtreemap id);
    visited
  ) else (
    if is_synset_abstract2 tree synmap id || StringSet.size (get_subtree tree id StringSet.empty) >= 100 || abs_parent then
      Printf.fprintf file "%s<FONT SIZE=2>%s</FONT> %d %s%s %s<BR>\n" (create_spaces_html level) id
        (StringSet.size (get_subtree tree id StringSet.empty))
        (if is_synset_abstract synmap id then "*" else "")
        (synset_name synmap lumap id)
        (string_of_args argmap argtreemap id);
    Xlist.fold (try StringMap.find tree id with Not_found -> []) (StringSet.add visited id) (fun visited id2 ->
      print_arg_subtree_rec file argmap argtreemap tree synmap lumap (level+2) (is_synset_abstract synmap id) id2 visited))

let print_html_header file =
  Printf.fprintf file "<HTML><HEAD><META HTTP-EQUIV=\"CONTENT-TYPE\" CONTENT=\"text/html; charset=utf8\"></HEAD><BODY>\n"

let print_html_trailer file =
  Printf.fprintf file "</BODY></HTML>\n"

let print_arg_subtree filename argmap argtreemap tree synmap lumap id =
  File.file_out filename (fun file ->
    print_html_header file;
    ignore(print_arg_subtree_rec file argmap argtreemap tree synmap lumap 0 true id StringSet.empty);
    print_html_trailer file)

let has_interesting_arg argtreemap id =
  try
    StringSet.size (StringMap.find argtreemap id) > 0
  with Not_found -> false

let rec print_arg_subtree_rec2 file argmap argtreemap walenty tree synmap lumap level abs_parent id visited =
  if StringSet.mem visited id then (
    if has_interesting_arg argtreemap id then
      Printf.fprintf file "%s<FONT SIZE=2>%s</FONT> %d VISITED %s%s<BR>\n" (create_spaces_html level) id
        (StringSet.size (get_subtree tree id StringSet.empty))
        (if is_synset_abstract synmap id then "*" else "")
        (synset_name_with_args walenty synmap lumap id)(*
        (string_of_args2 argmap id)*);
    visited
  ) else (
    if has_interesting_arg argtreemap id then
      Printf.fprintf file "%s<FONT SIZE=2>%s</FONT> %d %s%s<BR>\n" (create_spaces_html level) id
        (StringSet.size (get_subtree tree id StringSet.empty))
        (if is_synset_abstract synmap id then "*" else "")
        (synset_name_with_args walenty synmap lumap id)(*
        (string_of_args2 argmap id)*);
    Xlist.fold (try StringMap.find tree id with Not_found -> []) (StringSet.add visited id) (fun visited id2 ->
      print_arg_subtree_rec2 file argmap argtreemap walenty tree synmap lumap (level+2) (is_synset_abstract synmap id) id2 visited))

let print_arg_subtree2 filename argmap argtreemap walenty tree synmap lumap id =
  File.file_out filename (fun file ->
    print_html_header file;
    ignore(print_arg_subtree_rec2 file argmap argtreemap walenty tree synmap lumap 0 true id StringSet.empty);
    print_html_trailer file)

let compare_lu_id (x,_,_) (y,_,_) = compare (int_of_string x)  (int_of_string y)

let print_lexical_units path lumap =
  let map = StringMap.fold lumap StringMap.empty (fun map id lu ->
    StringMap.add_inc map lu.lu_pos [id,lu.lu_name,lu.lu_variant] (fun l -> (id,lu.lu_name,lu.lu_variant) :: l)) in
  StringMap.iter map (fun pos l ->
    File.file_out (path ^ pos ^ ".tab") (fun file ->
      Xlist.iter (Xlist.sort l compare_lu_id) (fun (id,lemma,variant) ->
        Printf.fprintf file "%s\t%s\t%s\n" id lemma variant)))
*)
(*
let print_synsets filename synmap =
  File.file_out filename (fun file ->
    StringMap.iter synmap (fun id syn ->
      Printf.fprintf file "%s\t%s\n" id (String.concat " " syn.syn_units)))

let print_hipero filename hipero =
  File.file_out filename (fun file ->
    StringMap.iter hipero (fun id l ->
      Printf.fprintf file "%s\t%s\n" id (String.concat " " l)))
*)

(*let xml_fold filename s f =
  File.file_in filename (fun file  ->
    let scanbuf = Scanf.Scanning.from_function (fun () -> input_char file) in
    let (*size*)_ =
      try
	Scanf.bscanf scanbuf "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<array-list owner=\"\" date=\"2013-07-24 11.45.32\" version=\"WordnetLoom 1.64.0\">\n"
      with
	_ -> failwith ("xml_fold " ^ filename ^ ": invalid header") in
    let lex = Lexing.from_function (fun buf len -> input file buf 0 len) in
    let par = XmlParser.make () in
    XmlParser.prove par false;
    XmlParser.check_eof par false;
    let r = ref s in
    let i = ref 0 in
    try
      while true do
	try
	  let graph = (*of_xml*) (XmlParser.parse par (XmlParser.SLexbuf lex)) in
	  r := f (!r) graph;
	  incr i;
	with Parsing.Parse_error -> failwith ("xml_fold " ^ filename ^ ": Parse_error")
      done;
      !r
    with
      Xml.Error e -> (
        print_endline ("xml_fold Xml.Error " ^ Xml.error e);
        Printf.printf "%d\n" (!i);
	(*if !i = size then*) !r
	(*else failwith ("xml_fold " ^ filename ^ ": invalid file contents")*)))*)
*)