freq_test.ml 1.41 KB
(*
 *  ENIAMcorpora is a library that integrates ENIAM with corpora in CONLL format
 *  Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

open Xstd

let _ =
  let l = File.load_tab "../resources/NKJP1M/NKJP1M-frequency.tab" (function
      [orth; lemma; interp; freq] -> orth, lemma, interp, int_of_string freq
    | l -> failwith ("load_frequencies: " ^ String.concat "\t" l)) in
  let qmap = Xlist.fold l StringQMap.empty (fun qmap (orth, lemma, interp, freq) ->
    let interp = List.hd (Xstring.split ":" interp) in
    StringQMap.add_val qmap (lemma ^ "\t" ^ interp) freq) in
  StringQMap.iter qmap (fun k v -> Printf.printf "%d\t%s\n" v k)