(* ENIAMlexSemanticsTypes.ml 4.29 KB *)
(*
 *  ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
 *  Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

open ENIAMtokenizerTypes
open Xstd

(* FIXME: remove *)
(*type labels = {
  number: string;
  case: string;
  gender: string;
  person: string;
  aspect: string;
  }*)

(* Semantic annotation variants; a value of this type is stored in the
   [semantics] field of [lex_sem], where [Normal] is the default used by
   [empty_lex_sem]. *)
type semantics =
  | Normal
  | Special of string list
  (* Disabled constructors, kept for reference:
     | SpecialNoun of type_arg list * type_term
     | SpecialMod of string * (type_arg list * type_term) *)
  | PrepSemantics of
      (string * string * string * StringSet.t * string list) list
      (* tuple components, in order: case, role, role_attr, hipero, sel_prefs *)

(* A single frame record. The meaning of individual fields is not visible in
   this file; the notes below only restate what the types themselves show. *)
type frame = {
  (* triples of (selector, selector relation, list of strings) *)
  selectors :
    (ENIAM_LCGlexiconTypes.selector
     * ENIAM_LCGlexiconTypes.selector_relation
     * string list) list;
  (* triples of (string, list of (string, int) pairs, float);
     NOTE(review): presumably (sense name, weighted related senses, weight) -
     confirm against the code that fills this field *)
  meanings :
    ((*ENIAMwalTypes.meaning **) string * (string * int) list * float) list;
  positions : ENIAMwalTypes.position list;
  arole : string;
  arole_attr : string;
  arev : bool;
  sopinion : ENIAMwalTypes.opinion;
  fopinion : ENIAMwalTypes.opinion;
}

(* Default [frame] value: every list is empty, every string is "", [arev] is
   false and both opinions are [ENIAMwalTypes.Nieokreslony]. *)
let empty_frame = {
  selectors = [];
  meanings = [];
  positions = [];
  arole = "";
  arole_attr = "";
  arev = false;
  sopinion = ENIAMwalTypes.Nieokreslony;
  fopinion = ENIAMwalTypes.Nieokreslony;
}

(* Lexico-semantic record bundling schemata, lexical entries, frames,
   categories and a [semantics] value. See [empty_lex_sem] for the default. *)
type lex_sem = {
  (* each element pairs a list of (selector, selector relation, string list)
     triples with a list of (direction, grammar symbol) pairs *)
  schemata :
    ((ENIAM_LCGlexiconTypes.selector
      * ENIAM_LCGlexiconTypes.selector_relation
      * string list) list
     * (ENIAM_LCGtypes.direction * ENIAM_LCGtypes.grammar_symbol) list) list;
  (* each element pairs a list of (selector, selector relation, string list)
     triples with a single grammar symbol *)
  lex_entries :
    ((ENIAM_LCGlexiconTypes.selector
      * ENIAM_LCGlexiconTypes.selector_relation
      * string list) list
     * ENIAM_LCGtypes.grammar_symbol) list;
  frames : frame list;
  cats : string list;
  (* The fields below are disabled and kept only for reference. *)
  (* e: labels; *)
  (* valence: (int * ENIAMwalTypes.frame) list;
  simple_valence: (int * ENIAMwalTypes.frame) list;
  very_simple_valence: ((ENIAM_LCGgrammarPLtypes.cat * ENIAM_LCGgrammarPLtypes.selector_relation * string list) list * ENIAM_LCGtypes.grammar_symbol) list; *)
  (* senses: (string * (string * int) list * float) list; *)
  (* lroles: string * string; *)
  semantics : semantics;
}

(*let empty_labels = {
  number="";
  case="";
  gender="";
  person="";
  aspect="";
  }*)

(* Default [lex_sem] value: no schemata, lexical entries or frames, the single
   category "X" and [Normal] semantics. The commented-out initialisers mirror
   the disabled fields of [lex_sem]. *)
let empty_lex_sem = {
  schemata = [];
  lex_entries = [];
  frames = [];
  cats = ["X"];
  (*e=empty_labels;*) (*valence=[]; simple_valence=[]; very_simple_valence=[];*) (*senses=[];*)
  (*lroles="","";*)
  semantics = Normal;
}

(* FIXME: fix the directory *)
(*let subst_uncountable_lexemes_filename = resource_path ^ "/lexSemantics/subst_uncountable.dat"
let subst_uncountable_lexemes_filename2 = resource_path ^ "/lexSemantics/subst_uncountable_stare.dat"
let subst_container_lexemes_filename = resource_path ^ "/lexSemantics/subst_container.dat"
let subst_numeral_lexemes_filename = resource_path ^ "/lexSemantics/subst_numeral.dat"
  let subst_time_lexemes_filename = resource_path ^ "/lexSemantics/subst_time.dat"*)
(* let subst_uncountable_lexemes_filename = resource_path ^ "/Walenty/subst_uncountable.dat"
let subst_uncountable_lexemes_filename2 = resource_path ^ "/Walenty/subst_uncountable_stare.dat"
let subst_container_lexemes_filename = resource_path ^ "/Walenty/subst_container.dat"
let subst_numeral_lexemes_filename = resource_path ^ "/Walenty/subst_numeral.dat"
let subst_time_lexemes_filename = resource_path ^ "/Walenty/subst_time.dat" *)

(* Integer threshold constant.
   NOTE(review): presumably a cutoff related to hypernyms ("hipero") - confirm
   against the code that reads it. *)
let hipero_threshold : int = 3

(* Float weight constant.
   NOTE(review): presumably the weight given to a meaning that is not known -
   confirm against the code that reads it. *)
let unknown_meaning_weight : float = -1.0

(* Paths of files in the "plWordnet" subdirectory, each built by appending a
   fixed suffix to [resource_path]. [resource_path] is not defined in this
   file; presumably it comes from one of the opened modules - confirm. *)
let lu_filename =
  resource_path ^ "/plWordnet/lu.tab"

let ex_hipo_filename =
  resource_path ^ "/plWordnet/ex_hipo.tab"

let syn_filename =
  resource_path ^ "/plWordnet/syn.tab"

(* Paths of files in the "lexSemantics" subdirectory, each built by appending
   a fixed suffix to [resource_path]. [resource_path] is not defined in this
   file; presumably it comes from one of the opened modules - confirm. *)
let predef_filename =
  resource_path ^ "/lexSemantics/predef_prefs.tab"

let proper_classes_filename =
  resource_path ^ "/lexSemantics/proper_classes.tab"