(* ENIAMwalTypes.ml 8.91 KB *)
(*
 *  ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty".
 *  Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

open Xstd

(* Basic feature types used by the part-of-speech and phrase types of this
   file.  Constructor order is kept exactly as declared, because structural
   comparison of constant constructors follows declaration order.
   NOTE(review): the *Undef / *NA constructors presumably mean "undefined"
   and "not applicable" - confirm against callers. *)

(* Opinion label.  The constructor names are Polish adjectives: dobry (good),
   pewny (certain), potoczny (colloquial), watpliwy (doubtful),
   archaiczny (archaic), zly (bad), wulgarny (vulgar), domyslny (default). *)
type opinion =
  | Dobry
  | Pewny
  | Potoczny
  | Watpliwy
  | Archaiczny
  | Zly
  | Wulgarny
  | Domyslny
  | OpinionUndef

type negation =
  | Negation
  | Aff
  | NegationUndef
  | NegationNA

type pred =
  | PredTrue
  | PredFalse
  | PredUndef
  | PredNA

type aspect =
  | Aspect of string
  | AspectUndef
  | AspectNA

(* Case value: either an explicit value carried as a string or one of the
   special markers listed below. *)
type case =
  | Case of string
  | Str
  | Part
  | CaseAgr
  | NomAgr
  | GenAgr
  | AllAgr
  | CaseUndef
  | AllUAgr
  | CaseUAgr

type comp =
  | Comp of string
  | Zeby
  | Gdy
  | CompUndef

type comp_type =
  | Int
  | Rel
  | CompTypeUndef (*| CompTypeAgr*)

type number =
  | Number of string
  | NumberUndef
  | NumberAgr

(* Gender value: a single value, a list of values, or a marker. *)
type gender =
  | Gender of string
  | GenderUndef
  | GenderAgr
  | Genders of string list

type grad =
  | Grad of string
  | GradUndef

type refl =
  | ReflEmpty
  | ReflTrue
  | ReflFalse
  | ReflUndef

type acm =
  | Acm of string
  | AcmUndef

(* Mood and tense are carried as plain strings; the variant form of [mood]
   is disabled in the comments below. *)
type mood = (*Mood of*) string (*| MoodUndef*)
type tense = string

type aux =
  | NoAux
  | PastAux
  | FutAux
  | ImpAux

type nsem =
  | Common of string
  | Time

(* Grammatical function of a position. *)
type gf =
  | SUBJ
  | OBJ
  | ARG

(* Part-of-speech pattern together with the features each category carries.
   NOTE(review): the constructor names look like NKJP-style tag names
   (subst, prep, num, ...) - confirm against the tagset used by callers. *)
type pos =
  | SUBST of number * case
  | PREP of case
  | NUM of case * gender * acm
  | ADJ of number * case * gender * grad
  | ADV of grad
  | GER of number * case * gender * aspect * negation * refl
  | PACT of number * case * gender * aspect * negation * refl
  | PPAS of number * case * gender * aspect * negation
  | INF of aspect * negation * refl
  | QUB
  | COMPAR
  | COMP of comp_type
  | PERS of (*number * gender * aspect * person * *)negation * refl
  | FIXED

(* Phrase types.  String arguments of the Prep* constructors sit next to a
   [case]; NOTE(review): presumably they hold the preposition - confirm. *)
type phrase =
  | NP of case
  | PrepNP of string * case
  | AdjP of case
  | PrepAdjP of string * case
  | NumP of case
  | PrepNumP of string * case
  | ComprepNP of string
  | ComparP of string (* * case *)
  | CP of comp_type * comp
  | NCP of case * comp_type * comp
  | PrepNCP of string * case * comp_type * comp
  | InfP of aspect
  | AdvP
  | FixedP of string
  | Num of case * acm
  | Or
  | Refl
  | Recip
  | Qub
  | Pro
  | ProNG
  | Null
  | GerP of case
  | PrepGerP of string * case
  | PpasP of case
  | PrepPpasP of string * case
  | PactP of case

(* Abbreviated phrase types; used as the key type of [AbbrMap] further down
   in this file. *)
type phrase_abbr =
  | Xp of string
  | Advp of string
  | Nonch
  | Distrp
  | Possp

(* Clause-like phrase heads; they appear in [PhraseComp] paired with a
   complementiser type and a list of complementisers. *)
type phrase_comp =
  | Cp
  | Ncp of case
  | Prepncp of string * case

(* Lexical material: a single lexeme, one of three list-valued combinations
   of nested [lex] values, or an [Elexeme] carrying only a gender. *)
type lex =
  | Lexeme of string
  | ORconcat of lex list
  | ORcoord of lex list
  | XOR of lex list
  | Elexeme of gender

(* Restriction tag; it is paired with a position list in [LexPhrase],
   [lex_modification] and the lexical frames of [schema2]. *)
type restr =
  | Natr
  | Ratr
  | Ratrs
  | Ratr1
  | Atr
  | Atr1
  | NoRestr

(*type lex_specs =
    NSpecs of number
  | AdvSpecs of grad
  | AdjSpecs of number * gender * grad
  | PpasSpecs of number * gender * negation
  | PactSpecs of number * gender * negation * refl
  | GerSpecs of number * negation * refl
  | CSpecs of negation * refl
  | NumSpecs of gender
  | EmptySpecs *)

(* Selectional preference attached to a position or a frame argument. *)
type sel_prefs =
  | SynsetId of int
  | Predef of string
  (* relation name * id of the frame argument *)
  | RelationArgId of string * int
  (* relation * role * role attribute *)
  | RelationRole of string * string * string

(* A position of a schema together with the realisations ([morfs]) listed
   for it.  [position] and [morf] are mutually recursive: [LexPhrase] embeds
   a position list and [position] embeds a morf list. *)
type position = {
  psn_id : int;
  gf : gf;
  role : string;
  role_attr : string;
  sel_prefs : sel_prefs list;
  mode : string list;
  cr : string list;
  ce : string list;
  morfs : morf list;
}

and morf =
  | Phrase of phrase
  (* | PhraseMode of string * phrase *)
  | E of phrase
  | LexPhrase of (pos * lex) list * (restr * position list)
  (* | LexRPhrase of (pos * lex) list * (restr * position list) *)
  (* | LexPhraseMode of string * (pos * lex) list * (restr * position list) *)
  (* | LexRPhraseMode of string * (pos * lex) list * (restr * position list) *)
  | PhraseAbbr of phrase_abbr * morf list
  | PhraseComp of phrase_comp * (comp_type * comp list)
  | LexPhraseId of string * pos * lex
  | LexArg of string * pos * string
  | MorfId of int
(*  | LexRealization of morf * string*)
(*  | Raised of string list * direction * string list
    | Multi of phrase list*)

(* Default position: id -1, grammatical function ARG, empty strings and
   empty lists for every other field.  Fields are listed in declaration
   order. *)
let empty_position = {
  psn_id = (-1);
  gf = ARG;
  role = "";
  role_attr = "";
  sel_prefs = [];
  mode = [];
  cr = [];
  ce = [];
  morfs = [];
}

(* Record gathering the components of a lexicalised argument: the argument
   itself and its sub-arguments, the lemma(s), and the morphological
   features.  [lex_modification] pairs a restriction tag with a position
   list, the same shape as the second component of [LexPhrase]. *)
type lex_record = {
  lex_argument : morf;
  lex_arguments : morf list;
  lex_lemma : lex;
  lex_numeral_lemma : lex;
  lex_mode : string list;
  lex_negation : negation;
  lex_degree : grad;
  lex_number : number;
  lex_reflex : refl;
  lex_gender : gender;
  lex_modification : restr * position list
}

(* Default [lex_record]: null phrase, empty lemmas and lists, every feature
   set to its *Undef constructor, and modification [Natr] with no
   positions. *)
let empty_lex = {
  lex_argument = Phrase Null;
  lex_arguments = [];
  lex_lemma = Lexeme "";
  lex_numeral_lemma = Lexeme "";
  lex_mode = [];
  lex_negation = NegationUndef;
  lex_degree = GradUndef;
  lex_number = NumberUndef;
  lex_reflex = ReflUndef;
  lex_gender = GenderUndef;
  lex_modification = (Natr, []);
}

(* A meaning record.
   NOTE(review): [plwnluid] presumably holds a plWordNet lexical-unit id -
   confirm against the code that fills it. *)
type meaning = {
  mng_id : int;
  name : string;
  variant : string;
  plwnluid : int;
  gloss : string;
}

(* Default meaning: both numeric ids are -1 and all strings are empty. *)
let empty_meaning = {
  mng_id = (-1);
  name = "";
  variant = "";
  plwnluid = (-1);
  gloss = "";
}

(* Attribute bundle of a frame; the set of carried attributes depends on the
   constructor.  Trailing comments mark tuple components that are currently
   disabled. *)
type frame_atrs =
  | EmptyAtrs of meaning list
  | DefaultAtrs of meaning list * refl * opinion * negation * pred * aspect
  | ComprepAtrs of string
  | NounAtrs of meaning list * string * nsem (* * string list *)
  | AdjAtrs of meaning list * case * string (* * string * string list *)
  | PersAtrs of meaning list * string * negation * mood * tense * aux * aspect
  | GerAtrs of meaning list * string * negation * aspect
  | NonPersAtrs of meaning list * string * string * string * negation * aspect

(* A schema: its id, schema-level features, the list of positions and a
   textual representation.  The field name [opinion] is reused by the
   [example] and [frame] records declared later in this file. *)
type schema = {
  sch_id : int;
  opinion : opinion;
  reflexiveMark : refl;
  aspect : aspect;
  negativity : negation;
  predicativity : pred;
  positions : position list;
  text_rep : string;
}

(* Frame variants: a frame with an attribute bundle, or a lexical / comprep
   frame given by a string, a part of speech and a restriction tag; each one
   carries a position list.  The *R variants are disabled below. *)
type schema2 =
  | Frame of frame_atrs * position list
  | LexFrame of string * pos * restr * position list
  | ComprepFrame of string * pos * restr * position list
(*  | FrameR of frame_atrs * (string * string * string list * string list * morf list) list
  | LexFrameR of string * pos * restr * (string * string * string list * string list * morf list) list
  | ComprepFrameR of string * pos * restr * (string * string * string list * string list * morf list) list *)



(* Ordered-key module for [phrase_abbr] and the map built from it. *)
module OrderedAbbr = struct
  type t = phrase_abbr

  (* Non-recursive binding: the [compare] on the right-hand side is the one
     already in scope outside this module. *)
  let compare x y = compare x y
end

(* module MorfSet = Xset.Make(OrderedMorf) *)
module AbbrMap = Xmap.Make (OrderedAbbr)

(* Ordered-key module for [comp_type] and the map built from it. *)
module OrderedComp = struct
  type t = comp_type

  (* Non-recursive binding: the [compare] on the right-hand side is the one
     already in scope outside this module. *)
  let compare x y = compare x y
end

(* module MorfSet = Xset.Make(OrderedMorf) *)
module CompMap = Xmap.Make (OrderedComp)

(* Directory holding the resource files.  Read once, when this module is
   initialised, from the environment variable ENIAM_RESOURCE_PATH; the
   default below is used when the variable is not set. *)
let resource_path =
  let default_path = "/usr/share/eniam" in
  try Sys.getenv "ENIAM_RESOURCE_PATH" with Not_found -> default_path

(* Path of the phrase-type expansion file, located under [resource_path]. *)
let realizations_filename =
  resource_path ^ "/Walenty/phrase_types_expand_20150909.txt"

(* Path of the Walenty XML file.  Read once, when this module is
   initialised, from the environment variable WALENTY_LOCALIZATION; the
   default below is used when the variable is not set. *)
let walenty_filename =
  let default_file = "/usr/share/walenty/walenty.xml" in
  try Sys.getenv "WALENTY_LOCALIZATION" with Not_found -> default_file

(* Paths of the noun-lexeme list files, all located in the Walenty
   subdirectory of [resource_path]. *)
let subst_uncountable_lexemes_filename =
  resource_path ^ "/Walenty/subst_uncountable.dat"

let subst_uncountable_lexemes_filename2 =
  resource_path ^ "/Walenty/subst_uncountable_stare.dat"

let subst_container_lexemes_filename =
  resource_path ^ "/Walenty/subst_container.dat"

let subst_numeral_lexemes_filename =
  resource_path ^ "/Walenty/subst_numeral.dat"

let subst_time_lexemes_filename =
  resource_path ^ "/Walenty/subst_time.dat"


(* An example sentence with its source, opinion and note.  [meaning] is an
   integer and [phrases] a list of integer triples.
   NOTE(review): these integers presumably are ids of other records -
   confirm.  The field names [opinion] and [phrases] are also used by other
   records of this file; without a type annotation a field name resolves to
   the most recently declared record. *)
type example = {
  exm_id : int;
  meaning : int;
  phrases : (int * int * int) list;
  sentence : string;
  source : string;
  opinion : opinion;
  note : string;
}

(* An argument of a frame: id, role, role attribute and selectional
   preferences.  The field names [role] and [sel_prefs] are also declared by
   [position] earlier in this file; from this point on, unannotated uses of
   those names resolve to [argument]. *)
type argument = {
  arg_id : int;
  role : string;
  role_attribute : string;
  sel_prefs : sel_prefs list;
}

(* A frame: id, opinion, a list of integers in [meanings] and the list of
   arguments.  Here [opinion] is a plain string, unlike the [opinion] fields
   of [schema] and [example], which use the [opinion] variant type. *)
type frame = {
  frm_id : int;
  opinion : string;
  meanings : int list;
  arguments : argument list;
}

(* Associates one integer in [argument] with a list of (int, int list)
   pairs in [phrases].
   NOTE(review): the integers presumably are ids of arguments, positions and
   phrases - confirm. *)
type connection = {
  argument : int;
  phrases : (int * int list) list;
}

(* Pairs the integers [schema] and [frame] with the list of connections
   between them.
   NOTE(review): both integers presumably are ids - confirm. *)
type alternation = {
  schema : int;
  frame : int;
  connections : connection list;
}

(* A dictionary entry: id, status, orthographic form and its part of speech
   (both strings), and the schemata, examples, frames, meanings and
   alternations that belong to it. *)
type entry = {
  ent_id : int;
  status : string;
  form_orth : string;
  form_pos : string;
  schemata : schema list;
  examples : example list;
  frames : frame list;
  meanings : meaning list;
  alternations : alternation list;
}

(* Default entry: id -1, empty strings and empty lists. *)
let empty_entry = {
  ent_id = (-1);
  status = "";
  form_orth = "";
  form_pos = "";
  schemata = [];
  examples = [];
  frames = [];
  meanings = [];
  alternations = [];
}