(* ENIAMwalTypes.ml 8.64 KB *)
(*
 *  ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty".
 *  Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

open Xstd

type opinion = Dobry | Pewny | Potoczny | Watpliwy | Archaiczny | Zly | Wulgarny | Domyslny | OpinionUndef
type negation = Negation | Aff | NegationUndef | NegationNA
type pred = PredTrue | PredFalse | PredUndef | PredNA
type aspect = Aspect of string | AspectUndef | AspectNA
type case = Case of string | Str | Part | CaseAgr | NomAgr | GenAgr | AllAgr | CaseUndef | AllUAgr | CaseUAgr
type comp = Comp of string | Zeby | Gdy | CompUndef
type comp_type = Int | Rel | CompTypeUndef (*| CompTypeAgr*)
type number = Number of string | NumberUndef | NumberAgr
type gender = Gender of string | GenderUndef | GenderAgr | Genders of string list
type grad = Grad of string | GradUndef
type refl = ReflEmpty | ReflTrue | ReflFalse | ReflUndef
type acm = Acm of string | AcmUndef

type mood = (*Mood of*) string (*| MoodUndef*)
type tense = string
type aux = NoAux | PastAux | FutAux | ImpAux

  type nsem = Common of string | Time

type gf = SUBJ | OBJ | ARG

type pos =
    SUBST of number * case
  | PREP of case
  | NUM of case * gender * acm
  | ADJ of number * case * gender * grad
  | ADV of grad
  | GER of number * case * gender * aspect * negation * refl
  | PACT of number * case * gender * aspect * negation * refl
  | PPAS of number * case * gender * aspect * negation
  | INF of aspect * negation * refl
  | QUB
  | COMPAR
  | COMP of comp_type
  | PERS of (*number * gender * aspect * person * *)negation * refl

type phrase =
    NP of case
  | PrepNP of string * case
  | AdjP of case
  | PrepAdjP of string * case
  | NumP of case
  | PrepNumP of string * case
  | ComprepNP of string
  | ComparNP of string * case
  | ComparPP of string
  | CP of comp_type * comp
  | NCP of case * comp_type * comp
  | PrepNCP of string * case * comp_type * comp
  | InfP of aspect
  | AdvP
  | FixedP of string
  | Num of case * acm
  | Or
  | Refl
  | Recip
  | Qub
  | Pro
  | ProNG
  | Null
  | GerP of case
  | PrepGerP of string * case
  | PpasP of case
  | PrepPpasP of string * case
  | PactP of case

type phrase_abbr =
    Xp of string
  | Advp of string
  | ComparP of string
  | Nonch
  | Distrp
  | Possp

type phrase_comp =
    Cp
  | Ncp of case
  | Prepncp of string * case

type lex =
    Lexeme of string
  | ORconcat of lex list
  | ORcoord of lex list
  | XOR of lex list
  | Elexeme of gender

type restr = Natr | Ratr | Ratrs | Ratr1 | Atr | Atr1 | NoRestr

type direction = Forward | Backward | Both

(*type lex_specs =
    NSpecs of number
  | AdvSpecs of grad
  | AdjSpecs of number * gender * grad
  | PpasSpecs of number * gender * negation
  | PactSpecs of number * gender * negation * refl
  | GerSpecs of number * negation * refl
  | CSpecs of negation * refl
  | NumSpecs of gender
  | EmptySpecs *)

type id = {hash: bool; suffix: string; numbers: int list}

let empty_id = {hash = false; suffix = ""; numbers = []}

type position = {psn_id: id; gf: gf; role: string; role_attr: string; sel_prefs: string list;
                 cr: string list; ce: string list; dir: direction; morfs: (id * morf) list}

and morf =
    Phrase of phrase
  | E of phrase
  | LexPhrase of (pos * lex) list * (restr * position list)
  | LexPhraseMode of string * (pos * lex) list * (restr * position list)
  | PhraseAbbr of phrase_abbr * morf list
  | PhraseComp of phrase_comp * (comp_type * comp list)
  | LexPhraseId of string * pos * lex
  | LexArg of string * pos * string
(*  | LexRealization of morf * string*)
(*  | Raised of string list * direction * string list
    | Multi of phrase list*)

let empty_position =
  {psn_id=empty_id; gf=ARG; role=""; role_attr="";sel_prefs=[]; cr=[]; ce=[]; dir=Both; morfs=[]}

type lex_record = {
  lex_argument: morf;
  lex_arguments: morf list;
  lex_lemma: lex;
  lex_numeral_lemma: lex;
  lex_negation: negation;
  lex_degree: grad;
  lex_number: number;
  lex_reflex: refl;
  lex_gender: gender;
  lex_modification: restr * position list;
}

let empty_lex = {lex_argument=Phrase Null; lex_arguments=[]; lex_lemma=Lexeme "";
                 lex_numeral_lemma=Lexeme ""; lex_negation=NegationUndef;
                 lex_degree=GradUndef; lex_number=NumberUndef; lex_reflex=ReflUndef;
                 lex_gender=GenderUndef; lex_modification = Natr,[]}

type frame_atrs =
    EmptyAtrs of string list
  | DefaultAtrs of string list * refl * opinion * negation * pred * aspect
  | ComprepAtrs of string
  | NounAtrs of string list * string * nsem (** string list*)
  | AdjAtrs of string list * case * string (** string * string list*)
  | PersAtrs of string list * string * negation * mood * tense * aux * aspect
  | GerAtrs of string list * string * negation * aspect
  | NonPersAtrs of string list * string * string * string * negation * aspect

type schema = {sch_id: int; opinion: opinion; reflexiveMark: refl; aspect: aspect;
               negativity: negation; predicativity: pred; positions: position list; text_rep: string}

type schema2 =
    Frame of frame_atrs * position list
  | LexFrame of string * pos * restr * position list
  | ComprepFrame of string * pos * restr * position list
(*  | FrameR of frame_atrs * (string * string * string list * string list * morf list) list
  | LexFrameR of string * pos * restr * (string * string * string list * string list * morf list) list
  | ComprepFrameR of string * pos * restr * (string * string * string list * string list * morf list) list *)



module OrderedAbbr = struct
  type  t = phrase_abbr
  let compare = compare
end

(* module MorfSet = Xset.Make(OrderedMorf) *)
module AbbrMap = Xmap.Make(OrderedAbbr)

module OrderedComp = struct
  type  t = comp_type
  let compare = compare
end

(* module MorfSet = Xset.Make(OrderedMorf) *)
module CompMap = Xmap.Make(OrderedComp)

let resource_path =
  try Sys.getenv "ENIAM_RESOURCE_PATH"
  with Not_found -> "/usr/share/eniam"

let realizations_filename = resource_path ^ "/Walenty/phrase_types_expand_20150909.txt"

let walenty_filename =
  try Sys.getenv "WALENTY_LOCALIZATION"
  with Not_found -> "/usr/share/walenty/walenty.xml"

let subst_uncountable_lexemes_filename = resource_path ^ "/Walenty/subst_uncountable.dat"
let subst_uncountable_lexemes_filename2 = resource_path ^ "/Walenty/subst_uncountable_stare.dat"
let subst_container_lexemes_filename = resource_path ^ "/Walenty/subst_container.dat"
let subst_numeral_lexemes_filename = resource_path ^ "/Walenty/subst_numeral.dat"
let subst_time_lexemes_filename = resource_path ^ "/Walenty/subst_time.dat"


type example = {exm_id: int;
                meaning: id;
                phrases: id list;
                sentence: string;
                source: string;
                opinion: opinion;
                note: string}

type sel_prefs =
    NumericP of int
  | SymbolP of string
  | RelationP of string * id

type argument = {arg_id: id;
                 role: string;
                 role_attribute: string;
                 sel_prefs: sel_prefs list list}

type frame  = {frm_id: int;
               opinion: string;
               meanings: id list;
               arguments: argument list}

type meaning = {mng_id: int;
                name: string;
                variant: string;
                plwnluid: int;
                gloss: string}

let empty_meaning = {mng_id = (-1);
                     name = "";
                     variant = "";
                     plwnluid = (-1);
                     gloss = ""}

type connection = {argument: id;
                   phrases: id list}

type entry = {ent_id: int;
              status: string;
              form_orth: string;
              form_pos: string;
              schemata: schema list;
              examples: example list;
              frames: frame list;
              meanings: meaning list;
              alternations: connection list list}

let empty_entry = {ent_id=(-1); status=""; form_orth=""; form_pos=""; schemata=[]; examples=[];
                   frames=[]; meanings=[]; alternations=[]}