(* ENIAMwalTypes.ml 6.75 KB *)
(*
 *  ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty".
 *  Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

open Xstd

(** Lexical tokens.

    [Paren], [Bracet] and [SqBra] each carry a nested list of tokens, whereas
    [LParen] to [RSqBra] are stand-alone delimiter tokens.
    NOTE(review): presumably a parser defined elsewhere folds the flat
    delimiter tokens into the nested forms - confirm against the caller. *)
type token =
  | Text of string  (** a piece of text *)
  | Paren of token list
  | Bracet of token list
  | SqBra of token list
  | LParen
  | RParen
  | LBracet
  | RBracet
  | LSqBra
  | RSqBra
  | Semic
  | Plus
  | Comma
  | Quot

(** Opinion label. The constructor names are Polish words: certain,
    colloquial, doubtful, archaic, bad, vulgar, default. *)
type opinion =
  | Pewny
  | Potoczny
  | Watpliwy
  | Archaiczny
  | Zly
  | Wulgarny
  | Domyslny

(** Negation value, with separate "undefined" and "not applicable" markers. *)
type negation =
  | Negation
  | Aff
  | NegationUndef
  | NegationNA

(** Predicativity flag ([PredNA] is the "not applicable" marker). *)
type pred =
  | Pred
  | PredNA

(** Aspect: an explicit string value, undefined, or not applicable. *)
type aspect =
  | Aspect of string
  | AspectUndef
  | AspectNA

(** Case: an explicit string value or one of several symbolic markers.
    NOTE(review): the [...Agr] constructors presumably stand for agreement
    constraints - confirm against the code that interprets them. *)
type case =
  | Case of string
  | Str
  | Part
  | CaseAgr
  | NomAgr
  | GenAgr
  | AllAgr
  | CaseUndef
  | AllUAgr
  | CaseUAgr

(** Complementizer: an explicit string value, one of two dedicated
    constructors, or undefined. *)
type comp =
  | Comp of string
  | Zeby
  | Gdy
  | CompUndef

(** Kind of complementizer. *)
type comp_type =
  | Int
  | Rel
  | Sub
  | Coord
  | CompTypeUndef
  | CompTypeAgr

(** Grammatical number: explicit string value, undefined, or agreement. *)
type number =
  | Number of string
  | NumberUndef
  | NumberAgr

(** Grammatical gender: a single value, undefined, agreement, or a list of
    admissible values. *)
type gender =
  | Gender of string
  | GenderUndef
  | GenderAgr
  | Genders of string list

(** Degree ("grad"): explicit string value or undefined. *)
type grad =
  | Grad of string
  | GradUndef

(** Reflexive marker: absent, or [ReflSie]. *)
type refl =
  | ReflEmpty
  | ReflSie

(** "acm" attribute: explicit string value or undefined. *)
type acm =
  | Acm of string
  | AcmUndef

(** Two-valued semantic flag used as the first component of several
    prepositional [phrase] constructors. *)
type sem =
  | Sem
  | NoSem
(* type req = Req | NReq | ReqUndef *)

(** Mood, kept as a plain string.
    (A variant form, [Mood of string | MoodUndef], is commented out.) *)
type mood = string

(** Tense, kept as a plain string. *)
type tense = string

(** Auxiliary marker: none, or one of three auxiliary kinds. *)
type aux =
  | NoAux
  | PastAux
  | FutAux
  | ImpAux

(** Noun semantics: [Common] with a string payload, or [Time]. *)
type nsem =
  | Common of string
  | Time

(** Label stored in the [gf] field of a schema position.
    NOTE(review): presumably "grammatical function" - confirm. *)
type gf =
  | SUBJ
  | OBJ
  | ARG
  | CORE
  | NOSEM
  | ADJUNCT
  | RAISED
  | NOGF
  | CLAUSE
  | SENTENCE

(** Part-of-speech descriptor together with the morphological attributes
    that each part of speech carries. Values of this type are paired with
    [lex] inside lexicalised [morf] constructors and appear in [LexFrame]
    and [ComprepFrame]. *)
type pos =
  | SUBST of number * case
  | PREP of case
  | NUM of case * gender * acm
  | ADJ of number * case * gender * grad
  | ADV of grad
  | GER of number * case * gender * aspect * negation * refl
  | PACT of number * case * gender * aspect * negation * refl
  | PPAS of number * case * gender * aspect * negation
  | INF of aspect * negation * refl
  | QUB
  | COMPAR
  | COMP of comp_type
  | PERS of negation * refl
    (* commented-out leading components of PERS:
       number * gender * aspect * person *)

(** Phrase types.

    The prepositional and comparative variants carry a [sem] flag and a
    string in addition to (where present) a [case]; the clausal variants
    carry a [comp_type] and a [comp].
    NOTE(review): the string components are presumably the preposition or
    comparative lemma, and the constructor names presumably mirror the phrase
    labels of the Walenty dictionary - confirm against the parser. *)
type phrase =
  | NP of case
  | PrepNP of sem * string * case
  | AdjP of case
  | PrepAdjP of sem * string * case
  | NumP of case
  | PrepNumP of sem * string * case
  | ComprepNP of sem * string
  | ComparNP of sem * string * case
  | ComparPP of sem * string
  | IP
  | CP of comp_type * comp
  | NCP of case * comp_type * comp
  | PrepNCP of sem * string * case * comp_type * comp
  | InfP of aspect (* commented-out second component: req *)
  | PadvP
  | AdvP
  | FixedP of string
  | PrepP
  | Prep of string * case
  | Num of case * acm
  | Measure of case
  | Or
  (* | Refl *)
  (* | Recip *)
  | Qub
  | Adja
  (* | Nie *)
  | AuxPast
  | AuxFut
  | AuxImp
  | Aglt
  | Inclusion
  | Pro
  | ProNG
  | Null
  | X
  | Lex of string

(** Abbreviated phrase types. They are the key type of [AbbrMap] and the
    first component of the [PhraseAbbr] constructor of [morf].
    NOTE(review): presumably each abbreviation is expanded into a list of
    concrete phrases elsewhere - confirm. *)
type phrase_abbr =
  | Xp of string
  | Advp of string
  | ComparP of string
  | Nonch
  | Distrp
  | Possp

(** Clausal phrase skeletons without their complementizer part; the
    [PhraseComp] constructor of [morf] pairs one of these with a
    [comp_type] and a list of [comp] values. *)
type phrase_comp =
  | Cp
  | Ncp of case
  | Prepncp of string * case

(** Lexical specification: a single lexeme given as a string, one of three
    ways of combining a list of sub-specifications, or [Elexeme] carrying a
    [gender]. *)
type lex =
  | Lexeme of string
  | ORconcat of lex list
  | ORcoord of lex list
  | XOR of lex list
  | Elexeme of gender

(** Restriction marker that accompanies the schema of a lexicalised phrase
    or frame. *)
type restr =
  | Natr
  | Ratr
  | Ratrs
  | Ratr1
  | Atr
  | Atr1
  | NoRestr

(** Direction marker, used by the [dir] field of [schema_field] and by the
    [Raised] constructor of [morf]. *)
type direction =
  | Forward
  | Backward
  | Both

(*type lex_specs =
    NSpecs of number
  | AdvSpecs of grad
  | AdjSpecs of number * gender * grad
  | PpasSpecs of number * gender * negation
  | PactSpecs of number * gender * negation * refl
  | GerSpecs of number * negation * refl
  | CSpecs of negation * refl
  | NumSpecs of gender
  | EmptySpecs *)

(* type schema = ((string * string * string list) * string list * string list * morf list) list  *)
(** One position of a schema. Mutually recursive with [morf], because
    lexicalised phrases embed a schema of their own. *)
type schema_field = {
  gf : gf;
  role : string;
  role_attr : string;
  sel_prefs : string list;
  cr : string list;  (* NOTE(review): presumably controller labels - confirm *)
  ce : string list;  (* NOTE(review): presumably controllee labels - confirm *)
  dir : direction;
  morfs : morf list;  (** the alternatives listed for this position *)
}

(** A single entry of a schema position. *)
and morf =
  | Phrase of phrase
  | E of phrase
  | LexPhrase of (pos * lex) list * (restr * schema_field list)
  | LexPhraseMode of string * (pos * lex) list * (restr * schema_field list)
  | PhraseAbbr of phrase_abbr * morf list
  | PhraseComp of phrase_comp * (comp_type * comp list)
  | LexPhraseId of string * pos * lex
  | LexArg of string * pos * string
  (* | LexRealization of morf * string *)
  | Raised of string list * direction * string list
  | Multi of phrase list

(* and mode = Mode of string * morf list | Pron of morf list | Misc *)

(** Attributes attached to a [Frame]; each constructor carries a different
    attribute tuple.
    NOTE(review): the leading [string list] shared by most constructors is
    presumably a list of meanings or identifiers - confirm against the code
    that builds these values. *)
type frame_atrs =
  | EmptyAtrs of string list
  | DefaultAtrs of string list * refl * opinion * negation * pred * aspect
  | ComprepAtrs of string
  | NounAtrs of string list * string * nsem
    (* commented-out trailing component: string list *)
  | AdjAtrs of string list * case * string
    (* commented-out trailing components: string * string list *)
  | PersAtrs of string list * string * negation * mood * tense * aux * aspect
  | GerAtrs of string list * string * negation * aspect
  | NonPersAtrs of string list * string * string * string * negation * aspect

(** A frame: either a set of attributes with a schema, or a lexicalised /
    complex-preposition frame consisting of a string, a [pos], a [restr]
    and a schema. *)
type frame =
  | Frame of frame_atrs * schema_field list
  | LexFrame of string * pos * restr * schema_field list
  | ComprepFrame of string * pos * restr * schema_field list
(*  | FrameR of frame_atrs * (string * string * string list * string list * morf list) list
  | LexFrameR of string * pos * restr * (string * string * string list * string list * morf list) list
  | ComprepFrameR of string * pos * restr * (string * string * string list * string list * morf list) list *)

(** Ordered-key module for [phrase_abbr], passed to [Xmap.Make] below. *)
module OrderedAbbr = struct
  type t = phrase_abbr

  (* Not recursive: the [compare] on the right-hand side is the one already
     in scope outside this definition. *)
  let compare left right = compare left right
end

(* module MorfSet = Xset.Make(OrderedMorf) *)

(** Maps keyed by [phrase_abbr]. *)
module AbbrMap = Xmap.Make (OrderedAbbr)

(** Ordered-key module for [comp_type], passed to [Xmap.Make] below. *)
module OrderedComp = struct
  type t = comp_type

  (* Not recursive: the [compare] on the right-hand side is the one already
     in scope outside this definition. *)
  let compare left right = compare left right
end

(* module MorfSet = Xset.Make(OrderedMorf) *)

(** Maps keyed by [comp_type]. *)
module CompMap = Xmap.Make (OrderedComp)

(** Root directory of the ENIAM resources: the value of the
    [ENIAM_RESOURCE_PATH] environment variable when it is set, and
    ["/usr/share/eniam"] otherwise. Computed once, when this module is
    initialised. *)
let resource_path =
  match Sys.getenv "ENIAM_RESOURCE_PATH" with
  | dir -> dir
  | exception Not_found -> "/usr/share/eniam"

(** Path of the phrase-type expansion file, located under [resource_path]. *)
let realizations_filename =
  String.concat "" [resource_path; "/Walenty/phrase_types_expand_20150909.txt"]

(** Location of the Walenty XML file: the value of the
    [WALENTY_LOCALIZATION] environment variable when it is set, and
    ["/usr/share/walenty/walenty.xml"] otherwise. Computed once, when this
    module is initialised. *)
let walenty_filename =
  match Sys.getenv "WALENTY_LOCALIZATION" with
  | path -> path
  | exception Not_found -> "/usr/share/walenty/walenty.xml"

(* Paths of the noun-lexeme data files. Each one is [resource_path]
   followed by a fixed suffix inside the Walenty subdirectory. *)

let subst_uncountable_lexemes_filename =
  String.concat "" [resource_path; "/Walenty/subst_uncountable.dat"]

let subst_uncountable_lexemes_filename2 =
  String.concat "" [resource_path; "/Walenty/subst_uncountable_stare.dat"]

let subst_container_lexemes_filename =
  String.concat "" [resource_path; "/Walenty/subst_container.dat"]

let subst_numeral_lexemes_filename =
  String.concat "" [resource_path; "/Walenty/subst_numeral.dat"]

let subst_time_lexemes_filename =
  String.concat "" [resource_path; "/Walenty/subst_time.dat"]