ENIAM_LCGgrammarPLtypes.ml 5.07 KB

Edit Raw Blame History

(*
 *  ENIAM_LCGgrammarPL is a library that provides LCG lexicon for Polish
 *  Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

(** Record of category values.
    [lemma], [pos] and [pos2] each hold a single string; every other field
    holds a list of strings.
    NOTE(review): presumably each list enumerates the values still admissible
    for the corresponding attribute of a token -- confirm against the callers. *)
type categories = {
  lemma : string;
  pos : string;
  pos2 : string;
  cat : string list;
  numbers : string list;
  cases : string list;
  genders : string list;
  persons : string list;
  grads : string list;
  praeps : string list;
  acms : string list;
  aspects : string list;
  negations : string list;
  moods : string list;
  tenses : string list;
  nsyn : string list;
  nsem : string list;
}

(** Identifiers of the individual categories.
    [string_of_cat] maps each constructor to its lowercase textual name. *)
type cat =
  | Lemma
  (* | NewLemma *)
  | Pos
  | Pos2
  | Cat
  | Number
  | Case
  | Gender
  | Person
  | Grad
  | Praep
  | Acm
  | Aspect
  | Negation
  | Mood
  | Tense
  | Nsyn
  | Nsem
  | Ctype
  | Inumber
  | Igender
  | Iperson
  | Nperson
  | Plemma
  | Unumber
  | Ucase
  | Ugender
  | Uperson

(** Semantic part of a rule. Every constructor carries a list of [cat]
    identifiers; [RaisedSem] carries two such lists.
    NOTE(review): the precise meaning of each variant is not visible in this
    file -- confirm against the code that consumes [rule_sem]. *)
type rule_sem =
  | BasicSem of cat list
  | RaisedSem of cat list * cat list
  | QuotSem of cat list
  | InclusionSem of cat list
  | ConjSem of cat list

(** Relation between an attribute [x] and a value in a rule selector. *)
type selector_relation =
  | Eq
  (** [x="s"]: the token must have "s" among the values of [x]; the rule is
      then applied with all other values of [x] removed. *)
  | Neq
  (** [x!="s"]: the token must have some value of [x] other than "s"; the
      rule is then applied with "s" removed from [x]. *)
  (* | StrictEq *)

(* x="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jedną z wartości atrybutu x, reguła zostanie wykonana dla x z usuniętymi pozostałymi wartościami *)
(* x!="s" oznacza, że żeby reguła została użyta token musi mieć jako jedną z wartości atrybutu x symbol inny od "s", reguła zostanie wykonana dla x z usuniętą wartością "s" *)
(* x=="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jedyną wartość atrybutu x *)

(* wzajemne zależności między kategoriami (np między case i person w subst) są rozstrzygane w ENIAMcategories *)

(* Basic oznacza że kwantyfikacja i term są generowane zgodnie ze standardowymi regułami:
   - kwantyfikacja przebiega po wszystkich zdefiniowanych kategoriach i wartościach wziętych z cats
   - typ jest zadany bezpośrednio
   - term tworzy wierzchołek w strukturze zależnościowej etykietowany wszystkimi zdefiniowanymi kategoriami

   Quant oznacza że typ i term są generowane zgodnie ze standardowymi regułami:
   - kwantyfikacja jest zadana bezpośrednio
   - typ jest zadany bezpośrednio
   - term tworzy wierzchołek w strukturze zależnościowej etykietowany wszystkimi zdefiniowanymi kategoriami

*)

(** A [categories] record with every string field set to the empty string
    and every list field set to the empty list. *)
let empty_cats = {
  lemma = "";
  pos = "";
  pos2 = "";
  cat = [];
  numbers = [];
  cases = [];
  genders = [];
  persons = [];
  grads = [];
  praeps = [];
  acms = [];
  aspects = [];
  negations = [];
  moods = [];
  tenses = [];
  nsyn = [];
  nsem = [];
}

(** [string_of_cat c] returns the lowercase textual name of the category
    identifier [c], e.g. [Lemma] gives "lemma" and [Uperson] gives "uperson". *)
let string_of_cat c =
  match c with
  | Lemma -> "lemma"
  (* | NewLemma -> "newlemma" *)
  | Pos -> "pos"
  | Pos2 -> "pos2"
  | Cat -> "cat"
  | Number -> "number"
  | Case -> "case"
  | Gender -> "gender"
  | Person -> "person"
  | Grad -> "grad"
  | Praep -> "praep"
  | Acm -> "acm"
  | Aspect -> "aspect"
  | Negation -> "negation"
  | Mood -> "mood"
  | Tense -> "tense"
  | Nsyn -> "nsyn"
  | Nsem -> "nsem"
  | Ctype -> "ctype"
  | Inumber -> "inumber"
  | Igender -> "igender"
  | Iperson -> "iperson"
  | Nperson -> "nperson"
  | Plemma -> "plemma"
  | Unumber -> "unumber"
  | Ucase -> "ucase"
  | Ugender -> "ugender"
  | Uperson -> "uperson"

(** Root directory of the ENIAM resources: the value of the
    [ENIAM_RESOURCE_PATH] environment variable when it is set, and
    "/usr/share/eniam" otherwise. Evaluated once, at module initialisation. *)
let resource_path =
  match Sys.getenv "ENIAM_RESOURCE_PATH" with
  | path -> path
  | exception Not_found -> "/usr/share/eniam"

(* FIXME: fix the directory *)
(*let subst_uncountable_lexemes_filename = resource_path ^ "/lexSemantics/subst_uncountable.dat"
  let subst_uncountable_lexemes_filename2 = resource_path ^ "/lexSemantics/subst_uncountable_stare.dat"
  let subst_container_lexemes_filename = resource_path ^ "/lexSemantics/subst_container.dat"
  let subst_numeral_lexemes_filename = resource_path ^ "/lexSemantics/subst_numeral.dat"
  let subst_time_lexemes_filename = resource_path ^ "/lexSemantics/subst_time.dat"*)

(* Paths of the subst_* lexeme data files; each is [resource_path] followed
   by a fixed suffix under the "/Walenty" subdirectory. *)
let subst_uncountable_lexemes_filename =
  String.concat "" [resource_path; "/Walenty/subst_uncountable.dat"]

let subst_uncountable_lexemes_filename2 =
  String.concat "" [resource_path; "/Walenty/subst_uncountable_stare.dat"]

let subst_container_lexemes_filename =
  String.concat "" [resource_path; "/Walenty/subst_container.dat"]

let subst_numeral_lexemes_filename =
  String.concat "" [resource_path; "/Walenty/subst_numeral.dat"]

let subst_time_lexemes_filename =
  String.concat "" [resource_path; "/Walenty/subst_time.dat"]

(* Paths of the proper-name tables; each is [resource_path] followed by a
   fixed suffix under the "/lexSemantics" subdirectory. *)
let proper_names_filename =
  String.concat "" [resource_path; "/lexSemantics/proper_names_sgjp_polimorf.tab"]

let proper_names_filename2 =
  String.concat "" [resource_path; "/lexSemantics/proper_names.tab"]