ENIAMcategoriesPL.ml 22.3 KB
(*
 *  ENIAM_LCGgrammarPL is a library that provides LCG lexicon for Polish
 *  Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

open ENIAMlexSemanticsTypes
open Xstd

(* Full value inventories for the grammatical categories.  They are used below
   as the expansion of underspecified values (see expand_genders and
   expand_cases) and as defaults for parts of speech whose tags do not carry
   the given category. *)

(* Every gender tag. *)
let all_genders = ["m1";"m2";"m3";"f";"n1";"n2";"p1";"p2";"p3"]
(* Every case tag, vocative included. *)
let all_cases = ["nom";"gen";"dat";"acc";"inst";"loc";"voc"]
(* Every person tag: first, second and third. *)
let all_persons = ["pri";"sec";"ter"]
(* Every number tag: singular and plural. *)
let all_numbers = ["sg";"pl"]
(* FIXME: instead of listing all possible values, Zero could be used when there is no agreement *)

(** [expand_numbers numbers] replaces an underspecified number list, i.e. one
    containing the wildcard ["_"], with both number values; any other list is
    returned unchanged. *)
let expand_numbers numbers =
  match Xlist.mem numbers "_" with
    true -> ["sg";"pl"]
  | false -> numbers

(** [expand_genders genders] returns [all_genders] when [genders] contains the
    wildcard ["_"], and [genders] itself otherwise. *)
let expand_genders genders =
  let underspecified = Xlist.mem genders "_" in
  if underspecified then all_genders else genders

(** [expand_cases cases] returns [all_cases] when [cases] contains either of
    the placeholders ["_"] or ["$C"], and [cases] itself otherwise. *)
let expand_cases cases =
  let underspecified = Xlist.mem cases "_" || Xlist.mem cases "$C" in
  match underspecified with
    true -> all_cases
  | false -> cases

(** [expand_akcs akcs] returns both values ["akc"] and ["nakc"] when [akcs]
    contains the wildcard ["_"], and [akcs] itself otherwise. *)
let expand_akcs akcs =
  match Xlist.mem akcs "_" with
    true -> ["akc";"nakc"]
  | false -> akcs

(** [split_voc cases] partitions [cases] into a pair [(others, vocatives)]:
    every occurrence of ["voc"] goes to the second component, everything else
    to the first.  Both components are accumulated by consing, so each one
    lists its elements in the reverse of their order in [cases]. *)
let split_voc cases =
  Xlist.fold cases ([],[]) (fun (others,vocatives) case ->
    if case = "voc" then others, case :: vocatives
    else case :: others, vocatives)

(* Lexeme sets read from disk when this module is initialised: each file is
   loaded line by line and its lines become the members of the set.
   noun_type consults the uncountable, container and time sets;
   subst_numeral_lexemes is not referenced in the part of the file shown here. *)
let subst_uncountable_lexemes =
  File.load_lines subst_uncountable_lexemes_filename |> StringSet.of_list
let subst_uncountable_lexemes2 =
  File.load_lines subst_uncountable_lexemes_filename2 |> StringSet.of_list
let subst_container_lexemes =
  File.load_lines subst_container_lexemes_filename |> StringSet.of_list
let subst_numeral_lexemes =
  File.load_lines subst_numeral_lexemes_filename |> StringSet.of_list
let subst_time_lexemes =
  File.load_lines subst_time_lexemes_filename |> StringSet.of_list

(* Lemmas for which noun_type reports the syntactic noun type "pronoun"
   (unless the token is proper or its pos already decides the type). *)
let subst_pronoun_lexemes = StringSet.of_list ["co"; "kto"; "cokolwiek"; "ktokolwiek"; "nic"; "nikt"; "coś"; "ktoś"; "to"]
(* Adjective lemmas that clarify_categories emits with pos "apron" and
   pos2 "pron" instead of "adj"/"adj". *)
let adj_pronoun_lexemes = StringSet.of_list ["czyj"; "jaki"; "który"; "jakiś"; "ten"; "taki"]

(* let adj_quant_lexemes = StringSet.of_list ["każdy"; "wszelki"; "wszystek"; "żaden"; "jakiś"; "pewien"; "niektóry"; "jedyny"; "sam"] *)

(** [noun_type proper lemma pos] computes the pair [(nsyn, nsem)] describing a
    nominal token.

    [nsyn] is a one-element list holding ["proper"], ["pronoun"] or
    ["common"].  The checks are tried in this order: the [proper] flag, the
    personal/reflexive pronoun parts of speech, the symbolic parts of speech
    (dates, hours, urls, ...), membership of [lemma] in
    [subst_pronoun_lexemes], and finally the ["common"] default.

    [nsem] is [["count"]] for the pronoun parts of speech, [["time"]] for
    lemmas in [subst_time_lexemes], and otherwise [["count"]] optionally
    preceded by ["mass"] (uncountable lexemes) and then by ["measure"]
    (container lexemes). *)
let noun_type proper lemma pos =
  let nsyn =
    match proper, pos with
      true, _ -> "proper"
    | false, ("ppron12" | "ppron3" | "siebie") -> "pronoun"
    | false, ("symbol" | "date" | "date-interval" | "hour" | "hour-minute"
             | "hour-interval" | "hour-minute-interval" | "year" | "year-interval"
             | "day" | "day-interval" | "day-month" | "day-month-interval"
             | "match-result" | "month-interval" | "roman" | "roman-interval"
             | "url" | "email" | "obj-id") -> "proper"
    | false, _ ->
        if StringSet.mem subst_pronoun_lexemes lemma then "pronoun" else "common" in
  let nsem =
    match pos with
      "ppron12" | "ppron3" | "siebie" -> ["count"]
    | _ when StringSet.mem subst_time_lexemes lemma -> ["time"]
    | _ ->
        let uncountable =
          StringSet.mem subst_uncountable_lexemes lemma ||
          StringSet.mem subst_uncountable_lexemes2 lemma in
        let mass = if uncountable then ["mass"] else [] in
        let measure = if StringSet.mem subst_container_lexemes lemma then ["measure"] else [] in
        (* Same ordering as consing "mass" and then "measure" onto ["count"]. *)
        measure @ mass @ ["count"] in
  [nsyn],nsem

(** [clarify_categories proper (lemma, pos, interp)] converts one
    morphological interpretation of a token into a list of category records
    built on top of [empty_cats].

    - [proper] is forwarded to [noun_type] to decide the syntactic noun type.
    - [lemma] is the base form and [pos] the part-of-speech tag.
    - [interp] holds one list of values per tag position; the number of
      positions expected depends on [pos].

    Underspecified values are expanded with [expand_numbers], [expand_cases]
    and [expand_genders].  One interpretation may yield several records, for
    example separate vocative and non-vocative readings of a noun, or
    separate indicative and imperative readings of a finite verb.

    @raise Failure when [pos] together with the arity of [interp] is not
    handled, or when a ["fin"] form carries an aspect other than ["imperf"]
    or ["perf"].
    @raise Invalid_argument for an ["intnum"] whose lemma is the empty string
    (the last character of the lemma is inspected). *)
let clarify_categories proper = function
    lemma,("subst" | "depr" as pos),[numbers;cases;genders] ->
      (* NOTE(review): depreciative forms are emitted with pos "subst", exactly
         as before; only noun_type sees the original tag. *)
      let numbers = expand_numbers numbers in
      let genders = expand_genders genders in
      let cases,voc = split_voc (expand_cases cases) in
      let nsyn,nsem = noun_type proper lemma pos in
      let cats = {empty_cats with lemma=lemma; pos="subst"; pos2="noun"; numbers=numbers; genders=genders; nsyn=nsyn; nsem=nsem} in
      (if cases = [] then [] else
         [{cats with cases=cases; persons=["ter"]}]) @
      (* The second-person reading is the vocative one, so it must carry the
         vocative cases split off above, not the remaining cases. *)
      (if voc = [] then [] else
         [{cats with cases=voc; persons=["sec"]}])
  | lemma,"ppron12",([numbers;cases;genders;persons] | [numbers;cases;genders;persons;_]) ->
      (* The optional fifth position (accentability) is ignored. *)
      let numbers = expand_numbers numbers in
      let cases = expand_cases cases in
      let genders = expand_genders genders in
      [{empty_cats with lemma=lemma; pos="ppron12"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons}]
  | lemma,"ppron3",([numbers;cases;genders;persons] | [numbers;cases;genders;persons;_]) ->
      (* Without an explicit post-prepositionality position both variants are allowed. *)
      let numbers = expand_numbers numbers in
      let cases = expand_cases cases in
      let genders = expand_genders genders in
      [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=["praep-npraep"]}]
  | lemma,"ppron3",[numbers;cases;genders;persons;_;praep] ->
      let numbers = expand_numbers numbers in
      let cases = expand_cases cases in
      let genders = expand_genders genders in
      (* A form marked with both values is collapsed into one combined value. *)
      let praeps = match praep with
          ["praep";"npraep"] | ["npraep";"praep"] -> ["praep-npraep"]
        | _ -> praep in
      [{empty_cats with lemma=lemma; pos="ppron3"; pos2="pron"; numbers=numbers; cases=cases; genders=genders; persons=persons; praeps=praeps}]
  | lemma,"siebie",[cases] -> (* FIXME: should numbers, genders and persons be specified here? *)
      let cases = expand_cases cases in
      [{empty_cats with lemma=lemma; pos="siebie"; pos2="pron"; numbers=all_numbers; cases=cases; genders=all_genders; persons=["ter"]}]
  | lemma,"prep",([cases;_] | [cases]) ->
      (* The optional second position (vocalicity) is ignored.  The listed
         lemmas are emitted with pos "compar" and without case information. *)
      (match lemma with
         "jak" | "jako" | "niż" | "niczym" | "niby" | "co" | "zamiast" ->
           [{empty_cats with lemma=lemma; pos="compar"; pos2="prep"}]
       | _ ->
           let cases = expand_cases cases in
           [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; cases=cases}])
  | lemma,"num",[numbers;cases;genders;acms] ->
      let numbers = expand_numbers numbers in
      let cases = expand_cases cases in
      let genders = expand_genders genders in
      [{empty_cats with lemma=lemma; pos="num"; pos2="num"; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms}]
  | lemma,"intnum",[] ->
      (* Number and accommodability are derived from the digits: 1 and -1 are
         singular and congruent; otherwise the value depends on whether the
         last digit is 2, 3 or 4. *)
      let numbers,acms =
        if lemma = "1" || lemma = "-1" then ["sg"],["congr"] else
        let s = String.get lemma (String.length lemma - 1) in
        ["pl"],if s = '2' || s = '3' || s = '4' then ["rec";"congr"] else ["rec"] in
      [{empty_cats with lemma=lemma; pos="intnum"; pos2="num"; numbers=numbers; cases=all_cases; genders=all_genders; persons=["ter"]; acms=acms}]
  | lemma,"realnum",[] ->
      [{empty_cats with lemma=lemma; pos="realnum"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]}]
  | lemma,"intnum-interval",[] ->
      [{empty_cats with lemma=lemma; pos="intnum-interval"; pos2="num"; numbers=["pl"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec";"congr"]}]
  | lemma,"realnum-interval",[] ->
      [{empty_cats with lemma=lemma; pos="realnum-interval"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]}]
  | lemma,"symbol",[] ->
      [{empty_cats with lemma=lemma; pos="symbol"; pos2="noun"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]}]
  | lemma,"ordnum",[] ->
      [{empty_cats with lemma=lemma; pos="ordnum"; pos2="adj"; numbers=all_numbers; cases=all_cases; genders=all_genders; grads=["pos"]}] (* FIXME: should more than one degree be allowed? *)
  | lemma,("date" | "date-interval" | "hour-minute" | "hour" | "hour-minute-interval"
          | "hour-interval" | "year" | "year-interval" | "day" | "day-interval"
          | "day-month" | "day-month-interval" | "month-interval" | "roman-interval"
          | "match-result" | "url" | "email" | "obj-id" as pos),[] ->
      (* Symbolic tokens keep their own tag as pos and share pos2 "symbol". *)
      let nsyn,nsem = noun_type proper lemma pos in
      [{empty_cats with lemma=lemma; pos=pos; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  | lemma,"roman",[] ->
      (* A roman numeral is offered both as an ordinal adjective and as a symbol. *)
      let nsyn,nsem = noun_type proper lemma "roman" in
      [{empty_cats with lemma=lemma; pos="roman-ordnum"; pos2="adj"; numbers=all_numbers; cases=all_cases; genders=all_genders; grads=["pos"]};
       {empty_cats with lemma=lemma; pos="roman"; pos2="symbol"; nsyn=nsyn; nsem=nsem}]
  | lemma,"adj",[numbers;cases;genders;grads] -> (* FIXME: adjsyn *)
      let numbers = expand_numbers numbers in
      let cases = expand_cases cases in
      (* Nominative forms additionally receive the "pred" case value. *)
      let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in
      let genders = expand_genders genders in
      let pos,pos2 = if StringSet.mem adj_pronoun_lexemes lemma then "apron","pron" else "adj","adj" in
      [{empty_cats with lemma=lemma; pos=pos; pos2=pos2; numbers=numbers; cases=cases; genders=genders; grads=grads}] (* FIXME: should more than one degree be allowed? *)
  | lemma,"adjc",[] ->
      [{empty_cats with lemma=lemma; pos="adjc"; pos2="adj"; numbers=["sg"]; cases=["pred"]; genders=["m1";"m2";"m3"]; grads=["pos"]}]
  | lemma,"adjp",[] ->
      [{empty_cats with lemma=lemma; pos="adjp"; pos2="adj"; numbers=all_numbers; cases=["postp"]; genders=all_genders; grads=["pos"]}]
  | lemma,"adja",[] -> [{empty_cats with lemma=lemma; pos="adja"; pos2="adja"}]
  | lemma,"adv",[grads] -> [{empty_cats with lemma=lemma; pos="adv"; pos2="adv"; grads=grads}]
  | lemma,"adv",[] -> [{empty_cats with lemma=lemma; pos="adv"; pos2="adv"; grads=["pos"]}]
  | lemma,"ger",[numbers;cases;genders;aspects;negations] ->
      let numbers = expand_numbers numbers in
      let cases = expand_cases cases in
      let genders = expand_genders genders in
      [{empty_cats with lemma=lemma; pos="ger"; pos2="verb"; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; aspects=aspects; negations=negations}] (* FIXME: the question of person with the vocative *)
  | lemma,("pact" | "ppas" as pos),[numbers;cases;genders;aspects;negations] ->
      let numbers = expand_numbers numbers in
      let cases = expand_cases cases in
      (* Nominative forms additionally receive the "pred" case value. *)
      let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in
      let genders = expand_genders genders in
      [{empty_cats with lemma=lemma; pos=pos; pos2="verb"; numbers=numbers; cases=cases; genders=genders; aspects=aspects; negations=negations}]
  | lemma,"fin",[numbers;persons;aspects] ->  (* FIXME: genders without the plurale tantum ones *)
      let numbers = expand_numbers numbers in
      let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in
      let cats = {empty_cats with lemma=lemma; pos="fin"; pos2="verb"; numbers=numbers; genders=all_genders; persons=persons; negations=["aff"; "neg"]; moods=["indicative"]} in
      (* One indicative reading per aspect: imperfective forms are present
         tense, perfective forms are future tense. *)
      (Xlist.map aspects (function
            "imperf" -> {cats with aspects=["imperf"]; tenses=["pres"]}
          | "perf" -> {cats with aspects=["perf"]; tenses=["fut"]}
          | _ -> failwith "clarify_categories")) @
      (* An imperative reading is added when some person other than "sec" is
         present.  NOTE(review): the record keeps the full persons list;
         persons2 only gates the reading - confirm this is intended. *)
      (if persons2 = [] then [] else
        [{cats with aspects=aspects; moods=["imperative"]; tenses=["fut"]}])
  | lemma,"bedzie",[numbers;persons;aspects] ->
      let numbers = expand_numbers numbers in
      let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in
      let cats = {empty_cats with lemma=lemma; pos="fin"; pos2="verb"; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["fut"]} in
      (* NOTE(review): as for "fin", persons2 only gates the imperative reading. *)
      [cats] @
      (if persons2 = [] then [] else
        [{cats with moods=["imperative"]}])
  | lemma,"praet",([numbers;genders;aspects;_] | [numbers;genders;aspects]) ->
      (* The optional fourth position (agglutination) is ignored. *)
      let numbers = expand_numbers numbers in
      let genders = expand_genders genders in
      let cats = {empty_cats with lemma=lemma; pos="praet"; pos2="verb"; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["past"]} in
      (* Imperfective forms get an additional future-tense indicative reading. *)
      [cats] @
      (if Xlist.mem aspects "imperf" then
        [{cats with aspects=["imperf"]; moods=["indicative"]; tenses=["fut"]}]
       else [])
  | lemma,"winien",[numbers;genders;aspects] ->
      let numbers = expand_numbers numbers in
      let genders = expand_genders genders in
      let cats = {empty_cats with lemma=lemma; pos="winien"; pos2="verb"; numbers=numbers; genders=genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative";"conditional"]; tenses=["pres"]} in
      [cats;
       {cats with moods=["indicative"]; tenses=["past"]}] @
      (if Xlist.mem aspects "imperf" then
        [{cats with aspects=["imperf"]; moods=["indicative"]; tenses=["fut"]}]
       else [])
  | lemma,"impt",[numbers;persons;aspects] ->
      let numbers = expand_numbers numbers in
      [{empty_cats with lemma=lemma; pos="impt"; pos2="verb"; numbers=numbers; genders=all_genders; persons=persons; aspects=aspects; negations=["aff"; "neg"]; moods=["imperative"]; tenses=["fut"]}]
  | lemma,"imps",[aspects] ->
      [{empty_cats with lemma=lemma; pos="imps"; pos2="verb"; numbers=all_numbers; genders=all_genders; persons=all_persons; aspects=aspects; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["past"]}]
  | lemma,"pred",[] -> (* FIXME: is a predicative always imperfective? *)
      [{empty_cats with lemma=lemma; pos="pred"; pos2="verb"; numbers=["sg"]; genders=["n2"]; persons=["ter"]; aspects=["imperf"]; negations=["aff"; "neg"]; moods=["indicative"]; tenses=["pres";"past";"fut"]}]
  | lemma,"aglt",[numbers;persons;aspects;_] ->
      (* The fourth position (vocalicity) is ignored. *)
      let numbers = expand_numbers numbers in
      [{empty_cats with lemma=lemma; pos="aglt"; pos2="verb"; numbers=numbers; persons=persons; aspects=aspects}]
  | lemma,("inf" | "pcon" | "pant"),[aspects] ->
      (* NOTE(review): the adverbial participles "pcon" and "pant" are emitted
         with pos "inf", exactly as before - confirm this is intended. *)
      [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; aspects=aspects}]
  | lemma,("qub" | "comp" | "conj" | "interj" | "burk" as pos),[] ->
      [{empty_cats with lemma=lemma; pos=pos; pos2=pos}]
  (* A comma is treated as a conjunction; it must precede the general "interp" case. *)
  | ",","interp",[] -> [{empty_cats with lemma=","; pos="conj"; pos2="conj"}]
  | lemma,"interp",[] -> [{empty_cats with lemma=lemma; pos="interp"; pos2="interp"}]
  | lemma,"unk",[] ->
      [{empty_cats with lemma=lemma; pos="unk"; pos2="noun"; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}]
  | lemma,c,l -> failwith ("clarify_categories: " ^ lemma ^ ":" ^ c ^ ":" ^ (String.concat ":" (Xlist.map l (String.concat "."))))

(* FIXME: move this somewhere else *)
(** [assign token] returns the category records for a tokenizer token.

    [Lemma] and [Proper] tokens are expanded once per interpretation with
    [clarify_categories] (with [proper] set to [false] and [true]
    respectively) and the results are concatenated.  An [Interp] token is
    looked up as pos ["interp"] with no tag positions.  Every other kind of
    token yields the empty list. *)
let assign token =
  let expand proper lemma pos interps =
    List.flatten (Xlist.map interps (fun interp ->
      clarify_categories proper (lemma,pos,interp))) in
  match token.ENIAMtokenizerTypes.token with
    ENIAMtokenizerTypes.Lemma(lemma,pos,interp) -> expand false lemma pos interp
  | ENIAMtokenizerTypes.Proper(lemma,pos,interp,_) -> expand true lemma pos interp
  | ENIAMtokenizerTypes.Interp lemma -> clarify_categories false (lemma,"interp",[])
  | _ -> []

(** [match_selector cats selector] returns the list of values stored in [cats]
    for the category named by [selector]; for [Lemma] this is the
    one-element list holding the lemma.

    @raise Failure for any selector that is not listed below. *)
let match_selector cats selector =
  match selector with
  | Lemma -> [cats.lemma]
  (* | NewLemma -> [] *)
  | Number -> cats.numbers
  | Case -> cats.cases
  | Gender -> cats.genders
  | Person -> cats.persons
  | Grad -> cats.grads
  | Praep -> cats.praeps
  | Acm -> cats.acms
  | Aspect -> cats.aspects
  | Negation -> cats.negations
  | Mood -> cats.moods
  | Tense -> cats.tenses
  | Nsyn -> cats.nsyn
  | Nsem -> cats.nsem
  | other -> failwith ("match_selector: " ^ string_of_cat other)

(** [set_selector cats vals selector] returns a copy of [cats] in which the
    field named by [selector] is replaced with [vals]; [cats] itself is not
    modified.

    @raise Failure for any selector that is not listed below. *)
let set_selector cats vals selector =
  match selector with
  | Number -> {cats with numbers=vals}
  | Case -> {cats with cases=vals}
  | Gender -> {cats with genders=vals}
  | Person -> {cats with persons=vals}
  | Grad -> {cats with grads=vals}
  | Praep -> {cats with praeps=vals}
  | Acm -> {cats with acms=vals}
  | Aspect -> {cats with aspects=vals}
  | Negation -> {cats with negations=vals}
  | Mood -> {cats with moods=vals}
  | Tense -> {cats with tenses=vals}
  | Nsyn -> {cats with nsyn=vals}
  | Nsem -> {cats with nsem=vals}
  | other -> failwith ("set_selector: " ^ string_of_cat other)