Commit 27ff726e3f89173b19f0b1a410c79fa25f606ed2

Authored by Wojciech Jaworski
1 parent af8828aa

przetwarzenie realizacji z Walentego

lexSemantics/.gitignore 0 → 100644
  1 +test
... ...
lexSemantics/ENIAMlexSemantics.ml
1 1 (*
2 2 * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
5 5 *
6 6 * This library is free software: you can redistribute it and/or modify
7 7 * it under the terms of the GNU Lesser General Public License as published by
... ...
lexSemantics/ENIAMlexSemanticsData.ml
1 1 (*
2 2 * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
5 5 *
6 6 * This library is free software: you can redistribute it and/or modify
7 7 * it under the terms of the GNU Lesser General Public License as published by
... ...
lexSemantics/ENIAMlexSemanticsTypes.ml
1 1 (*
2 2 * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
5 5 *
6 6 * This library is free software: you can redistribute it and/or modify
7 7 * it under the terms of the GNU Lesser General Public License as published by
... ...
lexSemantics/ENIAMvalence.ml 0 → 100644
  1 +(*
  2 + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
  5 + *
  6 + * This library is free software: you can redistribute it and/or modify
  7 + * it under the terms of the GNU Lesser General Public License as published by
  8 + * the Free Software Foundation, either version 3 of the License, or
  9 + * (at your option) any later version.
  10 + *
  11 + * This library is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 + * GNU Lesser General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU Lesser General Public License
  17 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18 + *)
  19 +
  20 +open ENIAMwalTypes
  21 +open Xstd
  22 +
  23 +let simplify_pos = function (* Collapses detailed part-of-speech tags into the coarse classes "noun", "adj", "verb" and "pron"; any other tag is returned unchanged. *)
  24 + "subst" -> "noun"
  25 + | "depr" -> "noun"
  26 + | "psubst" -> "noun"
  27 + | "pdepr" -> "noun"
  28 + | "adj" -> "adj"
  29 + | "adjc" -> "adj"
  30 + | "adjp" -> "adj"
  31 + | "ger" -> "verb"
  32 + | "pact" -> "verb"
  33 + | "ppas" -> "verb"
  34 + | "fin" -> "verb"
  35 + | "bedzie" -> "verb"
  36 + | "praet" -> "verb"
  37 + | "winien" -> "verb"
  38 + | "impt" -> "verb"
  39 + | "imps" -> "verb"
  40 + | "inf" -> "verb"
  41 + | "pcon" -> "verb"
  42 + | "pant" -> "verb"
  43 + | "pred" -> "verb"
  44 + | "ppron12" -> "pron"
  45 + | "ppron3" -> "pron"
  46 + | "siebie" -> "pron"
  47 + | s -> s
  48 +
  49 +let transform_zeby = function (* Expands the Zeby complementizer by negation value: Aff yields only "że"; Negation and NegationUndef yield both "że" and "żeby". *)
  50 + Aff -> [Comp "że"]
  51 + | Negation -> [Comp "że";Comp "żeby"]
  52 + | NegationUndef -> [Comp "że";Comp "żeby"]
  53 +
  54 +let transform_gdy = function (* Expands the Gdy complementizer by mood string; raises Failure for a mood not listed below. *)
  55 + "indicative" -> [Comp "gdy"]
  56 + | "imperative" -> [Comp "gdy"]
  57 + | "conditional" -> [Comp "gdyby"]
  58 + | "gerundial" -> [Comp "gdy"]
  59 + | "" -> [Comp "gdy";Comp "gdyby"] (* empty mood: both variants are returned *)
  60 + | s -> failwith ("transform_gdy: " ^ s)
  61 +
  62 +let transform_comp negation mood = function (* Resolves one complementizer to a list of alternatives: Zeby is expanded by negation, Gdy by mood, everything else is returned as a singleton. *)
  63 + Comp comp -> [Comp comp]
  64 + | Zeby -> transform_zeby negation
  65 + | Gdy -> transform_gdy mood
  66 + | CompUndef -> [CompUndef]
  67 +
  68 +let transform_str mood negation = (* Lists the concrete cases standing for the Str case marker: gen when mood is "gerundial", otherwise chosen by negation. *)
  69 + if mood = "gerundial" then [Case "gen"] else
  70 + match negation with
  71 + Aff -> [Case "acc"]
  72 + | Negation -> [Case "gen"]
  73 + | NegationUndef -> [Case "acc";Case "gen"] (* negation unknown: both cases are returned *)
  74 +
  75 +let transform_np_phrase lemma = function (* Rewrites one phrase from a "subst" schema into a list of alternatives; lemma is used only in the diagnostic printed for unhandled phrases. *)
  76 + NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)]
  77 + | NP(CaseAgr) -> [NP(CaseAgr)(*;NumP(CaseAgr)*)]
  78 + | NCP(Case c,ctype,comp) -> [NCP(Case c,ctype,comp)]
  79 + | AdjP(Case _) as morf -> [morf] (* only 'zagłada adjp(postp)' *)
  80 + | AdjP(CaseAgr) -> [AdjP(AllAgr)]
  81 + | AdjP(Str) -> [AdjP(AllAgr)] (* probably a Walenty error; only 'barwa', 'bieda', 'głód', 'kolor', 'nędza', 'śmierć', 'usta' *)
  82 + | CP(ctype,comp) as morf -> [morf]
  83 + | PrepNP _ as morf -> [morf]
  84 + | PrepAdjP _ as morf -> [morf] (* this looks like a systematic error in Walenty: xp(abl[prepadjp(z,gen)]) *)
  85 + | ComprepNP _ as morf -> [morf]
  86 + | ComparP _ as morf -> [morf]
  87 + | PrepNCP _ as morf -> [morf]
  88 + | AdvP as morf -> [morf] (* occurs only for the lemmas: cyrk, trwałość x2, zagłada *)
  89 + | FixedP _ as morf -> [morf]
  90 + | Or as morf -> [morf]
  91 + (* | Pro as morf -> [morf] *)
  92 + | Null as morf -> [morf]
  93 + | phrase -> print_endline ("transform_np_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase] (* unhandled phrase: logged to stdout and passed through unchanged *)
  94 +
  95 +let transform_np_pos lemma = function (* Rewrites the part-of-speech description of a lexicalized argument in a "subst" schema; lemma is used only in the diagnostic for unhandled values. *)
  96 + | SUBST(_,Case _) as morf -> [morf]
  97 + | PPRON3(_,Case _) as morf -> [morf]
  98 + | SUBST(_,CaseAgr) as morf -> [morf]
  99 + | SUBST(n,Str) -> [ADJ(n,AllAgr,GenderUndef,Grad "pos")] (* Walenty error: 'zła godzina' *)
  100 + | ADJ(_,Case _,_,_) as morf -> [morf]
  101 + | ADJ(n,CaseAgr,GenderAgr,gr) -> [ADJ(n,AllAgr,GenderAgr,gr)]
  102 + | PACT(n,CaseAgr,g,a,neg) -> [PACT(n,AllAgr,g,a,neg)]
  103 + | PPAS(_,Case _,_,_,_) as morf -> [morf]
  104 + | PPAS(n,CaseAgr,g,a,neg) -> [PPAS(n,AllAgr,g,a,neg)]
  105 + | ADJ(n,Str,g,gr) -> [ADJ(n,AllAgr,g,gr)]
  106 + | PPAS(n,Str,g,a,neg) -> [PPAS(n,AllAgr,g,a,neg)]
  107 + | PREP(Case _) as morf -> [morf]
  108 + | ADV _ as morf -> [morf] (* adverbial realizations end up here *)
  109 + | COMP _ as morf -> [morf]
  110 + | QUB as morf -> [morf]
  111 + | PERS _ as morf -> [morf]
  112 + | pos -> print_endline ("transform_np_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos] (* unhandled value: logged to stdout and passed through unchanged *)
  113 +
  114 +let transform_adj_phrase lemma = function (* Rewrites one phrase from an "adj" schema into a list of alternatives; lemma is used only in the diagnostic for unhandled phrases. *)
  115 + NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)]
  116 + | NP(Part) -> [NP(Case "gen");NP(Case "acc")(*;NumP(Case "gen");NumP(Case "acc")*)] (* a single occurrence: 'krewny' *)
  117 + | NCP(Case c,ctype,comp) -> [NCP(Case c,ctype,comp)]
  118 + | AdjP(CaseAgr) -> [AdjP(AllAgr)] (* a single occurrence: 'cały szczęśliwy'; type raising might be useful here *)
  119 + | CP(ctype,comp) as morf -> [morf]
  120 + | PrepNP _ as morf -> [morf]
  121 + | PrepAdjP _ as morf -> [morf]
  122 + | ComprepNP _ as morf -> [morf]
  123 + | ComparP _ as morf -> [morf]
  124 + | PrepNCP _ as morf -> [morf]
  125 + | InfP _ as morf -> [morf]
  126 + | AdvP as morf -> [morf]
  127 + (* | FixedP _ as morf -> [morf]*)
  128 + | Or as morf -> [morf] (* a single occurrence: 'jednoznaczny' *)
  129 + (* | Pro as morf -> [morf] *)
  130 + | Null as morf -> [morf]
  131 + | morf -> print_endline ("transform_adj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf] (* unhandled phrase: logged to stdout and passed through unchanged *)
  132 +
  133 +let transform_adj_pos lemma = function (* Rewrites the part-of-speech description of a lexicalized argument in an "adj" schema; unhandled values are logged and returned unchanged. *)
  134 + | ADJ(n,CaseAgr,g,gr) -> [ADJ(n,AllAgr,g,gr)]
  135 + | PREP(Case _) as morf -> [morf]
  136 + | ADV _ as morf -> [morf]
  137 + | QUB as morf -> [morf]
  138 + | morf -> print_endline ("transform_adj_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos morf); [morf]
  139 +
  140 +let transform_adv_phrase lemma = function (* Rewrites one phrase from an "adv" schema; every listed phrase is kept as is, anything else is logged and returned unchanged. *)
  141 + NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)]
  142 + | NCP(Case c,ctype,comp) -> [NCP(Case c,ctype,comp)]
  143 + | CP(ctype,comp) as morf -> [morf]
  144 + | PrepNP _ as morf -> [morf]
  145 + | PrepAdjP _ as morf -> [morf]
  146 + | ComprepNP _ as morf -> [morf]
  147 + | ComparP _ as morf -> [morf]
  148 + | PrepNCP _ as morf -> [morf]
  149 + | InfP _ as morf -> [morf]
  150 + | AdvP as morf -> [morf]
  151 +(* | Or as morf -> [morf]*)
  152 + (* | Pro as morf -> [morf] *)
  153 + | Null as morf -> [morf]
  154 +(* | AdjP(CaseAgr) as morf -> [morf]*)
  155 + (* | FixedP _ as morf -> [morf]*)
  156 + | morf -> print_endline ("transform_adv_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf]
  157 +
  158 +let transform_adv_pos lemma = function (* Rewrites the part-of-speech description of a lexicalized argument in an "adv" schema; unhandled values are logged and returned unchanged. *)
  159 + SUBST(_,Case _) as morf -> [morf]
  160 + (* | ADJ(_,CaseAgr,_,_) as morf -> [morf]*)
  161 + | COMP _ as morf -> [morf]
  162 + | PREP(Case _) as morf -> [morf]
  163 + | COMPAR _ as morf -> [morf]
  164 + | ADV _ as morf -> [morf] (* adverbial realizations end up here *)
  165 + | morf -> print_endline ("transform_adv_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos morf); [morf]
  166 +
  167 +let transform_prep_phrase lemma = function (* No phrase is handled for "prep" schemata: every phrase is logged to stdout and returned unchanged. *)
  168 + | phrase -> print_endline ("transform_prep_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase]
  169 +
  170 +let transform_prep_pos lemma = function (* Rewrites the part-of-speech description of a lexicalized argument in a "prep" schema; unhandled values are logged and returned unchanged. *)
  171 + | SUBST(_,Case _) as morf -> [morf]
  172 + | SIEBIE(Case _) as morf -> [morf]
  173 + | PPRON12(_,Case _) as morf -> [morf]
  174 + | PPRON3(_,Case _) as morf -> [morf]
  175 + | SUBST(n,Str) -> [SUBST(n,CaseAgr)] (* the Str case marker is replaced by CaseAgr *)
  176 + | NUM(Case _,_) as morf -> [morf]
  177 + | ADJ(_,Case _,_,_) as morf -> [morf]
  178 + | GER(_,Case _,_,_,_) as morf -> [morf]
  179 + | PPAS(_,Case _,_,_,_) as morf -> [morf]
  180 +(* | ADV _ as morf -> [morf]
  181 + | QUB as morf -> [morf]*)
  182 + | pos -> print_endline ("transform_prep_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos]
  183 +
  184 +let transform_comprep_phrase lemma = function (* Rewrites one phrase from a "comprep" schema; listed phrases are kept as is, anything else is logged and returned unchanged. *)
  185 + NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)]
  186 + | NCP(Case c,ctype,comp) -> [NCP(Case c,ctype,comp)]
  187 + | PrepNP _ as morf -> [morf]
  188 + | PrepNCP _ as morf -> [morf]
  189 + | phrase -> print_endline ("transform_comprep_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase]
  190 +
  191 +let transform_comprep_pos lemma = function (* Rewrites the part-of-speech description of a lexicalized argument in a "comprep" schema; unhandled values are logged and returned unchanged. *)
  192 + | SUBST(_,Case _) as morf -> [morf]
  193 +(* | SUBST(n,Str) -> [SUBST(n,CaseAgr)]*)
  194 + | NUM(Case _,_) as morf -> [morf]
  195 +(* | ADJ(_,Case _,_,_) as morf -> [morf]
  196 + | GER(_,Case _,_,_,_,_) as morf -> [morf]
  197 + | PPAS(_,Case _,_,_,_) as morf -> [morf]
  198 + | ADV _ as morf -> [morf]
  199 + | QUB as morf -> [morf]*)
  200 + | pos -> print_endline ("transform_comprep_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos]
  201 +
  202 +let transform_compar_phrase lemma = function (* Rewrites one phrase from a "compar" schema: NP(Str) is expanded into five concrete cases, FixedP is kept, anything else is logged and returned unchanged. *)
  203 + | NP(Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> NP(Case case)) (* FIXME: check who controls! *) (* FIXME: make it agree with the comparative *)
  204 + | FixedP _ as morf -> [morf]
  205 + | phrase -> print_endline ("transform_compar_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase]
  206 +
  207 +let transform_compar_pos lemma = function (* Rewrites the part-of-speech description of a lexicalized argument in a "compar" schema; Str and agreement markers are replaced by the Undef variants, unhandled values are logged and returned unchanged. *)
  208 + | SUBST(_,Case _) as morf -> [morf]
  209 + | ADJ(_,Case _,_,_) as morf -> [morf]
  210 + | PREP(Case _) as morf -> [morf]
  211 + | SUBST(Number n,Str) -> [SUBST(Number n,CaseUndef)]
  212 + | SUBST(NumberAgr,Str) -> [SUBST(NumberUndef,CaseUndef)]
  213 + | SUBST(NumberUndef,Str) -> [SUBST(NumberUndef,CaseUndef)]
  214 + | PPAS(_,Case _,_,_,_) as morf -> [morf]
  215 + | PPAS(NumberAgr,Str,GenderAgr,a,neg) -> [PPAS(NumberUndef,CaseUndef,GenderUndef,a,neg)] (* FIXME: the same situation as with "jako" *)
  216 + | PPAS(NumberAgr,CaseAgr,GenderAgr,a,neg) -> [PPAS(NumberUndef,CaseUndef,GenderUndef,a,neg)] (* FIXME: the same situation as with "jako" *)
  217 + | ADJ(NumberAgr,Str,GenderAgr,gr) -> [ADJ(NumberUndef,CaseUndef,GenderUndef,gr)] (* FIXME: the same situation as with "jako" *)
  218 + | ADJ(NumberAgr,CaseAgr,GenderAgr,gr) -> [ADJ(NumberUndef,CaseUndef,GenderUndef,gr)] (* FIXME: the same situation as with "jako" *)
  219 + | NUM(Case _,_) as morf -> [morf]
  220 + | pos -> print_endline ("transform_compar_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos]
  221 +
  222 +let transform_comp_phrase lemma = function (* No phrase is handled for "comp" schemata: every phrase is logged to stdout and returned unchanged. *)
  223 + | phrase -> print_endline ("transform_comp_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase]
  224 +
  225 +let transform_comp_pos lemma = function (* Only PERS is accepted silently for "comp" schemata; any other value is logged to stdout and returned unchanged. *)
  226 + | PERS _ as morf -> [morf]
  227 + | pos -> print_endline ("transform_comp_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos]
  228 +
  229 +let transform_qub_phrase lemma = function (* No phrase is handled for "qub" schemata: every phrase is logged to stdout and returned unchanged. *)
  230 + | phrase -> print_endline ("transform_qub_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase]
  231 +
  232 +let transform_qub_pos lemma = function (* Only QUB is accepted silently for "qub" schemata; any other value is logged to stdout and returned unchanged. *)
  233 + | QUB as morf -> [morf]
  234 + | pos -> print_endline ("transform_qub_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos]
  235 +
  236 +let transform_siebie_phrase lemma = function (* No phrase is handled for "siebie" schemata: every phrase is logged to stdout and returned unchanged. *)
  237 + | phrase -> print_endline ("transform_siebie_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase]
  238 +
  239 +let transform_siebie_pos lemma = function (* For "siebie" schemata a fully agreeing ADJ gets its case marker changed from CaseAgr to AllAgr; any other value is logged and returned unchanged. *)
  240 + | ADJ(NumberAgr,CaseAgr,GenderAgr,gr) -> [ADJ(NumberAgr,AllAgr,GenderAgr,gr)]
  241 + | pos -> print_endline ("transform_siebie_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos]
  242 +
  243 +let transform_pers_subj_phrase lemma negation mood = function (* FIXME: prepnp(na,loc) *) (* Rewrites a subject phrase of a personal verb form; negation and mood are accepted but not used in this body. *)
  244 + | NP(Str) -> [NP(NomAgr)(*;NumP(NomAgr)*)]
  245 + | NP(Part) -> [NP(Case "gen");NP(Case "acc")(*;NumP(Case "gen");NumP(Case "acc")*)] (* only in 'nalewać', 'nalać', 'ponalewać', 'najechać','uzbierać' *)
  246 + | NCP(Str,ctype,comp) -> [NCP(NomAgr,ctype,comp)]
  247 + | CP(ctype,comp) as morf -> [morf]
  248 + | InfP _ as morf -> [morf]
  249 + | Or as morf -> [morf]
  250 + (* | Pro -> [ProNG] *)
  251 + | Null -> [Null]
  252 + | morf -> print_endline ("transform_pers_subj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf] (* unhandled phrase: logged to stdout and passed through unchanged *)
  253 +
  254 +let transform_pers_subj_pos lemma negation mood = function (* Rewrites a lexicalized subject of a personal verb form: Str and nominative become NomAgr; negation and mood are accepted but not used in this body. *)
  255 + (* COMP _ as morf -> [morf]*)
  256 + | SUBST(n,Str) -> [SUBST(n,NomAgr)]
  257 + | SUBST(n,Case "nom") -> [SUBST(n,NomAgr)] (* looks like a Walenty error, but it does not matter *)
  258 + | NUM(Str,g) -> [NUM(NomAgr,g)]
  259 + | NUM(Case "nom",g) -> [NUM(NomAgr,g)]
  260 +(* | ADJ(n,Str,g,gr) -> [ADJ(n,NomAgr,g,gr)]*)
  261 + | morf -> print_endline ("transform_pers_subj_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos morf); [morf]
  262 +
  263 +let transform_ger_subj_phrase lemma negation mood = function (* Rewrites a subject phrase for a gerund: structural subjects become genitive or a "przez" + accusative phrase; negation and mood are accepted but not used in this body. *)
  264 + | NP(Str) -> [NP(Case "gen");PrepNP("przez",Case "acc")(*;NumP(Case "gen")*)(*;PrepNumP("przez",Case "acc")*)] (* FIXME: is przez:acc possible? *)
  265 + | NP(Part) -> [NP(Case "gen")(*;NP(Case "acc")*)(*;NumP(Case "gen");NumP(Case "acc")*)]
  266 + | NCP(Str,ctype,comp) -> [NCP(Case "gen",ctype,comp);PrepNCP("przez",Case "acc",ctype,comp)] (* FIXME: is przez:acc possible? *)
  267 + | CP(ctype,comp) as morf -> [morf]
  268 + | InfP _ as morf -> [morf] (* FIXME: is this possible? *)
  269 + | Or as morf -> [morf]
  270 + (* | Pro -> if control then [Pro] else [Null] *)
  271 + | Null -> [Null]
  272 + | morf -> print_endline ("transform_ger_subj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf] (* unhandled phrase: logged to stdout and passed through unchanged *)
  273 +
  274 +let transform_ger_subj_pos lemma negation mood = function (* FIXME: ADV(_) *)
  275 + (* COMP _ as morf -> [morf] (* FIXME: czy to jest możliwe? *)*)
  276 + | SUBST(n,Str) -> [SUBST(n,Case "gen")]
  277 + | SUBST(n,Case "nom") -> [SUBST(n,Case "gen")] (* wygląda na błąd Walentego, ale nie ma znaczenia *)
  278 + | NUM(Str,g) -> [NUM(Case "gen",g)]
  279 + | NUM(Case "nom",g) -> [NUM(Case "gen",g)]
  280 +(* | ADJ(n,Str,g,gr) -> [ADJ(n,Case "gen",g,gr)]*)
  281 + | morf -> print_endline ("transform_pers_subj_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos morf); [morf]
  282 +
  283 +let transform_ppas_subj_phrase lemma negation mood control = function (* Rewrites a subject phrase for a passive participle: structural subjects become "przez" + accusative phrases; negation, mood and control are accepted but not used in this body. *)
  284 + | NP(Str) -> [PrepNP("przez",Case "acc")(*;PrepNumP("przez",Case "acc")*)]
  285 + | NCP(Str,ctype,comp) -> [PrepNCP("przez",Case "acc",ctype,comp)]
  286 + | CP(ctype,comp) as morf -> [morf]
  287 + (* | Pro -> if control then [Pro] else [Null] *)
  288 + | morf -> print_endline ("transform_ppas_subj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf] (* unhandled phrase: logged to stdout and passed through unchanged *)
  289 +
  290 +let transform_pers_phrase lemma negation mood = function (* Rewrites a non-subject phrase of a verbal schema; the Str case marker is expanded through transform_str, so the result depends on mood and negation. *)
  291 + | NP(Str) -> List.flatten (Xlist.map (transform_str mood negation) (fun case -> [NP case(*;NumP(case)*)]))
  292 + | NP(Part) -> [NP(Case "gen")] @ (if mood = "gerundial" then [] else [NP(Case "acc")(*;NumP(Case "gen");NumP(Case "acc")*)])
  293 + | NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)]
  294 + | NCP(Str,ctype,comp) -> List.flatten (Xlist.map (transform_str mood negation) (fun case -> [NCP(case,ctype,comp)]))
  295 + | NCP(Part,ctype,comp) -> List.flatten (Xlist.map (transform_str mood negation) (fun case -> [NCP(case,ctype,comp)]))
  296 + | NCP(Case case,ctype,comp) -> [NCP(Case case,ctype,comp)]
  297 + | AdjP(Str) -> Xlist.map (transform_str mood negation) (fun case -> AdjP case) (* FIXME: number and gender agreement is skipped - doable by means of control *)
  298 + | AdjP CaseAgr as morf -> if mood = "gerundial" then [AdjP AllAgr] else (print_endline ("transform_pers_phrase2: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf])
  299 + | AdjP(Case _) as morf -> [morf] (* FIXME: number and gender agreement is skipped - doable by means of control *)
  300 + | CP(ctype,comp) as morf -> [morf]
  301 + | PrepNP _ as morf -> [morf]
  302 + | PrepAdjP _ as morf -> [morf] (* FIXME: number and gender agreement is skipped - doable by means of control *)
  303 + | ComprepNP _ as morf -> [morf]
  304 + | ComparP _ as morf -> [morf]
  305 + | PrepNCP _ as morf -> [morf]
  306 + | InfP _ as morf -> [morf]
  307 + | AdvP -> if mood = "gerundial" then [AdjP AllAgr] else [AdvP] (* with "gerundial" mood the adverbial phrase is replaced by an agreeing adjectival phrase *)
  308 + | FixedP _ as morf -> [morf]
  309 + | Or as morf -> [morf]
  310 + (* | Pro as morf -> [morf] *)
  311 + | Null as morf -> [morf]
  312 + | morf -> print_endline ("transform_pers_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf] (* unhandled phrase: logged to stdout and passed through unchanged *)
  313 +
  314 +let transform_pers_pos lemma negation mood = function (* Rewrites the part-of-speech description of a lexicalized non-subject argument of a verbal schema; the Str case marker is expanded through transform_str. *)
  315 + | SUBST(n,Str) -> Xlist.map (transform_str mood negation) (fun case -> SUBST(n,case))
  316 + | PPRON12(n,Str) -> Xlist.map (transform_str mood negation) (fun case -> PPRON12(n,case))
  317 + | PPRON3(n,Str) -> Xlist.map (transform_str mood negation) (fun case -> PPRON3(n,case))
  318 + | SIEBIE(Str) -> Xlist.map (transform_str mood negation) (fun case -> SIEBIE(case))
  319 + | NUM(Str,g) -> Xlist.map (transform_str mood negation) (fun case -> NUM(case,g))
  320 + | ADJ(n,Str,g,gr) -> Xlist.map (transform_str mood negation) (fun case -> ADJ(n,case,g,gr))
  321 +(* | PPAS(n,Str,g,a,neg) -> Xlist.map (transform_str negation) (fun case -> PPAS(n,Str,g,a,neg))*)
  322 + | SUBST(n,Part) -> [SUBST(n,Case "gen");SUBST(n,Case "acc")]
  323 + | ADJ(n,Part,g,gr) -> [ADJ(n,Case "gen",g,gr);ADJ(n,Case "acc",g,gr)]
  324 + | ADJ(n,CaseAgr,g,gr) as morf -> if lemma = "siedzieć" then [morf] else (print_endline ("transform_pers_pos2: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos morf); [morf]) (* FIXME *)
  325 + | SUBST(_,Case _) as morf -> [morf]
  326 + | PPRON12(_,Case _) as morf -> [morf]
  327 + | PPRON3(_,Case _) as morf -> [morf]
  328 + | SIEBIE(Case _) as morf -> [morf]
  329 + | NUM(Case _,_) as morf -> [morf]
  330 + | PREP _ as morf -> [morf]
  331 + | ADJ(_,Case _,_,_) as morf -> [morf]
  332 + | PPAS(_,Case _,_,_,_) as morf -> [morf]
  333 +(* | SUBST(n,CaseAgr) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> SUBST(n,Case case)) (* FIXME: check who controls! *)
  334 + | ADJ(n,CaseAgr,g,gr) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> ADJ(n,Case case,g,gr)) (* FIXME: check who controls! *)*)
  335 + | COMPAR _ as morf -> [morf]
  336 + | COMP _ as morf -> [morf]
  337 + | INF _ as morf -> [morf]
  338 + | QUB as morf -> [morf]
  339 + | ADV grad -> (*if mood = "gerundial" then [ADJ(NumberAgr,AllAgr,GenderAgr,grad)] else*) [ADV grad] (* FIXME: this will not fix the lemma *)
  340 + | PERS _ as morf -> [morf]
  341 + | morf -> print_endline ("transform_pers_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos morf); [morf] (* unhandled value: logged to stdout and passed through unchanged *)
  342 +
  343 +let rec transform_comps negation mood = function (* Expands the complementizer of CP, NCP and PrepNCP phrases through transform_comp, yielding one phrase per alternative; recurses under E; other phrases are returned as singletons. *)
  344 + | CP(ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> CP(ctype,comp))
  345 + | NCP(case,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> NCP(case,ctype,comp))
  346 + | PrepNCP(prep,case,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> PrepNCP(prep,case,ctype,comp))
  347 + | E phrase -> Xlist.map (transform_comps negation mood phrase) (fun phrase -> E phrase)
  348 + | morf -> [morf]
  349 +
  350 +let compars = StringSet.of_list ["jak"; "jako"; "niż"; "niczym" ;"niby"; "co"; "zamiast"] (* lexemes that transform_preps reclassifies from prepositions to comparatives *)
  351 +
  352 +let is_compar lex = StringSet.mem compars lex (* true when lex belongs to compars *)
  353 +
  354 +(* FIXME: case, number and gender agreement is skipped - doable by means of control *)
  355 +let transform_preps morf = (* Normalizes prepositional items in two passes and returns the list of alternatives; raises Failure "transform_preps" for combinations it does not cover. *)
  356 + let morf = match morf with (* pass 1: items whose preposition is in compars are turned into comparative items (COMPAR / ComparP) *)
  357 + | LexArg(id,lex,PREP c) -> if is_compar lex then LexArg(id,lex,COMPAR c) else LexArg(id,lex,PREP c)
  358 + | SimpleLexArg(lex,PREP c) -> if is_compar lex then SimpleLexArg(lex,COMPAR c) else SimpleLexArg(lex,PREP c)
  359 + | PrepNP(prep,c) -> if is_compar prep then ComparP(prep,c) else PrepNP(prep,c)
  360 + | PrepAdjP(prep,c) -> if is_compar prep then ComparP(prep,c) else PrepAdjP(prep,c)
  361 + | PrepNCP(prep,case,ctype,comp) as morf -> if is_compar prep then failwith "transform_preps" else morf
  362 + | morf -> morf in
  363 + match morf with (* pass 2: the Str case marker is expanded into concrete cases; arm order matters because specific arms precede the failing catch-alls *)
  364 + | ComparP(prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> ComparP(prep,Case case))
  365 + | ComparP _ -> failwith "transform_preps"
  366 + | LexArg(id,lex,COMPAR Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> LexArg(id,lex,COMPAR (Case case)))
  367 + | SimpleLexArg(lex,COMPAR Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> SimpleLexArg(lex,COMPAR (Case case)))
  368 + | LexArg(id,lex,COMPAR (Case _)) as morf -> [morf]
  369 + | SimpleLexArg(lex,COMPAR (Case _)) as morf -> [morf]
  370 + | LexArg(id,lex,COMPAR _) -> failwith "transform_preps"
  371 + | SimpleLexArg(lex,COMPAR _) -> failwith "transform_preps"
  372 + | PrepNP("per",Str) -> [PrepNP("per",Case "nom");PrepNP("per",Case "voc")] (* FIXME: voc needs to be fixed in the lexicon *)
  373 + | PrepNP(_,Case _) as morf -> [morf]
  374 + | PrepAdjP(_,Case _) as morf -> [morf]
  375 + | PrepNCP(_,Case _,_,_) as morf -> [morf]
  376 + | PrepNP _ -> failwith "transform_preps"
  377 + | PrepAdjP _ -> failwith "transform_preps"
  378 + | PrepNCP _ -> failwith "transform_preps"
  379 + | LexArg(id,"w",PREP Str) -> [LexArg(id,"w",PREP (Case "acc"));LexArg(id,"w",PREP (Case "loc"));]
  380 + | SimpleLexArg("w",PREP Str) -> [SimpleLexArg("w",PREP (Case "acc"));SimpleLexArg("w",PREP (Case "loc"))]
  381 + | LexArg(id,lex,PREP (Case _)) as morf -> [morf]
  382 + | SimpleLexArg(lex,PREP (Case _)) as morf -> [morf]
  383 + | LexArg(id,lex,PREP _) -> failwith "transform_preps"
  384 + | SimpleLexArg(lex,PREP _) -> failwith "transform_preps"
  385 + | morf -> [morf]
  386 +
  387 +let transform_pers_schema lemma negation mood schema =
  388 + Xlist.map schema (fun s ->
  389 + {s with morfs =
  390 + let morfs = List.flatten (Xlist.map s.morfs (transform_comps negation mood)) in
  391 + let morfs = List.flatten (Xlist.map morfs transform_preps) in
  392 + if s.gf = SUBJ then List.flatten (Xlist.map morfs (function
  393 + | E phrase -> Xlist.map (transform_pers_subj_phrase lemma negation mood phrase) (fun phrase -> E phrase)
  394 + | LexArg(id,lex,pos) -> Xlist.map (transform_pers_subj_pos lemma negation mood pos) (fun pos -> LexArg(id,lex,pos))
  395 + | SimpleLexArg(lex,pos) -> Xlist.map (transform_pers_subj_pos lemma negation mood pos) (fun pos -> SimpleLexArg(lex,pos))
  396 + | phrase -> transform_pers_subj_phrase lemma negation mood phrase))
  397 + else List.flatten (Xlist.map s.morfs (function
  398 + | LexArg(id,lex,pos) -> Xlist.map (transform_pers_pos lemma negation mood pos) (fun pos -> LexArg(id,lex,pos))
  399 + | SimpleLexArg(lex,pos) -> Xlist.map (transform_pers_pos lemma negation mood pos) (fun pos -> SimpleLexArg(lex,pos))
  400 + | phrase -> transform_pers_phrase lemma negation mood phrase))})
  401 +
  402 +let transform_nosubj_schema lemma negation mood schema = (* Rewrites every position of a schema for a subjectless verb form: the SUBJ position is replaced by [Null], other positions are expanded and passed to the pers transformers. *)
  403 + Xlist.map schema (fun s ->
  404 + {s with morfs =
  405 + let morfs = List.flatten (Xlist.map s.morfs (transform_comps negation mood)) in
  406 + let morfs = List.flatten (Xlist.map morfs transform_preps) in
  407 + if s.gf = SUBJ then [Null] (* note: morfs is still computed above, so a Failure from transform_preps can be raised for a SUBJ position too *)
  408 + else List.flatten (Xlist.map morfs (function
  409 + | LexArg(id,lex,pos) -> Xlist.map (transform_pers_pos lemma negation mood pos) (fun pos -> LexArg(id,lex,pos))
  410 + | SimpleLexArg(lex,pos) -> Xlist.map (transform_pers_pos lemma negation mood pos) (fun pos -> SimpleLexArg(lex,pos))
  411 + | phrase -> transform_pers_phrase lemma negation mood phrase))})
  412 +
  413 +(* let transform_ger_adv_lex = function
  414 + | s -> print_endline ("transform_ger_adv_lex: " ^ s); s
  415 +
  416 +let transform_ger_adv_pos = function
  417 + | LexArg(id,lex,ADV grad) -> LexArg(id,transform_ger_adv_lex lex,ADJ(NumberAgr,AllAgr,GenderAgr,grad))
  418 + | SimpleLexArg(lex,ADV grad) -> SimpleLexArg(transform_ger_adv_lex lex,ADJ(NumberAgr,AllAgr,GenderAgr,grad))
  419 + | morf -> morf *)
  420 +
  421 +let transform_ger_schema lemma negation schema = (* FIXME: zakładam, że ger zeruje mood, czy to prawda? *)
  422 + Xlist.map schema (fun s ->
  423 + {s with morfs =
  424 + let morfs = List.flatten (Xlist.map s.morfs (transform_comps negation "gerundial")) in
  425 + let morfs = List.flatten (Xlist.map morfs transform_preps) in
  426 + (* let morfs = Xlist.map morfs transform_ger_adv_pos in *)
  427 + if s.gf = SUBJ then List.flatten (Xlist.map morfs (function
  428 + | E phrase -> Xlist.map (transform_ger_subj_phrase lemma negation "gerundial" phrase) (fun phrase -> E phrase)
  429 + | LexArg(id,lex,pos) -> Xlist.map (transform_ger_subj_pos lemma negation "gerundial" pos) (fun pos -> LexArg(id,lex,pos))
  430 + | SimpleLexArg(lex,pos) -> Xlist.map (transform_ger_subj_pos lemma negation "gerundial" pos) (fun pos -> SimpleLexArg(lex,pos))
  431 + | phrase -> transform_ger_subj_phrase lemma negation "gerundial" phrase))
  432 + else List.flatten (Xlist.map s.morfs (function
  433 + | LexArg(id,lex,pos) -> Xlist.map (transform_pers_pos lemma negation "gerundial" pos) (fun pos -> LexArg(id,lex,pos))
  434 + | SimpleLexArg(lex,pos) -> Xlist.map (transform_pers_pos lemma negation "gerundial" pos) (fun pos -> SimpleLexArg(lex,pos))
  435 + | phrase -> transform_pers_phrase lemma negation "gerundial" phrase))})
  436 +
  437 +let transform_ppas_schema lemma negation mood schema =
  438 + if not (Xlist.fold schema false (fun b p -> if p.gf = OBJ then true else b)) then raise Not_found else
  439 + Xlist.map schema (fun s ->
  440 + let morfs = List.flatten (Xlist.map s.morfs (transform_comps negation mood)) in
  441 + let morfs = List.flatten (Xlist.map morfs transform_preps) in
  442 + {s with morfs =
  443 + if s.gf = OBJ then [Null] else
  444 + if s.gf = SUBJ then List.flatten (Xlist.map morfs (function
  445 + | E phrase -> raise Not_found (* tylko 'obladzać' i 'oblodzić', chyba błąd *)
  446 + | LexArg(id,lex,SUBST(n,Str)) -> raise Not_found (* FIXME!!! *)
  447 + | SimpleLexArg(lex,SUBST(n,Str)) -> raise Not_found (* FIXME!!! *)
  448 + | phrase -> transform_ppas_subj_phrase lemma negation mood (s.cr <> [] || s.ce <> []) phrase))
  449 + else List.flatten (Xlist.map s.morfs (function
  450 + | LexArg(id,lex,pos) -> Xlist.map (transform_pers_pos lemma negation mood pos) (fun pos -> LexArg(id,lex,pos))
  451 + | SimpleLexArg(lex,pos) -> Xlist.map (transform_pers_pos lemma negation mood pos) (fun pos -> SimpleLexArg(lex,pos))
  452 + | phrase -> transform_pers_phrase lemma negation mood phrase))})
  453 +
  454 +let transform_num_schema acm schema = (* Rewrites a numeral schema for the given acm value: "rec" gives GenAgr, "congr" gives AllAgr; raises Failure for any other acm or for any morf other than Null or a lexical SUBST(NumberUndef,CaseUndef). *)
  455 + Xlist.map schema (fun s ->
  456 + {s with morfs=List.flatten (Xlist.map s.morfs (function
  457 + | Null -> [Null]
  458 + | LexArg(id,lex,SUBST(NumberUndef,CaseUndef)) ->
  459 + (match acm with
  460 + "rec" -> [LexArg(id,lex,SUBST(NumberUndef,GenAgr))]
  461 + | "congr" -> [LexArg(id,lex,SUBST(NumberUndef,AllAgr))]
  462 + | _ -> failwith "transform_num_schema")
  463 + | SimpleLexArg(lex,SUBST(NumberUndef,CaseUndef)) ->
  464 + (match acm with
  465 + "rec" -> [SimpleLexArg(lex,SUBST(NumberUndef,GenAgr))]
  466 + | "congr" -> [SimpleLexArg(lex,SUBST(NumberUndef,AllAgr))]
  467 + | _ -> failwith "transform_num_schema")
  468 + | morf -> failwith ("transform_num_schema: " ^ ENIAMwalStringOf.phrase morf)))})
  469 +
  470 +let transform_schema pos lemma schema = (* Rewrites a schema of a non-verbal entry: picks the phrase and pos transformers by the pos string and applies them to every morf; raises Failure "transform_schema" for an unlisted pos. *)
  471 + let phrase_fun,pos_fun = match pos with
  472 + "subst" -> transform_np_phrase,transform_np_pos
  473 + | "adj" -> transform_adj_phrase,transform_adj_pos
  474 + | "adv" -> transform_adv_phrase,transform_adv_pos
  475 + | "prep" -> transform_prep_phrase,transform_prep_pos
  476 + | "comprep" -> transform_comprep_phrase,transform_comprep_pos
  477 + | "compar" -> transform_compar_phrase,transform_compar_pos
  478 + | "comp" -> transform_comp_phrase,transform_comp_pos
  479 + | "qub" -> transform_qub_phrase,transform_qub_pos
  480 + | "siebie" -> transform_siebie_phrase,transform_siebie_pos
  481 + | _ -> failwith "transform_schema"
  482 + in
  483 + Xlist.map schema (fun s ->
  484 + let morfs = List.flatten (Xlist.map s.morfs (transform_comps NegationUndef "")) in (* FIXME: dependence on the conditional mood and on negation *)
  485 + {s with morfs=List.flatten (Xlist.map morfs (function
  486 + LexArg(id,lex,pos) -> Xlist.map (pos_fun lemma pos) (fun pos -> LexArg(id,lex,pos))
  487 + | SimpleLexArg(lex,pos) -> Xlist.map (pos_fun lemma pos) (fun pos -> SimpleLexArg(lex,pos))
  488 + | phrase -> phrase_fun lemma phrase))})
  489 +
  490 +let rec remove_adj_agr = function (* Drops the schema positions whose morfs are exactly [Null;AdjP(CaseAgr)] or [Null;AdjP(Part)]; all other positions are kept in their original order. *)
  491 + [] -> []
  492 + | {morfs=[Null;AdjP(CaseAgr)]} :: l -> remove_adj_agr l
  493 + | {morfs=[Null;AdjP(Part)]} :: l -> remove_adj_agr l
  494 + | s :: l -> (*print_endline (ENIAMwalStringOf.schema [s]);*) s :: (remove_adj_agr l)
  495 +
  496 +let rec get_role gf = function (* Returns the pair (role, role_attr) of the first position whose gf field equals gf; raises Not_found when no position matches. *)
  497 + [] -> raise Not_found
  498 + | s :: l -> if s.gf = gf then s.role,s.role_attr else get_role gf l
  499 +
  500 +let expand_negation = function (* Lists the concrete negation values covered by the argument: NegationUndef expands to both Negation and Aff. *)
  501 + Negation -> [Negation]
  502 + | Aff -> [Aff]
  503 + | NegationUndef -> [Negation;Aff]
  504 +
  505 +let expand_aspect = function (* Lists the concrete aspect values covered by the argument: AspectUndef expands to both "imperf" and "perf". *)
  506 + Aspect s -> [Aspect s]
  507 + | AspectUndef -> [Aspect "imperf";Aspect "perf"]
  508 +
  509 +let aspect_sel = function (* Builds the list of selector triples for an aspect: one (Aspect, Eq, [s]) triple for a known aspect, the empty list for AspectUndef. *)
  510 + Aspect s -> [ENIAM_LCGlexiconTypes.Aspect,ENIAM_LCGlexiconTypes.Eq,[s]]
  511 + | AspectUndef -> []
  512 +
  513 +open ENIAM_LCGlexiconTypes
  514 +
  515 +let transform_entry pos lemma negation pred aspect schema = (* Converts one valence entry into a list of (selector list, transformed schema) pairs, dispatching on the part-of-speech string [pos]. Calls failwith when the negation/pred/aspect attributes are not allowed for that part of speech, or when [pos] is not handled. *)
  516 + if pos = "subst" || pos = "depr" then (
  517 + if negation <> NegationUndef || pred <> PredFalse || aspect <> AspectUndef then failwith ("transform_entry 1"); (* nouns must carry no negation, predicativity or aspect *)
  518 + [[],transform_schema "subst" lemma schema]) else (* both noun tags use the "subst" transformation; no selectors *)
  519 + if pos = "adj" || pos = "adjc" || pos = "adjp" then (
  520 + if negation <> NegationUndef || aspect <> AspectUndef then failwith ("transform_entry 2");
  521 + let sel = match pred with PredTrue -> [Case,Eq,["pred"]] | _ -> [] in (* predicative adjectives get a Case = "pred" selector *)
  522 + [sel,transform_schema "adj" lemma schema]) else
  523 + if pos = "adv" || pos = "prep" || pos = "comprep" || pos = "comp" || pos = "compar" || pos = "qub" || pos = "siebie" then (
  524 + if negation <> NegationUndef || (*pred <> PredFalse ||*) aspect <> AspectUndef then failwith ("transform_entry 3"); (* FIXME: adverb types *) (* the pred check is deliberately commented out for this group *)
  525 + [[],transform_schema pos lemma schema]) else
  526 + if pred <> PredFalse then failwith ("transform_entry 4") else (* every part of speech handled below must be non-predicative *)
  527 + if pos = "num" || pos = "intnum" then (
  528 + if negation <> NegationUndef || aspect <> AspectUndef then failwith ("transform_entry 5");
  529 + Xlist.map ["congr";"rec"] (fun acm -> (* one result per acm value, each with an Acm selector *)
  530 + [Acm,Eq,[acm]],transform_num_schema acm schema)) else
  531 + List.flatten (Xlist.map (expand_negation negation) (fun negation -> (* remaining (verbal) tags: one group of results per concrete negation value; the inner [negation] shadows the parameter *)
  532 + let sel = [Negation,Eq,[ENIAMwalStringOf.negation negation]] @ aspect_sel aspect in (* selectors shared by all results of this negation variant *)
  533 + if pos = "fin" || pos = "bedzie" then
  534 + [sel @ [Mood,Eq,["indicative"]],transform_pers_schema lemma negation "indicative" schema;
  535 + sel @ [Mood,Eq,["imperative"]],transform_pers_schema lemma negation "imperative" schema] else
  536 + if pos = "praet" || pos = "winien" then
  537 + [sel @ [Mood,Eq,["indicative"]],transform_pers_schema lemma negation "indicative" schema;
  538 + sel @ [Mood,Eq,["conditional"]],transform_pers_schema lemma negation "conditional" schema] else
  539 + if pos = "impt" then
  540 + [sel @ [Mood,Eq,["imperative"]],transform_nosubj_schema lemma negation "imperative" schema] else
  541 + if pos = "imps" then
  542 + [sel @ [Mood,Eq,["indicative"]],transform_nosubj_schema lemma negation "indicative" schema] else
  543 + if pos = "pred" then
  544 + [sel @ [Mood,Eq,["indicative"]],transform_pers_schema lemma negation "indicative" schema] else
  545 + if pos = "pcon" || pos = "pant" || pos = "inf" || pos = "pact" then (* these forms get no Mood selector; the schema is built with mood "indicative" *)
  546 + (* let role,role_attr = try get_role SUBJ schema with Not_found -> "Initiator","" in *)
  547 + [sel, transform_nosubj_schema lemma negation "indicative" schema] else
  548 + if pos = "ppas" then
  549 + try
  550 + (* let role,role_attr = try get_role OBJ schema with Not_found -> "Theme","" in *)
  551 + [sel, transform_ppas_schema lemma negation "indicative" schema]
  552 + with Not_found -> [] else (* a Not_found raised while building the ppas schema yields no result for this negation variant *)
  553 + if pos = "ger" then
  554 + [sel,transform_ger_schema lemma negation schema] else
  555 + failwith ("transform_entry: " ^ pos))) (* unhandled part-of-speech tag *)
  556 +
  557 +let transform_lex_entry pos lemma = function (* Transforms one lexical entry into a list of (selector list, entry) pairs. The outer [pos] and [lemma] arguments are shadowed by the pattern variables in the SimpleLexEntry/LexEntry cases and are not used in the remaining cases. *)
  558 + SimpleLexEntry(lemma,pos) -> [[],SimpleLexEntry(lemma,pos)] (* returned unchanged, without selectors *)
  559 + | LexEntry(id,lemma,pos,NoRestr,schema) -> (* unrestricted entry: its schema goes through transform_entry with undefined negation/aspect and PredFalse *)
  560 + Xlist.map (transform_entry pos lemma NegationUndef PredFalse AspectUndef schema) (fun (sel,schema) ->
  561 + sel,LexEntry(id,lemma,pos,NoRestr,schema))
  562 + | ComprepNPEntry(s,NoRestr,schema) -> (* handled like the case above, with the fixed tag "comprep" and [s] passed as the lemma *)
  563 + Xlist.map (transform_entry "comprep" s NegationUndef PredFalse AspectUndef schema) (fun (sel,schema) ->
  564 + sel,ComprepNPEntry(s,NoRestr,schema))
  565 + | LexEntry(id,lemma,pos,_,[]) as entry -> [[],entry] (* FIXME *) (* reached only when the restriction is not NoRestr; an empty schema is passed through unchanged *)
  566 + | entry -> print_endline ("transform_lex_entry:" ^ ENIAMwalStringOf.lex_entry entry); [[],entry] (* any other entry is printed to stdout and returned unchanged *)
  567 +
  568 +(*let reduce_frame_negation lexemes = function
  569 + Negation -> StringMap.mem lexemes "nie"
  570 + | _ -> true
  571 +
  572 +let reduce_frame_mood lexemes = function
  573 + "conditional" -> StringMap.mem lexemes "by"
  574 + | _ -> true
  575 +
  576 +let reduce_frame_aux lexemes = function
  577 + NoAux -> true
  578 + | PastAux -> (try let poss = StringMap.find lexemes "być" in StringSet.mem poss "praet" with Not_found -> false)
  579 + | FutAux -> (try let poss = StringMap.find lexemes "być" in StringSet.mem poss "bedzie" with Not_found -> false)
  580 + | ImpAux -> StringMap.mem lexemes "niech" || StringMap.mem lexemes "niechaj" || StringMap.mem lexemes "niechże" || StringMap.mem lexemes "niechajże"
  581 +
  582 +let reduce_frame_atrs pos lexemes = function
  583 + Frame(NounAtrs _,_) -> true
  584 + | Frame(AdjAtrs _,_) -> true
  585 + | Frame(EmptyAtrs _,_) -> true
  586 + | Frame(PersAtrs(_,_,negation,mood,_,aux,_),_) -> reduce_frame_negation lexemes negation && reduce_frame_mood lexemes mood && reduce_frame_aux lexemes aux
  587 + | Frame(NonPersAtrs(_,_,_,_,negation,_),_) -> if pos = "pact" || pos = "ppas" then true else reduce_frame_negation lexemes negation
  588 + | Frame(GerAtrs(_,_,negation,_),_) -> reduce_frame_negation lexemes negation
  589 + | Frame(_,_) as frame -> failwith ("reduce_frame_atrs: " ^ ENIAMwalStringOf.frame "" frame)
  590 + | LexFrame _ -> true
  591 + | ComprepFrame _ -> true
  592 +
  593 +let rec reduce_frame_atrs_list pos lexemes = function
  594 + [] -> []
  595 + | frame :: l -> (if reduce_frame_atrs pos lexemes frame then [frame] else []) @ reduce_frame_atrs_list pos lexemes l
  596 +*)
  597 +
  598 +let _ = (* Top-level expression evaluated when this module is initialised: runs transform_entry and transform_lex_entry over the ENIAMwalParser data and discards every result, so only exceptions (and any output printed by the callees) are observable. NOTE(review): looks like a load-time smoke test - confirm it is meant to stay. *)
  599 + let schemata = ENIAMwalReduce.merge_entries ENIAMwalParser.phrases ENIAMwalParser.schemata in (* presumably combines phrase realizations with the schemata; see ENIAMwalReduce for the exact semantics *)
  600 + Entries.iter schemata (fun pos lemma (opinion,neg,pred,aspect,schema) -> (* [opinion] is bound but not used below *)
  601 + match pos with
  602 + "noun" -> ignore (transform_entry "subst" lemma neg pred aspect schema) (* "noun" entries are processed under the "subst" tag *)
  603 + | "adj" -> ignore (transform_entry "adj" lemma neg pred aspect schema)
  604 + | "adv" -> ignore (transform_entry "adv" lemma neg pred aspect schema)
  605 + | "verb" -> (* a verb entry is tried under every verbal tag listed below *)
  606 + ignore (transform_entry "fin" lemma neg pred aspect schema);
  607 + ignore (transform_entry "praet" lemma neg pred aspect schema);
  608 + ignore (transform_entry "impt" lemma neg pred aspect schema);
  609 + ignore (transform_entry "imps" lemma neg pred aspect schema);
  610 + ignore (transform_entry "ger" lemma neg pred aspect schema);
  611 + ignore (transform_entry "pact" lemma neg pred aspect schema);
  612 + ignore (transform_entry "ppas" lemma neg pred aspect schema);
  613 + ignore (transform_entry "inf" lemma neg pred aspect schema);
  614 + ignore (transform_entry "pcon" lemma neg pred aspect schema);
  615 + ignore (transform_entry "pant" lemma neg pred aspect schema);
  616 + ignore (transform_entry "bedzie" lemma neg pred aspect schema);
  617 + ignore (transform_entry "winien" lemma neg pred aspect schema);
  618 + ignore (transform_entry "pred" lemma neg pred aspect schema);
  619 + ()
  620 + | _ -> failwith "unknown pos"); (* any other key aborts module initialisation *)
  621 + ignore (Entries.map ENIAMwalParser.entries transform_lex_entry); (* lexical entries are transformed as well; the result is discarded *)
  622 + ()
... ...
lexSemantics/ENIAMwalFrames.ml
1 1 (*
2   - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty".
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  2 + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
5 5 *
6 6 * This library is free software: you can redistribute it and/or modify
7 7 * it under the terms of the GNU Lesser General Public License as published by
... ... @@ -525,752 +525,6 @@ let remove_pro_args schema = (* FIXME: sprawdzić czy Pro i Null są zawsze na p
525 525 | s -> s :: schema))
526 526  
527 527  
528   -let simplify_pos = function
529   - "subst" -> "noun"
530   - | "depr" -> "noun"
531   - | "psubst" -> "noun"
532   - | "pdepr" -> "noun"
533   - | "adj" -> "adj"
534   - | "adjc" -> "adj"
535   - | "ger" -> "verb"
536   - | "pact" -> "verb"
537   - | "ppas" -> "verb"
538   - | "fin" -> "verb"
539   - | "bedzie" -> "verb"
540   - | "praet" -> "verb"
541   - | "winien" -> "verb"
542   - | "impt" -> "verb"
543   - | "imps" -> "verb"
544   - | "inf" -> "verb"
545   - | "pcon" -> "verb"
546   - | "pant" -> "verb"
547   - | "pred" -> "verb"
548   - | "ppron12" -> "pron"
549   - | "ppron3" -> "pron"
550   - | "siebie" -> "pron"
551   - | s -> s
552   -
553   -let transform_zeby = function
554   - Aff -> [Comp "że"]
555   - | Negation -> [Comp "że";Comp "żeby"]
556   - | NegationUndef -> [Comp "że";Comp "żeby"]
557   - | _ -> failwith "transform_zeby"
558   -
559   -let transform_gdy = function
560   - "indicative" -> [Comp "gdy"]
561   - | "imperative" -> [Comp "gdy"]
562   - | "conditional" -> [Comp "gdyby"]
563   - | "gerundial" -> [Comp "gdy"]
564   - | "" -> [Comp "gdy";Comp "gdyby"]
565   - | s -> failwith ("transform_gdy: " ^ s)
566   -
567   -let transform_comp negation mood = function
568   - Comp comp -> [Comp comp]
569   - | Zeby -> transform_zeby negation
570   - | Gdy -> transform_gdy mood
571   - | CompUndef -> [CompUndef](*failwith "transform_comp"*)
572   -
573   -let transform_str = function
574   - Aff -> [Case "acc"]
575   - | Negation -> [Case "gen"]
576   - | NegationUndef -> [Case "acc";Case "gen"]
577   - | _ -> failwith "transform_str"
578   -
579   -(* FIXME: wstawić wszędzie adj jako wariant PrepNP, ComprepNP i NP *)
580   -let transform_np_phrase = function
581   - NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)]
582   - | NP(CaseAgr) -> [NP(CaseAgr)(*;NumP(CaseAgr)*)]
583   - | AdjP(Case _) as morf -> [morf]
584   - | AdjP(CaseAgr) -> [AdjP(AllAgr)]
585   - | AdjP(AllAgr) -> [AdjP(AllAgr)]
586   - | AdjP(Str) -> [AdjP(AllAgr)]
587   - | PrepNP(sem,prep,Case case) -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)]
588   -(* | PrepNumP(_,Case _) as morf -> [morf] *)
589   - | ComprepNP _ as morf -> [morf]
590   - | ComparNP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> ComparNP(sem,prep,Case case))
591   - | ComparPP _ as morf -> [morf]
592   - | CP(ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> CP(ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji*)
593   - | NCP(Case c,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> NCP(Case c,ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji*)
594   - | PrepNCP(sem,prep,Case case,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> PrepNCP(sem,prep,Case case,ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji *)
595   - | PrepAdjP(sem,_,Case _) as morf -> [morf] (* to wygląda seryjny błąd w Walentym xp(abl[prepadjp(z,gen)]) *)
596   - | PrepNP(sem,prep,Str) -> List.flatten (Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)])) (* FIXME: sprawdzić kto kontroluje! *) (* FIXME: pomijam uzgodnienie liczby i rodzaju *) (* zakładam, że nie jest kontrolowany przez SUBJ w czasowikach z OBJ *)
597   - | PrepAdjP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> PrepAdjP(sem,prep,Case case)) (* FIXME: sprawdzić kto kontroluje! *) (* FIXME: pomijam uzgodnienie liczby i rodzaju *) (* zakładam, że nie jest kontrolowany przez SUBJ w czasowikach z OBJ *)
598   - | AdvP as morf -> [morf] (* FIXME: tu trafiają przysłówkowe realizacje, trzeba by je przetłumaczyć na przymiotniki *)
599   - | FixedP _ as morf -> [morf]
600   - | PrepP as morf -> [morf]
601   - | Or as morf -> [morf]
602   - | Pro as morf -> [morf]
603   - | Null as morf -> [morf]
604   - | phrase -> print_endline ("transform_np_phrase: " ^ ENIAMwalStringOf.phrase phrase); [phrase]
605   -
606   -let transform_np_pos = function
607   - | SUBST(_,Case _) as morf -> [morf]
608   - | SUBST(_,CaseAgr) as morf -> [morf]
609   - | ADJ(_,Case _,_,_) as morf -> [morf]
610   - | ADJ(n,CaseAgr,g,gr) -> [ADJ(n,AllAgr,g,gr)]
611   - | PACT(n,CaseAgr,g,a,neg,r) -> [PACT(n,AllAgr,g,a,neg,r)]
612   - | PPAS(_,Case _,_,_,_) as morf -> [morf]
613   - | PPAS(n,CaseAgr,g,a,neg) -> [PPAS(n,AllAgr,g,a,neg)]
614   - | ADJ(n,Str,g,gr) -> [ADJ(n,AllAgr,g,gr)]
615   - | PPAS(n,Str,g,a,neg) -> [PPAS(n,AllAgr,g,a,neg)]
616   - | PREP(Case _) as morf -> [morf]
617   - | ADV _ as morf -> [morf] (* FIXME: tu trafiają przysłówkowe realizacje, trzeba by je przetłumaczyć na przymiotniki *)
618   - | COMP _ as morf -> [morf]
619   - | QUB as morf -> [morf]
620   - | pos -> print_endline ("transform_np_pos: " ^ ENIAMwalStringOf.pos pos); [pos]
621   -
622   -let transform_adj_phrase = function
623   - NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)]
624   - | NP(Part) -> [NP(Case "gen");NP(Case "acc")(*;NumP(Case "gen");NumP(Case "acc")*)]
625   - | AdjP(CaseAgr) -> [AdjP(AllAgr)] (* jedno wystąpienie 'cały szczęśliwy', może się przydać podniesienie typu *)
626   - | PrepNP(sem,prep,Case case) -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)]
627   - | ComprepNP _ as morf -> [morf]
628   - | ComparNP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> ComparNP(sem,prep,Case case))
629   - | ComparPP _ as morf -> [morf]
630   - | CP(ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> CP(ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji*)
631   - | NCP(Case c,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> NCP(Case c,ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji*)
632   - | PrepNCP(sem,prep,Case case,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> PrepNCP(sem,prep,Case case,ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji *)
633   - | PrepAdjP(sem,_,Case _) as morf -> [morf]
634   - | PrepNP(sem,prep,Str) -> List.flatten (Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)])) (* FIXME: sprawdzić kto kontroluje! *) (* FIXME: pomijam uzgodnienie liczby i rodzaju *) (* zakładam, że nie jest kontrolowany przez SUBJ w czasowikach z OBJ *)
635   - | PrepAdjP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> PrepAdjP(sem,prep,Case case)) (* FIXME: sprawdzić kto kontroluje! *) (* FIXME: pomijam uzgodnienie liczby i rodzaju *) (* zakładam, że nie jest kontrolowany przez SUBJ w czasowikach z OBJ *)
636   - | InfP _ as morf -> [morf]
637   - | AdvP as morf -> [morf]
638   - | FixedP _ as morf -> [morf]
639   - | PrepP as morf -> [morf]
640   - | Or as morf -> [morf]
641   - | Pro as morf -> [morf]
642   - | Null as morf -> [morf]
643   - | morf -> print_endline ("transform_adj_phrase: " ^ ENIAMwalStringOf.phrase morf); [morf]
644   -
645   -let transform_adj_pos = function
646   - | SUBST(_,Case _) as morf -> [morf]
647   - | ADJ(n,CaseAgr,g,gr) -> [ADJ(n,AllAgr,g,gr)]
648   - | PREP(Case _) as morf -> [morf]
649   - | ADV _ as morf -> [morf]
650   - | morf -> print_endline ("transform_adj_pos: " ^ ENIAMwalStringOf.pos morf); [morf]
651   -
652   -let transform_prep_pos = function
653   - | SUBST(_,Case _) as morf -> [morf]
654   - | SUBST(n,Str) -> [SUBST(n,CaseAgr)]
655   - | NUM(Case _,_,_) as morf -> [morf]
656   - | ADJ(_,Case _,_,_) as morf -> [morf]
657   - | GER(_,Case _,_,_,_,_) as morf -> [morf]
658   - | PPAS(_,Case _,_,_,_) as morf -> [morf]
659   - | ADV _ as morf -> [morf]
660   - | QUB as morf -> [morf]
661   - | pos -> print_endline ("transform_prep_pos: " ^ ENIAMwalStringOf.pos pos); [pos]
662   -
663   -let transform_compar_phrase = function
664   - NP(Str) -> [NP CaseUndef(*;NumP(CaseUndef)*)] (* FIXME: ta sama sytuacja co w "jako" *)
665   - | FixedP _ as morf -> [morf]
666   - | phrase -> print_endline ("transform_compar_phrase: " ^ ENIAMwalStringOf.phrase phrase); [phrase]
667   -
668   -let transform_compar_pos = function
669   - | SUBST(_,Case _) as morf -> [morf]
670   - | ADJ(_,Case _,_,_) as morf -> [morf]
671   - | PREP(Case _) as morf -> [morf]
672   - | PPAS(_,Case _,_,_,_) as morf -> [morf]
673   - | SUBST(Number n,Str) -> [SUBST(Number n,CaseUndef)]
674   - | SUBST(NumberAgr,Str) -> [SUBST(NumberUndef,CaseUndef)]
675   - | SUBST(NumberUndef,Str) -> [SUBST(NumberUndef,CaseUndef)]
676   - | PPAS(NumberAgr,Str,GenderAgr,a,neg) -> [PPAS(NumberUndef,CaseUndef,GenderUndef,a,neg)] (* FIXME: ta sama sytuacja co w "jako" *)
677   - | PPAS(NumberAgr,CaseAgr,GenderAgr,a,neg) -> [PPAS(NumberUndef,CaseUndef,GenderUndef,a,neg)] (* FIXME: ta sama sytuacja co w "jako" *)
678   - | ADJ(NumberAgr,Str,GenderAgr,gr) -> [ADJ(NumberUndef,CaseUndef,GenderUndef,gr)] (* FIXME: ta sama sytuacja co w "jako" *)
679   - | ADJ(NumberAgr,CaseAgr,GenderAgr,gr) -> [ADJ(NumberUndef,CaseUndef,GenderUndef,gr)] (* FIXME: ta sama sytuacja co w "jako" *)
680   - | NUM(Case _,_,_) as morf -> [morf]
681   - | pos -> print_endline ("transform_compar_pos: " ^ ENIAMwalStringOf.pos pos); [pos]
682   -
683   -let transform_adv_phrase = function
684   - NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)]
685   - | PrepNP(sem,prep,Case case) -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)]
686   - | PrepNCP(sem,prep,Case case,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> PrepNCP(sem,prep,Case case,ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji *)
687   - | ComprepNP _ as morf -> [morf]
688   - | CP(ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> CP(ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji*)
689   - | InfP _ as morf -> [morf]
690   - | AdvP as morf -> [morf]
691   - | Or as morf -> [morf]
692   - | Pro as morf -> [morf]
693   - | Null as morf -> [morf]
694   - | PrepAdjP(sem,_,Case _) as morf -> [morf]
695   - | PrepNP(sem,prep,Str) -> List.flatten (Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)])) (* FIXME: sprawdzić kto kontroluje! *) (* FIXME: pomijam uzgodnienie liczby i rodzaju *) (* zakładam, że nie jest kontrolowany przez SUBJ w czasowikach z OBJ *)
696   - | PrepAdjP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> PrepAdjP(sem,prep,Case case)) (* FIXME: sprawdzić kto kontroluje! *) (* FIXME: pomijam uzgodnienie liczby i rodzaju *) (* zakładam, że nie jest kontrolowany przez SUBJ w czasowikach z OBJ *)
697   - | ComparNP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> ComparNP(sem,prep,Case case))
698   - | ComparPP _ as morf -> [morf]
699   -(* | AdjP(CaseAgr) as morf -> [morf] *)
700   -(* | NCP(Case c,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> NCP(Case c,ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji*)
701   - | PrepNCP(prep,Case case,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> PrepNCP(prep,Case case,ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji *)
702   - | FixedP _ as morf -> [morf]*)
703   - | morf -> print_endline ("transform_adv_phrase: " ^ ENIAMwalStringOf.phrase morf); [morf]
704   -
705   -let transform_adv_pos = function
706   -(* | SUBST(_,Case _) as morf -> [morf]
707   - | ADJ(_,CaseAgr,_,_) as morf -> [morf]*)
708   - COMP _ as morf -> [morf]
709   - | PREP(Case _) as morf -> [morf]
710   - | ADV _ as morf -> [morf]
711   - | morf -> print_endline ("transform_adv_pos: " ^ ENIAMwalStringOf.pos morf); [morf]
712   -
713   -(*| Prepnp("jako",Str) as morf -> morf
714   - | Prepnp("jak",Str) as morf -> morf
715   - | Prepnp("niczym",Str) as morf -> morf
716   - | Prepadjp("jako",Str) as morf -> morf
717   - | Prepadjp("jak",Str) as morf -> morf
718   - | Prepadjp("niczym",Str) as morf -> morf
719   - | Compar "jako" as morf -> morf
720   - | Compar "jak" as morf -> morf
721   - | Compar "niczym" as morf -> morf
722   - | Compar "niż" as morf -> morf*)
723   -
724   -let transform_pers_subj_phrase negation mood = function (* FIXME: prepnp(na,loc) *)
725   - | NP(Str) -> [NP(NomAgr)(*;NumP(NomAgr)*)]
726   - | NCP(Str,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> NCP(NomAgr,ctype,comp))
727   - | CP(ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> CP(ctype,comp))
728   - | InfP _ as morf -> [morf]
729   - | Or as morf -> [morf]
730   - | NP(Part) -> [NP(Case "gen")(*;NP(Case "acc")*)(*;NumP(Case "gen");NumP(Case "acc")*)]
731   - | Pro -> [ProNG]
732   - | morf -> print_endline ("transform_pers_subj_phrase: " ^ ENIAMwalStringOf.phrase morf); [morf]
733   -
734   -let transform_pers_subj_pos negation mood = function
735   - COMP _ as morf -> [morf]
736   - | SUBST(n,Str) -> [SUBST(n,NomAgr)]
737   - | SUBST(n,Case "nom") -> [SUBST(n,NomAgr)] (* wygląda na błąd Walentego, ale nie ma znaczenia *)
738   - | NUM(Str,g,AcmUndef) -> [NUM(NomAgr,g,AcmUndef)]
739   - | ADJ(n,Str,g,gr) -> [ADJ(n,NomAgr,g,gr)]
740   - | morf -> print_endline ("transform_ger_subj_pos: " ^ ENIAMwalStringOf.pos morf); [morf]
741   -
742   -let transform_ger_subj_phrase negation mood control = function
743   - | NP(Str) -> [NP(Case "gen");PrepNP(NoSem,"przez",Case "acc")(*;NumP(Case "gen")*)(*;PrepNumP("przez",Case "acc")*)] (* FIXME: czy przez:acc jest możliwe? *)
744   - | NCP(Str,ctype,comp) -> List.flatten (Xlist.map (transform_comp negation mood comp) (fun comp -> [NCP(Case "gen",ctype,comp);PrepNCP(NoSem,"przez",Case "acc",ctype,comp)])) (* FIXME: czy przez:acc jest możliwe? *)
745   - | CP(ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> CP(ctype,comp)) (* FIXME: czy to jest możliwe? *)
746   - | InfP _ as morf -> [morf] (* FIXME: czy to jest możliwe? *)
747   - | Or as morf -> [morf]
748   - | NP(Part) -> [NP(Case "gen")(*;NP(Case "acc")*)(*;NumP(Case "gen");NumP(Case "acc")*)]
749   - | Pro -> if control then [Pro] else [Null]
750   - | morf -> print_endline ("transform_ger_subj_phrase: " ^ ENIAMwalStringOf.phrase morf); [morf]
751   -
752   -let transform_ger_subj_pos negation mood = function (* FIXME: ADV(_) *)
753   - COMP _ as morf -> [morf] (* FIXME: czy to jest możliwe? *)
754   - | SUBST(n,Str) -> [SUBST(n,Case "gen")]
755   - | SUBST(n,Case "nom") -> [SUBST(n,Case "gen")] (* wygląda na błąd Walentego, ale nie ma znaczenia *)
756   - | NUM(Str,g,AcmUndef) -> [NUM(Case "gen",g,AcmUndef)]
757   - | ADJ(n,Str,g,gr) -> [ADJ(n,Case "gen",g,gr)]
758   - | morf -> print_endline ("transform_pers_subj_pos: " ^ ENIAMwalStringOf.pos morf); [morf]
759   -
760   -let transform_ppas_subj_phrase negation mood control = function
761   - | NP(Str) -> [PrepNP(NoSem,"przez",Case "acc")(*;PrepNumP("przez",Case "acc")*)]
762   - | NCP(Str,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> PrepNCP(NoSem,"przez",Case "acc",ctype,comp))
763   - | CP(ctype,comp) -> [Null] (* zakładam, że w ramie jest też NCP *)
764   - | Pro -> if control then [Pro] else [Null]
765   - | morf -> print_endline ("transform_ppas_subj_phrase: " ^ ENIAMwalStringOf.phrase morf); [morf]
766   -
767   -let transform_pers_phrase negation mood = function
768   - | NP(Str) -> List.flatten (Xlist.map (transform_str negation) (fun case -> [NP case(*;NumP(case)*)]))
769   - | AdjP(Str) -> Xlist.map (transform_str negation) (fun case -> AdjP case) (* FIXME: pomijam uzgadnianie liczby i rodzaju - wykonalne za pomocą kontroli *)
770   - | NCP(Str,ctype,comp) -> List.flatten (Xlist.map (transform_str negation) (fun case -> Xlist.map (transform_comp negation mood comp) (fun comp -> NCP(case,ctype,comp))))
771   - | NP(Part) -> [NP(Case "gen");NP(Case "acc")(*;NumP(Case "gen");NumP(Case "acc")*)]
772   - | NCP(Part,ctype,comp) -> List.flatten (Xlist.map (transform_comp negation mood comp) (fun comp -> [NCP(Case "gen",ctype,comp);NCP(Case "acc",ctype,comp)]))
773   - | NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)]
774   - | PrepNP(sem,prep,Case case) -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)]
775   -(* | PrepNumP(_,Case _) as morf -> [morf] *)
776   - | ComprepNP _ as morf -> [morf]
777   - | NCP(Case case,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> NCP(Case case,ctype,comp))
778   - | PrepNCP(sem,prep,Case case,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> PrepNCP(sem,prep,Case case,ctype,comp))
779   - | AdjP(Case _) as morf -> [morf] (* FIXME: pomijam uzgadnianie liczby i rodzaju - wykonalne za pomocą kontroli *)
780   - | PrepAdjP(sem,_,Case _) as morf -> [morf] (* FIXME: pomijam uzgadnianie liczby i rodzaju - wykonalne za pomocą kontroli *)
781   - | PrepNP(sem,prep,Str) -> List.flatten (Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)])) (* FIXME: sprawdzić kto kontroluje! *) (* FIXME: pomijam uzgodnienie liczby i rodzaju *) (* zakładam, że nie jest kontrolowany przez SUBJ w czasowikach z OBJ *)
782   - | PrepAdjP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> PrepAdjP(sem,prep,Case case)) (* FIXME: sprawdzić kto kontroluje! *) (* FIXME: pomijam uzgodnienie liczby i rodzaju *) (* zakładam, że nie jest kontrolowany przez SUBJ w czasowikach z OBJ *)
783   - | ComparNP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> ComparNP(sem,prep,Case case))
784   - | ComparPP _ as morf -> [morf]
785   - | CP(ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> CP(ctype,comp))
786   - | InfP _ as morf -> [morf]
787   - | PadvP as morf -> [morf]
788   - | AdvP -> if mood = "gerundial" then [AdjP AllAgr] else [AdvP]
789   - | FixedP _ as morf -> [morf]
790   - | PrepP as morf -> [morf]
791   - | Or as morf -> [morf]
792   - | Lex "się" as morf -> [morf]
793   -(* | Refl as morf -> [morf] *)
794   -(* | Recip as morf -> [morf] *)
795   - | Pro as morf -> [morf]
796   - | Null as morf -> [morf]
797   - | morf -> print_endline ("transform_pers_phrase: " ^ ENIAMwalStringOf.phrase morf); [morf]
798   -
799   -let transform_pers_pos negation mood = function
800   - | SUBST(n,Str) -> Xlist.map (transform_str negation) (fun case -> SUBST(n,case))
801   - | NUM(Str,g,a) -> Xlist.map (transform_str negation) (fun case -> NUM(case,g,a))
802   - | ADJ(n,Str,g,gr) -> Xlist.map (transform_str negation) (fun case -> ADJ(n,case,g,gr))
803   - | PPAS(n,Str,g,a,neg) -> Xlist.map (transform_str negation) (fun case -> PPAS(n,Str,g,a,neg))
804   - | SUBST(n,Part) -> [SUBST(n,Case "gen");SUBST(n,Case "acc")]
805   - | SUBST(_,Case _) as morf -> [morf]
806   - | NUM(Case _,_,_) as morf -> [morf]
807   - | PREP(Case _) as morf -> [morf]
808   - | ADJ(_,Case _,_,_) as morf -> [morf]
809   - | PREP(Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> PREP(Case case)) (* FIXME: sprawdzić kto kontroluje! *) (* FIXME: pomijam uzgodnienie liczby i rodzaju *) (* zakładam, że nie jest kontrolowany przez SUBJ w czasowikach z OBJ *)
810   - | SUBST(n,CaseAgr) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> SUBST(n,Case case)) (* FIXME: sprawdzić kto kontroluje! *)
811   - | ADJ(n,CaseAgr,g,gr) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> ADJ(n,Case case,g,gr)) (* FIXME: sprawdzić kto kontroluje! *)
812   - | COMPAR as morf -> [morf]
813   - | COMP _ as morf -> [morf]
814   - | INF _ as morf -> [morf]
815   - | ADV grad -> if mood = "gerundial" then [ADJ(NumberAgr,AllAgr,GenderAgr,grad)] else [ADV grad]
816   - | morf -> print_endline ("transform_pers_pos: " ^ ENIAMwalStringOf.pos morf); [morf]
817   -
818   -let transform_pers_schema negation mood schema =
819   - Xlist.map schema (fun s ->
820   - {s with morfs =
821   - if s.gf = SUBJ then List.flatten (Xlist.map s.morfs (function
822   - Phrase phrase -> Xlist.map (transform_pers_subj_phrase negation mood phrase) (fun phrase -> Phrase phrase)
823   - | E phrase -> Xlist.map (transform_pers_subj_phrase negation mood phrase) (fun phrase -> E phrase)
824   - | LexArg(id,pos,lex) -> Xlist.map (transform_pers_subj_pos negation mood pos) (fun pos -> LexArg(id,pos,lex))
825   - | _ -> failwith "transform_fin_schema"))
826   - else List.flatten (Xlist.map s.morfs (function
827   - Phrase phrase -> Xlist.map (transform_pers_phrase negation mood phrase) (fun phrase -> Phrase phrase)
828   - | E phrase -> [Phrase Null] (*E(List.flatten (Xlist.map phrases (transform_pers_phrase negation mood)))*) (* FIXME *)
829   - | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation mood pos) (fun pos -> LexArg(id,pos,lex))
830   - | _ -> failwith "transform_fin_schema"))})
831   -
832   -let transform_impt_schema negation mood schema =
833   - Xlist.map schema (fun s ->
834   - {s with morfs =
835   - if s.gf = SUBJ then [Phrase ProNG]
836   - else List.flatten (Xlist.map s.morfs (function
837   - Phrase phrase -> Xlist.map (transform_pers_phrase negation mood phrase) (fun phrase -> Phrase phrase)
838   - | E phrase -> [Phrase Null] (*E(List.flatten (Xlist.map phrases (transform_pers_phrase negation mood)))*) (* FIXME *)
839   - | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation mood pos) (fun pos -> LexArg(id,pos,lex))
840   - | _ -> failwith "transform_impt_schema"))})
841   -
842   -let transform_imps_schema negation mood schema =
843   - Xlist.map schema (fun s ->
844   - {s with morfs =
845   - if s.gf = SUBJ then [Phrase Pro]
846   - else List.flatten (Xlist.map s.morfs (function
847   - Phrase phrase -> Xlist.map (transform_pers_phrase negation mood phrase) (fun phrase -> Phrase phrase)
848   - | E phrase -> [Phrase Null] (*E(List.flatten (Xlist.map phrases (transform_pers_phrase negation mood)))*) (* FIXME *)
849   - | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation mood pos) (fun pos -> LexArg(id,pos,lex))
850   - | _ -> failwith "transform_imps_chema"))})
851   -
852   -let transform_ger_schema negation schema = (* FIXME: zakładam, że ger zeruje mood, czy to prawda? *)
853   - Xlist.map schema (fun s ->
854   - {s with morfs =
855   - if s.gf = SUBJ then List.flatten (Xlist.map s.morfs (function
856   - Phrase phrase -> Xlist.map (transform_ger_subj_phrase negation "gerundial" (s.cr <> [] || s.ce <> []) phrase) (fun phrase -> Phrase phrase)
857   - | E phrase -> Xlist.map (transform_ger_subj_phrase negation "gerundial" (s.cr <> [] || s.ce <> []) phrase) (fun phrase -> E phrase)
858   - | LexArg(id,pos,lex) -> Xlist.map (transform_ger_subj_pos negation "gerundial" pos) (fun pos -> LexArg(id,pos,lex))
859   - | _ -> failwith "transform_fin_schema"))
860   - else List.flatten (Xlist.map s.morfs (function
861   - Phrase phrase -> Xlist.map (transform_pers_phrase negation "gerundial" phrase) (fun phrase -> Phrase phrase)
862   - | E phrase -> [Phrase Null] (*E(List.flatten (Xlist.map phrases (transform_pers_phrase negation mood)))*) (* FIXME *)
863   - | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation "gerundial" pos) (fun pos -> LexArg(id,pos,lex))
864   - | _ -> failwith "transform_fin_schema"))})
865   -
866   -let transform_padv_schema negation mood pro schema =
867   - Xlist.map schema (fun s ->
868   - {s with morfs =
869   - if s.gf = SUBJ then if s.ce = [] then if pro then [Phrase Pro] else [Phrase Null] else [Phrase Null] else
870   - List.flatten (Xlist.map s.morfs (function
871   - Phrase phrase -> Xlist.map (transform_pers_phrase negation mood phrase) (fun phrase -> Phrase phrase)
872   - | E phrase -> [Phrase Null] (*E(List.flatten (Xlist.map phrases (transform_pers_phrase negation mood)))*) (* FIXME *)
873   - | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation mood pos) (fun pos -> LexArg(id,pos,lex))
874   - | _ -> failwith "transform_fin_schema"))})
875   -
(* Schema transformation applied to [pact] frames (see [transform_frame]).
   The [SUBJ] position is replaced by [Phrase Null]; in all other positions
   each realisation is rewritten with [transform_pers_phrase] /
   [transform_pers_pos].  Raises [Failure "transform_pact_schema"] on an
   unsupported realisation. *)
let transform_pact_schema negation mood schema =
  let rewrite = function
    | Phrase p ->
        Xlist.map (transform_pers_phrase negation mood p) (fun p -> Phrase p)
    | E _ -> [Phrase Null] (* FIXME: E realisations are dropped for now *)
    | LexArg(id,pos,lex) ->
        Xlist.map (transform_pers_pos negation mood pos) (fun pos -> LexArg(id,pos,lex))
    | _ -> failwith "transform_pact_schema" in
  Xlist.map schema (fun position ->
    let morfs =
      if position.gf = SUBJ then [Phrase Null]
      else List.flatten (Xlist.map position.morfs rewrite) in
    {position with morfs = morfs})
885   -
(* Schema transformation applied to [ppas] frames (see [transform_frame]).

   - [OBJ] positions are replaced by [Phrase Null].
   - [SUBJ] positions are rewritten with [transform_ppas_subj_phrase]; the
     boolean passed to it is true when the position has a non-empty [cr] or
     [ce] list.  A lexicalised [SUBST(_,Str)] subject raises [Not_found].
   - All remaining positions are rewritten with [transform_pers_phrase] /
     [transform_pers_pos].

   Raises [Failure "transform_ppas_schema"] on an unsupported realisation. *)
let transform_ppas_schema negation mood schema =
  let rewrite_subj ctrl = function
    | Phrase p ->
        Xlist.map (transform_ppas_subj_phrase negation mood ctrl p) (fun p -> Phrase p)
    | E p ->
        Xlist.map (transform_ppas_subj_phrase negation mood ctrl p) (fun p -> E p)
    | LexArg(_,SUBST(_,Str),_) -> raise Not_found (* FIXME!!! *)
    | _ -> failwith "transform_ppas_schema" in
  let rewrite_other = function
    | Phrase p ->
        Xlist.map (transform_pers_phrase negation mood p) (fun p -> Phrase p)
    | E _ -> [Phrase Null] (* FIXME: E realisations are dropped for now *)
    | LexArg(id,pos,lex) ->
        Xlist.map (transform_pers_pos negation mood pos) (fun pos -> LexArg(id,pos,lex))
    | _ -> failwith "transform_ppas_schema" in
  Xlist.map schema (fun position ->
    let morfs =
      if position.gf = OBJ then [Phrase Null]
      else if position.gf = SUBJ then
        let ctrl = position.cr <> [] || position.ce <> [] in
        List.flatten (Xlist.map position.morfs (rewrite_subj ctrl))
      else List.flatten (Xlist.map position.morfs rewrite_other) in
    {position with morfs = morfs})
900   -
(* For every [SUBJ] position of [schema], appends an extra adjunct position
   (built with [adjunct_schema_field] and realised as [Phrase Null] or
   [Phrase PadvP]) whose [ce] list matches the subject's [cr] list.
   A subject with an empty [cr] gets [cr = ["3"]] and the adjunct gets
   [ce = ["3"]].  Non-subject positions are kept unchanged.
   Raises [Failure "add_padv"] when a subject has more than one [cr]. *)
let add_padv schema =
  let padv_adjunct ce =
    let adjunct = adjunct_schema_field "" Both [Phrase Null;Phrase PadvP] in
    {adjunct with ce = ce} in
  let expand s =
    if s.gf <> SUBJ then [s] else
    match s.cr with
    | [] -> [{s with cr=["3"]}; padv_adjunct ["3"]]
    | [cr] -> [s; padv_adjunct [cr]]
    | _ -> failwith "add_padv" in
  List.flatten (Xlist.map schema expand)
909   -
(* Rewrites the realisations of every position in a noun schema:
   [Phrase] via [transform_np_phrase], [LexArg] via [transform_np_pos];
   [Multi [AdjP AllAgr]] is passed through untouched.
   Raises [Failure "transform_np_schema"] on anything else. *)
let transform_np_schema schema =
  let rewrite = function
    | Phrase p -> Xlist.map (transform_np_phrase p) (fun p -> Phrase p)
    | LexArg(id,pos,lex) ->
        Xlist.map (transform_np_pos pos) (fun pos -> LexArg(id,pos,lex))
    | Multi [AdjP AllAgr] as morf -> [morf]
    | _ -> failwith "transform_np_schema" in
  Xlist.map schema (fun position ->
    {position with morfs = List.flatten (Xlist.map position.morfs rewrite)})
918   -
(* Rewrites the realisations of a numeral schema according to [acm].
   [Phrase Pro] is kept; a lexicalised [SUBST(NumberUndef,CaseUndef)] gets
   case [GenAgr] for [Acm "rec"] and [AllAgr] for [Acm "congr"].
   Any other [acm] raises [Failure "transform_num_schema"]; any other
   realisation raises [Failure] with the realisation printed in the message.
   Note from the original author: the argument direction was added in
   expand_lexicalizations_morfs. *)
let transform_num_schema acm schema =
  (* Evaluated only when a matching LexArg is met, so an unsupported [acm]
     fails lazily, exactly where the original nested match did. *)
  let agreement_case () =
    match acm with
    | Acm "rec" -> GenAgr
    | Acm "congr" -> AllAgr
    | _ -> failwith "transform_num_schema" in
  let rewrite = function
    | Phrase Pro -> [Phrase Pro]
    | LexArg(id,SUBST(NumberUndef,CaseUndef),lex) ->
        [LexArg(id,SUBST(NumberUndef,agreement_case ()),lex)]
    | morf -> failwith ("transform_num_schema: " ^ ENIAMwalStringOf.morf morf) in
  Xlist.map schema (fun position ->
    {position with morfs = List.flatten (Xlist.map position.morfs rewrite)})
929   -
(* Rewrites the realisations of every position in an adjective schema:
   [Phrase] via [transform_adj_phrase], [LexArg] via [transform_adj_pos].
   Raises [Failure "transform_adj_schema"] on anything else. *)
let transform_adj_schema schema =
  let rewrite = function
    | Phrase p -> Xlist.map (transform_adj_phrase p) (fun p -> Phrase p)
    | LexArg(id,pos,lex) ->
        Xlist.map (transform_adj_pos pos) (fun pos -> LexArg(id,pos,lex))
    | _ -> failwith "transform_adj_schema" in
  Xlist.map schema (fun position ->
    {position with morfs = List.flatten (Xlist.map position.morfs rewrite)})
936   -
(* Rewrites the realisations of every position in an adverb schema:
   [Phrase] via [transform_adv_phrase], [LexArg] via [transform_adv_pos].
   Raises [Failure "transform_adv_schema"] on anything else. *)
let transform_adv_schema schema =
  let rewrite = function
    | Phrase p -> Xlist.map (transform_adv_phrase p) (fun p -> Phrase p)
    | LexArg(id,pos,lex) ->
        Xlist.map (transform_adv_pos pos) (fun pos -> LexArg(id,pos,lex))
    | _ -> failwith "transform_adv_schema" in
  Xlist.map schema (fun position ->
    {position with morfs = List.flatten (Xlist.map position.morfs rewrite)})
943   -
(* Rewrites the realisations of every position in a preposition schema.
   [Phrase (NumP _)] is kept as is; [LexArg] is rewritten with
   [transform_prep_pos].  Any other realisation raises [Failure] with the
   realisation printed in the message. *)
let transform_prep_schema schema =
  let rewrite = function
    | Phrase (NumP _) as morf -> [morf]
    | LexArg(id,pos,lex) ->
        Xlist.map (transform_prep_pos pos) (fun pos -> LexArg(id,pos,lex))
    | morf -> failwith ("transform_prep_schema: " ^ ENIAMwalStringOf.morf morf) in
  Xlist.map schema (fun position ->
    {position with morfs = List.flatten (Xlist.map position.morfs rewrite)})
950   -
(* Rewrites the realisations of every position in a COMPAR schema:
   [Phrase] via [transform_compar_phrase], [LexArg] via
   [transform_compar_pos].  Any other realisation raises [Failure] with the
   realisation printed in the message. *)
let transform_compar_schema schema =
  let rewrite = function
    | Phrase p -> Xlist.map (transform_compar_phrase p) (fun p -> Phrase p)
    | LexArg(id,pos,lex) ->
        Xlist.map (transform_compar_pos pos) (fun pos -> LexArg(id,pos,lex))
    | morf -> failwith ("transform_compar_schema: " ^ ENIAMwalStringOf.morf morf) in
  Xlist.map schema (fun position ->
    {position with morfs = List.flatten (Xlist.map position.morfs rewrite)})
957   -
(* Validates a COMP schema: every realisation must be a [LexArg] with a
   [PERS] part of speech and is kept unchanged; anything else raises
   [Failure] with the realisation printed in the message.
   Note from the original author: the argument direction was added in
   expand_lexicalizations_morfs. *)
let transform_comp_schema schema =
  let check = function
    | LexArg(_,PERS _,_) as morf -> morf
    | morf -> failwith ("transform_comp_schema: " ^ ENIAMwalStringOf.morf morf) in
  Xlist.map schema (fun position ->
    {position with morfs = Xlist.map position.morfs check})
963   -
(* Validates a QUB schema: every realisation must be a [LexArg] with a
   [PERS] part of speech and is kept unchanged; anything else raises
   [Failure] with the realisation printed in the message. *)
let transform_qub_schema schema =
  let check = function
    | LexArg(_,PERS _,_) as morf -> morf
    | morf -> failwith ("transform_qub_schema: " ^ ENIAMwalStringOf.morf morf) in
  Xlist.map schema (fun position ->
    {position with morfs = Xlist.map position.morfs check})
969   -
(* Drops from a schema every position whose realisations are exactly
   [Phrase Null; Phrase (AdjP CaseAgr)] or [Phrase Null; Phrase (AdjP Part)].
   The remaining positions are returned in their original order. *)
let remove_adj_agr schema =
  let is_kept position =
    match position.morfs with
    | [Phrase Null; Phrase (AdjP CaseAgr)]
    | [Phrase Null; Phrase (AdjP Part)] -> false
    | _ -> true in
  List.filter is_kept schema
975   -
(* Returns the [(role, role_attr)] pair of the first position in the schema
   whose grammatical function equals [gf].
   Raises [Not_found] when no position matches. *)
let get_role gf schema =
  let position = List.find (fun s -> s.gf = gf) schema in
  position.role, position.role_attr
979   -
(* Enumerates the concrete negation values covered by a negation attribute:
   a definite value yields itself, [NegationUndef] yields both values
   ([Negation] first).  Raises [Failure "expand_negation"] on [NegationNA]. *)
let expand_negation negation =
  match negation with
  | (Negation | Aff) as definite -> [definite]
  | NegationUndef -> [Negation; Aff]
  | NegationNA -> failwith "expand_negation"
985   -
(* Enumerates the concrete aspect values covered by an aspect attribute:
   a definite [Aspect _] yields itself, [AspectUndef] yields "imperf" then
   "perf".  Raises [Failure "expand_aspect"] on [AspectNA]. *)
let expand_aspect aspect =
  match aspect with
  | Aspect _ as definite -> [definite]
  | AspectUndef -> [Aspect "imperf"; Aspect "perf"]
  | AspectNA -> failwith "expand_aspect"
990   -
(* Loads [filename] with [File.load_file] and splits its contents on
   newline characters using [Str.split]. *)
let load_list filename =
  let contents = File.load_file filename in
  let newline = Str.regexp "\n" in
  Str.split newline contents
993   -
(* Lexeme sets.  The first five are read with [load_list], at module
   initialisation, from the files named by the corresponding [*_filename]
   values; the pronoun sets are hard-coded. *)
let subst_uncountable_lexemes =
  let entries = load_list subst_uncountable_lexemes_filename in
  StringSet.of_list entries

let subst_uncountable_lexemes2 =
  let entries = load_list subst_uncountable_lexemes_filename2 in
  StringSet.of_list entries

let subst_container_lexemes =
  let entries = load_list subst_container_lexemes_filename in
  StringSet.of_list entries

let subst_numeral_lexemes =
  let entries = load_list subst_numeral_lexemes_filename in
  StringSet.of_list entries

let subst_time_lexemes =
  let entries = load_list subst_time_lexemes_filename in
  StringSet.of_list entries

let subst_pronoun_lexemes =
  StringSet.of_list
    ["co"; "kto"; "cokolwiek"; "ktokolwiek"; "nic"; "nikt"; "coś"; "ktoś"; "to"]

let adj_pronoun_lexemes =
  StringSet.of_list
    ["czyj"; "jaki"; "który"; "jakiś"; "ten"; "taki"]

(* let adj_quant_lexemes = StringSet.of_list ["każdy"; "wszelki"; "wszystek"; "żaden"; "jakiś"; "pewien"; "niektóry"; "jedyny"; "sam"] *)

(* Lexemes that receive an "empty" default frame in [find_frames]. *)
let empty_valence_lexemes =
  StringSet.union subst_pronoun_lexemes adj_pronoun_lexemes
1006   -
1007   -
(* Classifies a noun-like token.  Returns [(nsyn, nsem)]:
   - [nsyn] is "pronoun" for pos ppron12/ppron3/siebie or a lemma listed in
     [subst_pronoun_lexemes], "proper" for pos psubst/pdepr/date, and
     "common" otherwise (the pos tests take precedence over the lemma test);
   - [nsem] is [[Common "count"]] for ppron12/ppron3/siebie, [[Time]] for a
     lemma in [subst_time_lexemes]; otherwise a list of [Common _] tags that
     always ends with "count", preceded by "mass" and/or "measure" when the
     lemma is in the uncountable / container lists. *)
let noun_type lemma pos =
  let pronominal_pos =
    match pos with
    | "ppron12" | "ppron3" | "siebie" -> true
    | _ -> false in
  let nsyn =
    if pronominal_pos then "pronoun" else
    match pos with
    | "psubst" | "pdepr" | "date" -> "proper"
    | _ ->
        if StringSet.mem subst_pronoun_lexemes lemma then "pronoun" else "common" in
  let nsem =
    if pronominal_pos then [Common "count"]
    else if StringSet.mem subst_time_lexemes lemma then [Time]
    else
      let is_mass =
        StringSet.mem subst_uncountable_lexemes lemma
        || StringSet.mem subst_uncountable_lexemes2 lemma in
      let is_measure = StringSet.mem subst_container_lexemes lemma in
      let tags =
        (if is_measure then ["measure"] else [])
        @ (if is_mass then ["mass"] else [])
        @ ["count"] in
      Xlist.map tags (fun tag -> Common tag) in
  nsyn, nsem
1022   -
(* Returns "pronoun" when [lemma] is listed in [adj_pronoun_lexemes] and
   "common" otherwise.
   FIXME: adjective types need investigating (the adjective list should be
   reviewed).
   FIXME: add a heuristic that takes letter case into account in order to
   detect proper adjectives, e.g. Oświęcimski. *)
let adj_type lemma =
  match StringSet.mem adj_pronoun_lexemes lemma with
  | true -> "pronoun"
  | false -> "common"
1026   -
(* Expands one dictionary frame of [lexeme] into the list of frames usable
   for the part-of-speech tag [pos].

   - [Frame(DefaultAtrs ...)]: dispatches on the string [pos].  Each branch
     first rejects attribute values that make no sense for that pos (by
     raising [Failure] with the printed frame), then builds frames with the
     pos-specific attribute record (NounAtrs, AdjAtrs, EmptyAtrs, PersAtrs,
     NonPersAtrs, GerAtrs) and the matching transform_*_schema function.
     Verb-like branches enumerate [expand_negation] and, where applicable,
     [expand_aspect].  An unknown [pos] raises [Failure].
   - [LexFrame(_,_,NoRestr,_)] and [ComprepFrame(_,_,NoRestr,_)]: dispatch
     on the structured pos and transform only the schema.
   - Any other frame raises [Failure]. *)
1027   -let transform_frame lexeme pos = function (* FIXME: add noun types here *)
1028   - Frame(DefaultAtrs(meanings,refl,opinion,negation,pred,aspect),schema) as frame ->
(* Noun-like tags: one frame per semantic type returned by noun_type; pronouns get an empty schema. *)
1029   - if pos = "subst" || pos = "depr" || pos = "psubst" || pos = "pdepr" || pos = "ppron12" || pos = "ppron3" || pos = "siebie" then (
1030   - if refl <> ReflEmpty || negation <> NegationNA || pred <> PredNA || aspect <> AspectNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame);
1031   - let nsyn,nsem(*,typ*) = noun_type lexeme pos in
1032   - let schema = if nsyn = "pronoun" then [] else (remove_adj_agr schema) @ noun_adjuncts in (* FIXME: remove_adj_agr is in the dictionary temporarily *)
1033   -(* List.flatten (Xlist.map typ (fun typ -> *)
1034   - Xlist.map nsem (fun nsem -> Frame(NounAtrs(meanings,nsyn,nsem(*,typ*)),transform_np_schema schema)))(* ))*) else
(* Symbol-like tags: always a "proper" noun frame with Common "count". *)
1035   - if pos = "symbol" || pos = "date" || pos = "date-interval" || pos = "hour" || pos = "hour-minute" || pos = "hour-interval" || pos = "hour-minute-interval" ||
1036   - pos = "year" || pos = "year-interval" || pos = "day" || pos = "day-interval" || pos = "day-month" || pos = "day-month-interval" ||
1037   - pos = "match-result" || pos = "month-interval" || pos = "roman" || pos = "roman-interval" || pos = "url" || pos = "email" || pos = "obj-id" then
1038   - let nsyn,nsem = "proper",[Common "count"] in
1039   - Xlist.map nsem (fun nsem -> Frame(NounAtrs(meanings,nsyn,nsem),transform_np_schema schema)) else
1040   - if pos = "adj" || pos = "adjc" || pos = "adjp" || pos = "ordnum" then (
1041   - if refl <> ReflEmpty || negation <> NegationNA || aspect <> AspectNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame);
1042   - let adjsyn(*,adjsem,typ*) = adj_type lexeme in
1043   - let schema = if pos = "adjp" || pos = "ordnum" then schema else if adjsyn = "pronoun" then [] else schema @ adj_adjuncts in
1044   - let case = match pred with Pred -> Case "pred" | PredNA -> CaseUndef in
1045   -(* Xlist.map typ (fun typ -> *)
1046   - [Frame(AdjAtrs(meanings,case,adjsyn(*,adjsem,typ*)),transform_adj_schema schema)])(* )*) else
1047   - if pos = "adv" then (
1048   - if refl <> ReflEmpty || negation <> NegationNA || pred <> PredNA || aspect <> AspectNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); (* FIXME: adverb types *)
1049   - [Frame(EmptyAtrs meanings,transform_adv_schema (remove_adj_agr schema))]) else
(* Finite verb forms: for ReflSie the lexeme gets " się" appended and nosem_refl_schema_field is prepended to the schema. *)
1050   - if pos = "fin" then (
1051   - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame);
1052   - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in
1053   - let schema = (add_padv schema) @ verb_adjuncts in
1054   - List.flatten (Xlist.map (expand_negation negation) (fun negation ->
1055   - Xlist.map (expand_aspect aspect) (function
1056   - Aspect "imperf" -> Frame(PersAtrs(meanings,s,negation,"indicative","pres",NoAux,Aspect "imperf"), transform_pers_schema negation "indicative" schema)
1057   - | Aspect "perf" -> Frame(PersAtrs(meanings,s,negation,"indicative","fut",NoAux,Aspect "perf"), transform_pers_schema negation "indicative" schema)
1058   - | _ -> failwith "transform_frame") @
1059   - [Frame(PersAtrs(meanings,s,negation,"imperative","fut",ImpAux,aspect), transform_pers_schema negation "imperative" schema)]))) else
1060   - if pos = "bedzie" then (
1061   - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame);
1062   - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in
1063   - let schema = (add_padv schema) @ verb_adjuncts in
1064   - List.flatten (Xlist.map (expand_negation negation) (fun negation ->
1065   - Xlist.map (expand_aspect aspect) (function
1066   - Aspect "imperf" -> Frame(PersAtrs(meanings,s,negation,"indicative","fut",NoAux,Aspect "imperf"), transform_pers_schema negation "indicative" schema)
1067   - | Aspect "perf" -> Frame(PersAtrs(meanings,s,negation,"indicative","fut",NoAux,Aspect "perf"), transform_pers_schema negation "indicative" schema) (* FIXME: unnecessary *)
1068   - | _ -> failwith "transform_frame")))) else
1069   - if pos = "praet" then (
1070   - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame);
1071   - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in
1072   - let schema = (add_padv schema) @ verb_adjuncts in
1073   - List.flatten (Xlist.map (expand_negation negation) (fun negation ->
1074   - List.flatten (Xlist.map (expand_aspect aspect) (function
1075   - Aspect "imperf" ->
1076   - [Frame(PersAtrs(meanings,s,negation,"indicative","past",NoAux,Aspect "imperf"), transform_pers_schema negation "indicative" schema);
1077   - Frame(PersAtrs(meanings,s,negation,"conditional","past",NoAux,Aspect "imperf"), transform_pers_schema negation "conditional" schema);
1078   - Frame(PersAtrs(meanings,s,negation,"indicative","fut",FutAux,Aspect "imperf"), transform_pers_schema negation "indicative" schema)]
1079   - | Aspect "perf" ->
1080   - [Frame(PersAtrs(meanings,s,negation,"indicative","past",NoAux,Aspect "perf"), transform_pers_schema negation "indicative" schema);
1081   - Frame(PersAtrs(meanings,s,negation,"conditional","past",NoAux,Aspect "perf"), transform_pers_schema negation "conditional" schema)]
1082   - | _ -> failwith "transform_frame"))))) else
1083   - if pos = "winien" then (
1084   - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame);
1085   - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in
1086   - let schema = (add_padv schema) @ verb_adjuncts in
1087   - List.flatten (Xlist.map (expand_negation negation) (fun negation ->
1088   - List.flatten (Xlist.map (expand_aspect aspect) (fun aspect ->
1089   - [Frame(PersAtrs(meanings,s,negation,"indicative","pres",NoAux,aspect), transform_pers_schema negation "indicative" schema);
1090   - Frame(PersAtrs(meanings,s,negation,"conditional","past",NoAux,aspect), transform_pers_schema negation "conditional" schema);
1091   - Frame(PersAtrs(meanings,s,negation,"indicative","past",PastAux,aspect), transform_pers_schema negation "indicative" schema)]))))) else
1092   - if pos = "impt" then (
1093   - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame);
1094   - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in
1095   - let schema = (add_padv schema) @ verb_adjuncts in
1096   - Xlist.map (expand_negation negation) (fun negation ->
1097   - Frame(PersAtrs(meanings,s,negation,"imperative","fut",NoAux,aspect),transform_impt_schema negation "imperative" schema))) else
1098   - if pos = "imps" then (
1099   - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame);
1100   - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in
1101   - let schema = (add_padv schema) @ verb_adjuncts in
1102   - Xlist.map (expand_negation negation) (fun negation ->
1103   - Frame(PersAtrs(meanings,s,negation,"indicative","past",NoAux,aspect),transform_imps_schema negation "indicative" schema))) else
1104   - if pos = "pred" then (
1105   - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame);
1106   - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in
1107   - let schema = (add_padv schema) @ verb_adjuncts in
1108   - List.flatten (Xlist.map (expand_negation negation) (fun negation ->
1109   - [Frame(PersAtrs(meanings,s,negation,"indicative","pres",NoAux,aspect), transform_pers_schema negation "indicative" schema);
1110   - Frame(PersAtrs(meanings,s,negation,"indicative","fut",FutAux,aspect), transform_pers_schema negation "indicative" schema);
1111   - Frame(PersAtrs(meanings,s,negation,"indicative","past",PastAux,aspect), transform_pers_schema negation "indicative" schema)]))) else
(* Non-finite forms: the role is taken from the SUBJ (or, for ppas, OBJ) position before the schema is modified; a default role is used when get_role raises Not_found. *)
1112   - if pos = "pcon" || pos = "pant" || pos = "inf" then (
1113   - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame);
1114   - let role,role_attr = try get_role SUBJ schema with Not_found -> "Initiator","" in
1115   - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in
1116   - let schema = schema @ verb_adjuncts in
1117   - Xlist.map (expand_negation negation) (fun negation ->
1118   - Frame(NonPersAtrs(meanings,s,role,role_attr,negation,aspect),transform_padv_schema negation "indicative" true schema))) else
1119   - if pos = "pact" then (
1120   - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame);
1121   - try
1122   - let role,role_attr = try get_role SUBJ schema with Not_found -> "Initiator","" in
1123   - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in
1124   - let schema = schema @ verb_adjuncts in
1125   - Xlist.map (expand_negation negation) (fun negation ->
1126   - Frame(NonPersAtrs(meanings,s,role,role_attr,negation,aspect),transform_pact_schema negation "indicative" schema))
1127   - with Not_found -> []) else
(* ppas: a ReflSie frame, or a Not_found raised while transforming the schema, yields no frames at all. *)
1128   - if pos = "ppas" then (
1129   - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame);
1130   - try
1131   - let role,role_attr = try get_role OBJ schema with Not_found -> "Theme","" in
1132   - let s,schema = if refl = ReflSie then raise Not_found else lexeme, schema in
1133   - let schema = schema @ verb_adjuncts in
1134   - Xlist.map (expand_negation negation) (fun negation ->
1135   - Frame(NonPersAtrs(meanings,s,role,role_attr,negation,aspect),transform_ppas_schema negation "indicative" schema))
1136   - with Not_found -> []) else
1137   - if pos = "ger" then (
1138   - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame);
1139   - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in (* FIXME: can ger have a non-semantic "się"? *)
1140   - let schema = schema @ verb_adjuncts in
1141   - Xlist.map (expand_negation negation) (fun negation ->
1142   - Frame(GerAtrs(meanings,s,negation,aspect),transform_ger_schema negation schema))) else
1143   - failwith ("transform_frame: " ^ pos)
(* Lexicalised frames: only the schema is transformed; NUM with AcmUndef is split into "congr" and "rec" variants, negation-bearing pos are expanded with expand_negation. *)
1144   - | LexFrame(id,pos,NoRestr,schema) as frame ->
1145   - (match pos with
1146   - SUBST _ -> [LexFrame(id,pos,NoRestr,transform_np_schema schema)]
1147   - | PREP _ -> [LexFrame(id,pos,NoRestr,transform_prep_schema schema)]
1148   - | NUM(c,g,AcmUndef) ->
1149   - Xlist.map [Acm "congr";Acm "rec"] (fun acm ->
1150   - LexFrame(id,NUM(c,g,acm),NoRestr,transform_num_schema acm schema))
1151   - | ADJ(n,c,g,gr) -> [LexFrame(id,pos,NoRestr,transform_adj_schema schema)]
1152   - | ADV(gr) -> [LexFrame(id,pos,NoRestr,transform_adv_schema schema)]
1153   - | GER(n,c,g,a,negation,ReflEmpty) ->
1154   - Xlist.map (expand_negation negation) (fun negation ->
1155   - LexFrame(id,GER(n,c,g,a,negation,ReflEmpty),NoRestr,transform_ger_schema negation schema))
1156   - | PACT(n,c,g,a,negation,ReflEmpty) ->
1157   - Xlist.map (expand_negation negation) (fun negation ->
1158   - LexFrame(id,PACT(n,c,g,a,negation,ReflEmpty),NoRestr,transform_pact_schema negation "indicative" schema))
1159   - | PPAS(n,c,g,a,negation) ->
1160   - Xlist.map (expand_negation negation) (fun negation ->
1161   - LexFrame(id,PPAS(n,c,g,a,negation),NoRestr,transform_ppas_schema negation "indicative" schema))
1162   - | INF(a,negation,r) ->
1163   - Xlist.map (expand_negation negation) (fun negation ->
1164   - LexFrame(id,INF(a,negation,r),NoRestr,transform_padv_schema negation "indicative" false schema))
1165   - | QUB -> [LexFrame(id,pos,NoRestr,transform_qub_schema schema)]
1166   - | COMPAR -> [LexFrame(id,pos,NoRestr,transform_compar_schema schema)]
1167   - | COMP _ -> [LexFrame(id,pos,NoRestr,transform_comp_schema schema)]
1168   - | PERS(negation,r) ->
1169   - Xlist.map (expand_negation negation) (fun negation ->
1170   - LexFrame(id,PERS(negation,r),NoRestr,transform_pers_schema negation "indicative" schema))
1171   - | _ -> failwith ("transform_frame:" ^ ENIAMwalStringOf.frame lexeme frame))
1172   - | ComprepFrame(s,pos,NoRestr,schema) as frame ->
1173   - (match pos with
1174   - PREP _ -> [ComprepFrame(s,pos,NoRestr,transform_prep_schema schema)]
1175   - | ADV _ -> [ComprepFrame(s,pos,NoRestr,transform_adv_schema schema)]
1176   - | _ -> failwith ("transform_frame:" ^ ENIAMwalStringOf.frame lexeme frame))
1177   - | frame -> failwith ("transform_frame:" ^ ENIAMwalStringOf.frame lexeme frame)
1178   -
(* A frame marked [Negation] is admissible only when the lexeme "nie" is a
   key of [lexemes]; every other negation value is always admissible. *)
let reduce_frame_negation lexemes negation =
  negation <> Negation || StringMap.mem lexemes "nie"
1182   -
(* A "conditional" frame is admissible only when the lexeme "by" is a key
   of [lexemes]; every other mood is always admissible. *)
let reduce_frame_mood lexemes mood =
  mood <> "conditional" || StringMap.mem lexemes "by"
1186   -
(* Tells whether the auxiliary required by a frame is available in
   [lexemes] (a map from lexeme to the set of its pos tags):
   - [NoAux]: always;
   - [PastAux] / [FutAux]: "być" must be present with tag "praet" /
     "bedzie" respectively;
   - [ImpAux]: one of "niech", "niechaj", "niechże", "niechajże" must be
     present. *)
let reduce_frame_aux lexemes aux =
  let byc_has_tag tag =
    try StringSet.mem (StringMap.find lexemes "być") tag
    with Not_found -> false in
  match aux with
  | NoAux -> true
  | PastAux -> byc_has_tag "praet"
  | FutAux -> byc_has_tag "bedzie"
  | ImpAux ->
      List.exists
        (fun particle -> StringMap.mem lexemes particle)
        ["niech"; "niechaj"; "niechże"; "niechajże"]
1192   -
(* Decides whether a frame is compatible with the lexemes present in the
   input.  Noun, adjective, empty-attribute, lexicalised and compound
   preposition frames always pass.  [PersAtrs] frames must pass the
   negation, mood and auxiliary checks; [NonPersAtrs] frames pass
   unconditionally for pos "pact"/"ppas" and otherwise need the negation
   check; [GerAtrs] frames need the negation check.
   Any other [Frame] attribute record raises [Failure]. *)
let reduce_frame_atrs pos lexemes frame =
  match frame with
  | Frame((NounAtrs _ | AdjAtrs _ | EmptyAtrs _),_)
  | LexFrame _
  | ComprepFrame _ -> true
  | Frame(PersAtrs(_,_,negation,mood,_,aux,_),_) ->
      reduce_frame_negation lexemes negation
      && reduce_frame_mood lexemes mood
      && reduce_frame_aux lexemes aux
  | Frame(NonPersAtrs(_,_,_,_,negation,_),_) ->
      pos = "pact" || pos = "ppas" || reduce_frame_negation lexemes negation
  | Frame(GerAtrs(_,_,negation,_),_) -> reduce_frame_negation lexemes negation
  | Frame(_,_) ->
      failwith ("reduce_frame_atrs: " ^ ENIAMwalStringOf.frame "" frame)
1203   -
(* Keeps, in their original order, the frames accepted by
   [reduce_frame_atrs pos lexemes].

   Implemented with [List.filter] instead of the previous hand-written
   recursion, which was not tail-recursive and rebuilt the result with [@]
   at every step.  The frames are now tested front to back; this only
   affects which [Failure] is reported first when several frames carry
   unsupported attributes. *)
let reduce_frame_atrs_list pos lexemes frames =
  List.filter (reduce_frame_atrs pos lexemes) frames
1207   -
(* Builds the valence map for [lexemes], a map from lexeme to the set of
   its pos tags.  The result is returned as [valence].

   Step 1: for every (lexeme, pos) pair, look the lexeme up in [walenty]
   under [simplify_pos pos].  When frames are found they are added with
   [convert_frame_sem]; otherwise a default frame description chosen from
   the simplified pos is added with [convert_frame].  The compound
   preposition fold iterates over an empty list, i.e. it is switched off.
   Step 2: every collected frame is expanded with [expand_restr].
   Step 3: every frame is expanded with [transform_frame] and the result is
   filtered with [reduce_frame_atrs_list]. *)
1208   -let find_frames lexemes =
1209   -(* print_endline "find_frames 1"; *)
1210   - let valence = StringMap.fold lexemes StringMap.empty (fun valence lexeme poss ->
1211   -(* let poss = StringSet.fold poss StringSet.empty (fun poss pos -> StringSet.add poss (simplify_pos pos)) in *)
1212   -(* Printf.printf "find_frame: %s |%s|\n" s (String.concat " " (StringSet.to_list lexemes)); *)
1213   - StringSet.fold poss valence (fun valence pos ->
1214   - let valence =
1215   - let frames_sem = try StringMap.find (StringMap.find walenty (simplify_pos pos)) lexeme with Not_found -> [] in
1216   -(* if frames_sem <> [] then Printf.printf "%s %s in TEI\n%!" lexeme pos; *)
1217   - if frames_sem <> [] then
1218   - Xlist.fold frames_sem valence (fun valence frame ->
1219   - convert_frame_sem expands subtypes equivs lexemes valence lexeme pos frame)
1220   - else
(* No dictionary entry: fall back to a default frame description; an unlisted simplified pos yields no frame. *)
1221   - let frames = match simplify_pos pos with
1222   - "verb" -> ((*try StringMap.find verb_frames lexeme with Not_found ->*) ["verb","","","","",""])
1223   - | "noun" -> ((*try StringMap.find noun_frames lexeme with Not_found ->*) if StringSet.mem empty_valence_lexemes lexeme then ["empty","","","","",""] else ["noun","","","","",""])
1224   - | "adj" -> ((*try StringMap.find adj_frames lexeme with Not_found ->*) if StringSet.mem empty_valence_lexemes lexeme then ["empty","","","","",""] else ["adj","","","","",""])
1225   - | "adv" -> ((*try StringMap.find adv_frames lexeme with Not_found ->*) ["adv","","","","",""])
1226   - | "pron" -> ["empty","","","","",""]
1227   - | "adjp" -> ["empty","","","","",""]
1228   - | "ordnum" -> ["empty","","","","",""]
1229   - | "symbol" -> ["empty","","","","",""]
1230   - | "date" -> ["date","","","","",""]
1231   - | "date-interval" -> ["empty","","","","",""]
1232   - | "hour" -> ["hour","","","","",""]
1233   - | "hour-minute" -> ["hour","","","","",""]
1234   - | "hour-interval" -> ["empty","","","","",""]
1235   - | "hour-minute-interval" -> ["empty","","","","",""]
1236   - | "year" -> ["empty","","","","",""]
1237   - | "year-interval" -> ["empty","","","","",""]
1238   - | "day" -> ["day","","","","",""]
1239   - | "day-interval" -> ["day","","","","",""]
1240   - | "day-month" -> ["date2","","","","",""]
1241   - | "day-month-interval" -> ["empty","","","","",""]
1242   - | "match-result" -> ["empty","","","","",""]
1243   - | "month-interval" -> ["empty","","","","",""]
1244   - | "roman" -> ["empty","","","","",""]
1245   - | "roman-interval" -> ["empty","","","","",""]
1246   - | "url" -> ["empty","","","","",""]
1247   - | "email" -> ["empty","","","","",""]
1248   - | "obj-id" -> ["empty","","","","",""]
1249   - | _ -> [] in
1250   -(* if frames = [] then valence else
1251   - Printf.printf "find_frame: %s |l|=%d\n" s (Xlist.size l); *)
1252   - Xlist.fold frames valence (fun valence frame ->
1253   - convert_frame expands subtypes equivs lexemes valence lexeme pos frame) in
1254   - Xlist.fold ((*try StringMap.find compreps lexeme with Not_found ->*) []) valence (fun valence (cpos,frame) -> (* FIXME: compound prepositions are disabled for now *)
1255   - if cpos = pos then convert_comprep_frame expands subtypes equivs lexemes valence lexeme pos frame else valence))) in
1256   -(* print_endline "find_frames 2"; *)
1257   - let valence = StringMap.mapi valence (fun lexeme poss ->
1258   - StringMap.mapi poss (fun pos frames ->
1259   - List.flatten (Xlist.map frames (fun frame ->
1260   -(* print_endline ("find_frames: " ^ ENIAMwalStringOf.frame lexeme frame); *)
1261   - expand_restr valence lexeme pos frame)))) in
1262   -(* print_endline "find_frames 3"; *)
1263   - let valence = StringMap.mapi valence (fun lexeme poss ->
1264   - StringMap.mapi poss (fun pos frames ->
1265   - reduce_frame_atrs_list pos lexemes (List.flatten (Xlist.map frames (transform_frame lexeme pos))))) in
1266   -(* let valence = StringMap.mapi valence (fun lexeme poss ->
1267   - StringMap.mapi poss (fun pos frames ->
1268   - Xlist.map frames (assign_thematic_role pos))) in*)
1269   -(* StringMap.iter valence (fun lexeme poss ->
1270   - StringMap.iter poss (fun pos frames ->
1271   - Xlist.iter frames (fun frame -> print_endline (ENIAMwalStringOf.frame lexeme frame))));*)
1272   -(* print_endline "find_frames 4"; *)
1273   - valence
1274 528  
1275 529 (*let _ =
1276 530 let valence = Xlist.fold all_frames StringMap.empty (fun valence (pos,frame_map) ->
... ...
lexSemantics/ENIAMwalParser.ml
1 1 (*
2   - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty".
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  2 + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
5 5 *
6 6 * This library is free software: you can redistribute it and/or modify
7 7 * it under the terms of the GNU Lesser General Public License as published by
... ... @@ -156,7 +156,7 @@ let parse_negation = function
156 156 (* | [Text ""] -> NegationNA *)
157 157 | l -> failwith ("parse_negation: " ^ string_of_token_list l)
158 158  
159   -let parse_refl = function
  159 +(* let parse_refl = function
160 160 (* [] -> ReflEmpty
161 161 | [Text "się"] -> ReflSie
162 162 | [Text ""] -> ReflEmpty
... ... @@ -164,7 +164,7 @@ let parse_refl = function
164 164 | [Text "true"] -> ReflSie *)
165 165 | [Text "nosię"] -> ReflFalse
166 166 | [Text "się"] -> ReflTrue
167   - | l -> failwith ("parse_refl: " ^ string_of_token_list l)
  167 + | l -> failwith ("parse_refl: " ^ string_of_token_list l) *)
168 168  
169 169 let parse_ctype = function
170 170 [Text "int"] -> Int
... ... @@ -172,11 +172,11 @@ let parse_ctype = function
172 172 | [Text "_"] -> CompTypeUndef
173 173 | l -> failwith ("parse_ctype: " ^ string_of_token_list l)
174 174  
175   -let parse_acm = function
  175 +(* let parse_acm = function
176 176 (* [Text "int"] -> Int
177 177 | [Text "rel"] -> Rel *)
178 178 | [Text "_"] -> AcmUndef
179   - | l -> failwith ("parse_acm: " ^ string_of_token_list l)
  179 + | l -> failwith ("parse_acm: " ^ string_of_token_list l) *)
180 180  
181 181 let parse_comp = function
182 182 | [Text "co"] -> Comp "co" (* subst qub prep comp *)
... ... @@ -234,17 +234,17 @@ let parse_pos = function
234 234 | "PPRON3",[number;case] -> PPRON3(parse_number number,parse_case case)
235 235 | "SIEBIE",[case] -> SIEBIE(parse_case case)
236 236 | "PREP",[case] -> PREP(parse_case case)
237   - | "NUM",[case;gender;acm] -> NUM(parse_case case,parse_gender gender,parse_acm acm)
  237 + | "NUM",[case;gender] -> NUM(parse_case case,parse_gender gender)
238 238 | "ADJ",[number;case;gender;grad] -> ADJ(parse_number number,parse_case case,parse_gender gender,parse_grad grad)
239 239 | "ADV",[grad] -> ADV(parse_grad grad)
240   - | "GER",[number;case;gender;aspect;negation;refl] -> GER(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation,parse_refl refl)
  240 + | "GER",[number;case;gender;aspect;negation] -> GER(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation)
241 241 | "PPAS",[number;case;gender;aspect;negation] -> PPAS(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation)
242   - | "PACT",[number;case;gender;aspect;negation;refl] -> PACT(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation,parse_refl refl)
243   - | "INF",[aspect;negation;refl] -> INF(parse_aspect aspect,parse_negation negation,parse_refl refl)
  242 + | "PACT",[number;case;gender;aspect;negation] -> PACT(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation)
  243 + | "INF",[aspect;negation] -> INF(parse_aspect aspect,parse_negation negation)
244 244 | "QUB",[] -> QUB
245   - | "COMPAR",[] -> COMPAR
  245 + | "COMPAR",[] -> COMPAR Str
246 246 | "COMP",[ctype] -> COMP(parse_ctype ctype)
247   - | "PERS",[negation;refl] -> PERS(parse_negation negation,parse_refl refl)
  247 + | "PERS",[negation] -> PERS(parse_negation negation)
248 248 | s,ll -> failwith ("parse_pos: " ^ s ^ "(" ^ String.concat "," (Xlist.map ll string_of_token_list) ^ ")")
249 249  
250 250 let rec parse_phrase = function
... ... @@ -253,7 +253,7 @@ let rec parse_phrase = function
253 253 | "adjp",[case] -> AdjP(parse_case case)
254 254 | "prepadjp",[[Text prep]; case] -> PrepAdjP(prep,parse_case case)
255 255 | "comprepnp",[[Text prep]] -> ComprepNP prep
256   - | "comparp",[[Text prep]] -> ComparP prep
  256 + | "comparp",[[Text prep]] -> ComparP(prep,Str)
257 257 | "cp",[ctype;comp] -> CP(parse_ctype ctype,parse_comp comp)
258 258 | "ncp",[case;ctype;comp] -> NCP(parse_case case,parse_ctype ctype,parse_comp comp)
259 259 | "prepncp",[[Text prep];case;ctype;comp] -> PrepNCP(prep,parse_case case,parse_ctype ctype,parse_comp comp)
... ...
lexSemantics/ENIAMwalReduce.ml
1 1 (*
2   - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty".
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  2 + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
5 5 *
6 6 * This library is free software: you can redistribute it and/or modify
7 7 * it under the terms of the GNU Lesser General Public License as published by
... ... @@ -24,7 +24,7 @@ let create_phrase_reqs s (reqs,noreqs) = function
24 24 | PrepNP(prep,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
25 25 | PrepAdjP(prep,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
26 26 | PrepNCP(prep,_,_,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
27   - | ComparP(prep) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
  27 + | ComparP(prep,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
28 28 | FixedP(prep) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
29 29 | SimpleLexArg(lex,_) -> StringMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs
30 30 | LexArg(_,lex,_) -> StringMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs
... ... @@ -35,7 +35,7 @@ let create_phrase_reqs2 s (reqs,noreqs) = function
35 35 | PrepNP(prep,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
36 36 | PrepAdjP(prep,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
37 37 | PrepNCP(prep,_,_,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
38   - | ComparP(prep) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
  38 + | ComparP(prep,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
39 39 | FixedP(prep) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs
40 40 | SimpleLexArg(lex,_) -> IntMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs
41 41 | LexArg(_,lex,_) -> IntMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs
... ... @@ -105,7 +105,7 @@ let reduce_phrase (test_comprep_reqs,test_comprep_reqs2,test_lexarg_reqs,test_le
105 105 | PrepNP(prep,case) as phrase -> if test_lexemes prep then phrase else raise Not_found
106 106 | PrepAdjP(prep,case) as phrase -> if test_lexemes prep then phrase else raise Not_found
107 107 | ComprepNP(prep) as phrase -> if test_comprep_reqs prep && test_comprep_reqs2 prep then phrase else raise Not_found
108   - | ComparP(prep) as phrase -> if test_lexemes prep then phrase else raise Not_found
  108 + | ComparP(prep,case) as phrase -> if test_lexemes prep then phrase else raise Not_found
109 109 | CP(ctype,comp) -> CP(ctype,reduce_comp test_lexemes comp)
110 110 | NCP(case,ctype,comp) -> if test_lexemes "to" then NCP(case,ctype,reduce_comp test_lexemes comp) else raise Not_found
111 111 | PrepNCP(prep,case,ctype,comp) -> if test_lexemes prep && test_lexemes "to" then PrepNCP(prep,case,ctype,reduce_comp test_lexemes comp) else raise Not_found
... ... @@ -204,7 +204,10 @@ let entries,schemata,connected =
204 204 - usunięcie adjunctów
205 205 - uwzględnienie cech morfoskładniowych
206 206 - scalenie schematów
207   - - dodanie adjunctów
  207 + - dodanie adjunctów - pamiętać o padvp
  208 +*)
  209 +(* TODO
  210 + - leksykalizacje bez schema
208 211 *)
209 212  
210 213 (* let _ =
... ...
lexSemantics/ENIAMwalStringOf.ml
1 1 (*
2   - * ENIAMwalenty, a converter for Polish Valence Dictionary "Walenty".
  2 + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
3 3 * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4 4 * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
5 5 *
... ... @@ -55,11 +55,11 @@ let case = function
55 55 | Part -> "part"
56 56 | CaseAgr -> "agr"
57 57 (* | CaseUAgr -> "uagr"
58   - | NomAgr -> "nomagr"
59   - | GenAgr -> "genagr"
60   - | AllAgr -> "allagr"
61 58 | AllUAgr -> "alluagr" *)
62 59 | CaseUndef -> "_"
  60 + | AllAgr -> "allagr"
  61 + | NomAgr -> "nomagr"
  62 + | GenAgr -> "genagr"
63 63  
64 64 let rec comp = function
65 65 Comp s -> s
... ... @@ -87,15 +87,15 @@ let grad = function
87 87 Grad s -> s
88 88 | GradUndef -> "_"
89 89  
90   -let refl = function
  90 +(* let refl = function
91 91 (* ReflEmpty -> "" *)
92 92 | ReflTrue -> "się"
93 93 | ReflFalse -> "nosię"
94   - | ReflUndef -> "_"
  94 + | ReflUndef -> "_" *)
95 95  
96   -let acm = function
  96 +(* let acm = function
97 97 Acm s -> s
98   - | AcmUndef -> "_"
  98 + | AcmUndef -> "_" *)
99 99  
100 100 let gf = function
101 101 SUBJ -> "subj"
... ... @@ -108,17 +108,17 @@ let pos = function
108 108 | PPRON3(n,c) -> "PPRON3(" ^ number n ^ "," ^ case c ^ ")"
109 109 | SIEBIE(c) -> "SIEBIE(" ^ case c ^ ")"
110 110 | PREP(c) -> "PREP(" ^ case c ^ ")"
111   - | NUM(c,g,a) -> "NUM(" ^ case c ^ "," ^ gender g ^ "," ^ acm a ^ ")"
  111 + | NUM(c,g) -> "NUM(" ^ case c ^ "," ^ gender g ^ ")"
112 112 | ADJ(n,c,g,gr) -> "ADJ(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ grad gr ^ ")"
113 113 | ADV(gr) -> "ADV(" ^ grad gr ^ ")"
114   - | GER(n,c,g,a,neg,r) -> "GER(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ "," ^ refl r ^ ")"
115   - | PACT(n,c,g,a,neg,r) -> "PACT(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ "," ^ refl r ^ ")"
  114 + | GER(n,c,g,a,neg) -> "GER(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ ")"
  115 + | PACT(n,c,g,a,neg) -> "PACT(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ ")"
116 116 | PPAS(n,c,g,a,neg) -> "PPAS(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ ")"
117   - | INF(a,n,r) -> "INF(" ^ aspect a ^ "," ^ negation n ^ "," ^ refl r ^ ")"
  117 + | INF(a,n) -> "INF(" ^ aspect a ^ "," ^ negation n ^ ")"
118 118 | QUB -> "QUB"
119   - | COMPAR -> "COMPAR"
  119 + | COMPAR c -> "COMPAR(" ^ case c ^ ")"
120 120 | COMP(c) -> "COMP(" ^ comp_type c ^ ")"
121   - | PERS(n,r) -> "PERS(" ^ negation n ^ "," ^ refl r ^ ")"
  121 + | PERS(n) -> "PERS(" ^ negation n ^ ")"
122 122 | FIXED -> "FIXED"
123 123  
124 124 let rec phrase = function
... ... @@ -129,7 +129,7 @@ let rec phrase = function
129 129 (* | NumP(c) -> "nump(" ^ case c ^ ")"
130 130 | PrepNumP(prep,c) -> "prepnump(" ^ prep ^ "," ^ case c ^ ")" *)
131 131 | ComprepNP(prep) -> "comprepnp(" ^ prep ^ ")"
132   - | ComparP(prep) -> "comparp(" ^ prep ^ ")"
  132 + | ComparP(prep,c) -> "comparp(" ^ prep ^ "," ^ case c ^ ")"
133 133 | CP(ct,co) -> "cp(" ^ comp_type ct ^ "," ^ comp co ^ ")"
134 134 | NCP(c,ct,co) -> "ncp(" ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")"
135 135 | PrepNCP(prep,c,ct,co) -> "prepncp(" ^ prep ^ "," ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")"
... ...
lexSemantics/ENIAMwalTypes.ml
1 1 (*
2   - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty".
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  2 + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
5 5 *
6 6 * This library is free software: you can redistribute it and/or modify
7 7 * it under the terms of the GNU Lesser General Public License as published by
... ... @@ -24,14 +24,14 @@ type opinion = Pewny | Potoczny | Watpliwy | Archaiczny | Zly | Wulgarny | Nieok
24 24 type negation = Negation | Aff | NegationUndef (*| NegationNA*)
25 25 type pred = PredTrue | PredFalse | PredUndef (*| PredNA*)
26 26 type aspect = Aspect of string | AspectUndef (*| AspectNA*)
27   -type case = Case of string | Str | Part | CaseAgr (*| NomAgr | GenAgr | AllAgr*) | CaseUndef (*| AllUAgr | CaseUAgr*)
  27 +type case = Case of string | Str | Part | CaseAgr | CaseUndef (*| AllUAgr | CaseUAgr*) | GenAgr | NomAgr | AllAgr
28 28 type comp = Comp of string | Zeby | Gdy | CompUndef
29 29 type comp_type = Int | Rel | CompTypeUndef (*| CompTypeAgr*)
30 30 type number = Number of string | NumberUndef | NumberAgr
31 31 type gender = Gender of string | GenderUndef | GenderAgr | Genders of string list
32 32 type grad = Grad of string | GradUndef
33   -type refl = (*ReflEmpty |*) ReflTrue | ReflFalse | ReflUndef
34   -type acm = Acm of string | AcmUndef
  33 +(* type refl = (*ReflEmpty |*) ReflTrue | ReflFalse | ReflUndef *)
  34 +(* type acm = Acm of string | AcmUndef *)
35 35  
36 36 (*type mood = (*Mood of*) string (*| MoodUndef*)
37 37 type tense = string
... ... @@ -47,17 +47,17 @@ type pos =
47 47 | PPRON3 of number * case
48 48 | SIEBIE of case
49 49 | PREP of case
50   - | NUM of case * gender * acm
  50 + | NUM of case * gender
51 51 | ADJ of number * case * gender * grad
52 52 | ADV of grad
53   - | GER of number * case * gender * aspect * negation * refl
54   - | PACT of number * case * gender * aspect * negation * refl
  53 + | GER of number * case * gender * aspect * negation
  54 + | PACT of number * case * gender * aspect * negation
55 55 | PPAS of number * case * gender * aspect * negation
56   - | INF of aspect * negation * refl
  56 + | INF of aspect * negation
57 57 | QUB
58   - | COMPAR
  58 + | COMPAR of case
59 59 | COMP of comp_type
60   - | PERS of (*number * gender * aspect * person * *)negation * refl
  60 + | PERS of (*number * gender * aspect * person * *)negation
61 61 | FIXED
62 62  
63 63 type phrase =
... ... @@ -68,7 +68,7 @@ type phrase =
68 68 (* | NumP of case
69 69 | PrepNumP of string * case *)
70 70 | ComprepNP of string
71   - | ComparP of string (** case*)
  71 + | ComparP of string * case
72 72 | CP of comp_type * comp
73 73 | NCP of case * comp_type * comp
74 74 | PrepNCP of string * case * comp_type * comp
... ... @@ -128,8 +128,8 @@ let empty_meaning = {mng_id = (-1);
128 128 | GerAtrs of meaning list * string * negation * aspect
129 129 | NonPersAtrs of meaning list * string * string * string * negation * aspect *)
130 130  
131   -type schema = {sch_id: int; opinion: opinion; reflexiveMark: refl; aspect: aspect;
132   - negativity: negation; predicativity: pred; positions: position list; text_rep: string}
  131 +(* type schema = {sch_id: int; opinion: opinion; reflexiveMark: refl; aspect: aspect;
  132 + negativity: negation; predicativity: pred; positions: position list; text_rep: string} *)
133 133  
134 134 type lex_entry =
135 135 (* Frame of frame_atrs * position list *)
... ...
lexSemantics/ENIAMwalenty.ml
1 1 (*
2   - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty".
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  2 + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
5 5 *
6 6 * This library is free software: you can redistribute it and/or modify
7 7 * it under the terms of the GNU Lesser General Public License as published by
... ...
lexSemantics/README
... ... @@ -2,8 +2,7 @@ ENIAMsemValence Version 1.0 :
2 2 -----------------------
3 3  
4 4 ENIAMsemValence is a library that assigns lexicosemantic information to tokens.
5   -It recognizes named entities and assigns thematic roles,
6   -senses, valence and other semantic information to tokens.
  5 +It assigns thematic roles, word senses, valence and other semantic information to tokens.
7 6  
8 7 Install
9 8 -------
... ...
lexSemantics/TODO
... ... @@ -3,3 +3,56 @@
3 3 - sprawdzić czy walencja nazw własnych jest dobrze zrobiona.
4 4 - trzeba zrobić słownik nazw własnych
5 5 - trzeba poprawić selekcję preferencji selekcyjnych: jeśli podrzędnikiem jest zaimek, nie muszą jawnie występować wśród sensów.
  6 +
  7 +- błędy w realizacjach
  8 +xp(abl[prepadjp(z,gen)]
  9 +na korzyść - na niekorzyść
  10 +xp(mod[prepnp(jako,str)]) -> xp(mod[compar(jako)])
  11 +xp(mod[prepadjp(jako,str)]) -> xp(mod[compar(jako)])
  12 +na sposób - zgłoszone
  13 +
  14 +cyrk advp(misc) [54480]
  15 +banalnie - pred
  16 +subst woda lex(880,woda,subst) {lex(święcony,ADJ(agr,gen,agr,pos))}
  17 +
  18 +uciąć: pewny: _: : perf: subj{np(str)} + obj{lex(np(str),sg,'gałąź',ratr1({lex(cp(rel[który]),aff,'siedzieć',,ratr(subj{np(str)}+{lex(adjp(agr),agr,agr,pos,'sam',natr)}+{lex(prepadjp(na,loc),sg,f,pos,'który',natr)}))}))}
  19 +
  20 +np(str) -> adjp(agr)
  21 +przychodzić: pewny: _: : imperf: subj{lex(np(str),sg,'godzina',ratr1({lex(np(str),agr,'zły',natr)}))} + {prepnp(na,acc)}
  22 +
  23 +Czym jest podmiot podniesiony w poniższych ramach i jak się zachowuje w stronie biernej?
  24 +obladzać: pewny: _: : imperf: subj{E} + obj{np(str)} + {np(dat)}
  25 +oblodzić: pewny: _: : perf: subj{E} + obj{np(str)} + {np(dat)}
  26 +
  27 +Czy 'zły' nie powinien być tu przymiotnikiem?
  28 +przychodzić: pewny: _: : imperf: subj{lex(np(str),sg,'godzina',ratr1({lex(np(str),agr,'zły',natr)}))} + {prepnp(na,acc)}
  29 +przyjść: pewny: _: : perf: subj{lex(np(str),sg,'godzina',ratr1({lex(np(str),agr,'zły',natr)}))} + {prepnp(na,acc)}
  30 +
  31 +Czy zamiast adjp(str) -> adjp(agr)
  32 +kreślić: pewny: _: : imperf: subj{np(str)} + obj{np(str)} + {lex(np(inst),pl,XOR('barwa','kolor'),ratr({adjp(str)}))}
  33 +malować: pewny: _: : imperf: subj{np(str)} + obj{np(str)} + {lex(np(inst),pl,XOR('barwa','kolor'),ratr({adjp(str)}))}
  34 +namalować: pewny: _: : perf: subj{np(str)} + obj{np(str)} + {lex(np(inst),pl,XOR('barwa','kolor'),ratr({adjp(str)}))}
  35 +odmalować: pewny: _: : perf: subj{np(str)} + obj{np(str)} + {lex(np(inst),pl,XOR('barwa','kolor'),ratr({adjp(str)}))}
  36 +odmalowywać: pewny: _: : imperf: subj{np(str)} + obj{np(str)} + {lex(np(inst),pl,XOR('barwa','kolor'),ratr({adjp(str)}))}
  37 +zajrzeć: pewny: _: : perf: subj{lex(np(str),sg,XOR('bieda','głód','nędza','śmierć'),atr({adjp(str)}))} + {np(dat)} + {lex(prepnp(w,acc),pl,'oko',natr)}
  38 +chwycić: pewny: _: : perf: subj{np(str)} + {lex(np(inst),pl,'usta',atr({adjp(str)}))} + {lex(np(str),sg,'powietrze',atr({adjp(agr)}))}
  39 +chwytać: pewny: _: : imperf: subj{np(str)} + {lex(np(inst),pl,'usta',atr({adjp(str)}))} + {lex(np(str),sg,'powietrze',atr({adjp(agr)}))}
  40 +łapać: pewny: _: : imperf: subj{np(str)} + obj{lex(np(str),sg,'powietrze',atr({adjp(agr)}))} + {lex(np(inst),pl,'usta',atr({adjp(str)}))}
  41 +złapać: pewny: _: : perf: subj{np(str)} + obj{lex(np(str),sg,'powietrze',atr({adjp(agr)}))} + {lex(np(inst),pl,'usta',atr({adjp(str)}))}
  42 +
  43 +
  44 +Jak się zachowuje podmiot zdaniowy w stronie biernej? - o to już pytałem
  45 +podciąć: pewny: _: : perf: subj{cp(gdy)} + obj{lex(np(str),pl,'skrzydło',natr)} + {np(dat)}
  46 +podciąć: pewny: _: : perf: subj{cp(jak)} + obj{lex(np(str),pl,'skrzydło',natr)} + {np(dat)}
  47 +podciąć: pewny: _: : perf: subj{cp(kiedy)} + obj{lex(np(str),pl,'skrzydło',natr)} + {np(dat)}
  48 +podcinać: pewny: _: : imperf: subj{cp(gdy)} + obj{lex(np(str),pl,'skrzydło',natr)} + {np(dat)}
  49 +podcinać: pewny: _: : imperf: subj{cp(jak)} + obj{lex(np(str),pl,'skrzydło',natr)} + {np(dat)}
  50 +podcinać: pewny: _: : imperf: subj{cp(kiedy)} + obj{lex(np(str),pl,'skrzydło',natr)} + {np(dat)}
  51 +sprawiać: pewny: _: : imperf: subj{np(str);cp(że);ncp(str,że)} + obj{np(str)} + {np(dat)}
  52 +sprawić: pewny: _: : perf: subj{np(str);cp(że);ncp(str,że)} + obj{np(str)} + {np(dat)}
  53 +zaskakiwać: pewny: _: : imperf: subj{np(str);cp(int);cp(że);ncp(str,int);ncp(str,że)} + obj{np(str)}
  54 +zaskoczyć: pewny: _: : perf: subj{np(str);cp(int);cp(że);ncp(str,int);ncp(str,że)} + obj{np(str)}
  55 +zwracać: pewny: _: : imperf: subj{cp(int)} + obj{lex(np(str),sg,'uwaga',atr({possp}))}
  56 +zwracać: pewny: _: : imperf: subj{cp(że)} + obj{lex(np(str),sg,'uwaga',atr({possp}))}
  57 +zwrócić: pewny: _: : perf: subj{cp(int)} + obj{lex(np(str),sg,'uwaga',atr({possp}))}
  58 +zwrócić: pewny: _: : perf: subj{cp(że)} + obj{lex(np(str),sg,'uwaga',atr({possp}))}
... ...
lexSemantics/entries.ml
1 1 (*
2   - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty".
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  2 + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
5 5 *
6 6 * This library is free software: you can redistribute it and/or modify
7 7 * it under the terms of the GNU Lesser General Public License as published by
... ...
lexSemantics/makefile
... ... @@ -3,7 +3,7 @@ OCAMLOPT=ocamlopt
3 3 OCAMLDEP=ocamldep
4 4 INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam
5 5 OCAMLFLAGS=$(INCLUDES) -g
6   -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa #eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-plWordnet.cmxa eniam-lcg-parser.cmxa #eniam-lcg-grammar-pl.cmxa #eniam-lexSemantics.cmxa
  6 +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa #eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-plWordnet.cmxa #eniam-lexSemantics.cmxa
7 7 INSTALLDIR=`ocamlc -where`/eniam
8 8  
9 9 SOURCES= ENIAMlexSemanticsTypes.ml ENIAMcategories.ml ENIAMlexSemanticsData.ml ENIAMlexSemantics.ml
... ... @@ -28,8 +28,8 @@ eniam-lexSemantics.cmxa: $(SOURCES)
28 28  
29 29 # test: test.ml
30 30 # $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml
31   -test: entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml test.ml
32   - $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml test.ml
  31 +test: entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml test.ml
  32 + $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml test.ml
33 33  
34 34  
35 35 .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx
... ...
lexSemantics/test.ml
1 1 (*
2 2 * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
5 5 *
6 6 * This library is free software: you can redistribute it and/or modify
7 7 * it under the terms of the GNU Lesser General Public License as published by
... ...
lexSemantics/test2.ml
1 1 (*
2   - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty".
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  2 + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
5 5 *
6 6 * This library is free software: you can redistribute it and/or modify
7 7 * it under the terms of the GNU Lesser General Public License as published by
... ...
subsyntax/README
... ... @@ -4,6 +4,7 @@ ENIAMsubsyntax Version 1.1 :
4 4 ENIAMsubsyntax is a library that
5 5 - performs tokenization, lemmatization, part of speech tagging;
6 6 - detects MWE and abbreviations;
  7 +- recognizes named entities;
7 8 - splits text into sentences.
8 9  
9 10 Install
... ...
walenty/ENIAMwalConnect.ml
... ... @@ -72,7 +72,7 @@ let connect entry =
72 72 let phrases = process_morfs position.morfs in
73 73 let morfs = Xlist.fold phrase_ids [] (fun morfs phrase_id ->
74 74 try IntMap.find phrases phrase_id :: morfs
75   - with Not_found -> Printf.printf "%s\n%!" entry.form_orth;morfs) in
  75 + with Not_found -> if entry.form_orth <> "podobać" then Printf.printf "connect: %s\n%!" entry.form_orth;morfs) in
76 76 {position with role=arg.role; role_attr=arg.role_attribute; sel_prefs=sel_prefs;
77 77 morfs=List.rev morfs} :: conn_positions)) in
78 78 (* let meanings = List.rev (Xlist.rev_map frame.meanings (fun id ->
... ...
walenty/ENIAMwalGenerate.ml
... ... @@ -20,8 +20,19 @@
20 20 open ENIAMwalTypes
21 21 open Xstd
22 22  
  23 +let correct_walenty entry =
  24 + if entry.form_orth = "podobać" then
  25 + {entry with schemata=Xlist.map entry.schemata (fun s ->
  26 + {s with positions=Xlist.map s.positions (fun p ->
  27 + if p.gf=SUBJ then {p with morfs=List.flatten (Xlist.map p.morfs (function
  28 + MorfId 126 -> []
  29 + | m -> [m]))}
  30 + else p)})}
  31 + else entry
  32 +
23 33 let load_walenty walenty_filename expands_filename =
24 34 let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in
  35 + let walenty = Xlist.rev_map walenty correct_walenty in
25 36 let expands = ENIAMwalTEI.load_expands expands_filename in
26 37 let meanings =
27 38 Xlist.fold walenty IntMap.empty (fun meanings entry ->
... ...
walenty/ENIAMwalLex.ml
... ... @@ -41,6 +41,39 @@ let rec split_elexeme = function
41 41 genders,[ORcoord(List.rev l)]
42 42 | Elexeme gender -> [gender],[]
43 43  
  44 +let rec get_lexemes = function
  45 + Lexeme s -> [s]
  46 + | ORconcat l -> List.flatten (Xlist.map l get_lexemes)
  47 + | ORcoord l -> List.flatten (Xlist.map l get_lexemes)
  48 + | XOR l -> List.flatten (Xlist.map l get_lexemes)
  49 + | Elexeme gender -> failwith "get_lexemes"
  50 +
  51 +let rec remove_list set = function
  52 + [] -> []
  53 + | s :: l -> if Xlist.mem set s then remove_list set l else s :: (remove_list set l)
  54 +
  55 +let rec check_lexemes_morfs l = function
  56 + LexPhrase(lexs,(_,schema)) ->
  57 + let l = Xlist.fold lexs l (fun l (_,lex) ->
  58 + remove_list (get_lexemes lex) l) in
  59 + check_lexemes_schema l schema
  60 + | _ -> l
  61 +
  62 +and check_lexemes_schema l schema =
  63 + Xlist.fold schema l (fun l s ->
  64 + Xlist.fold s.morfs l check_lexemes_morfs)
  65 +
  66 +let add_refl_restr (restr,schema) =
  67 + (match restr with
  68 + Natr -> Ratr
  69 + | Atr1 -> Atr
  70 + | Atr -> Atr
  71 + | Ratr1 -> Ratr
  72 + | Ratr -> Ratr
  73 + | Ratrs -> Ratrs
  74 + | NoRestr -> failwith "add_refl_restr"),
  75 + position [LexPhrase([QUB,Lexeme "się"],(Natr,[]))] :: schema
  76 +
44 77 let rec expand_lexicalizations_schema schema =
45 78 Xlist.map schema (fun s ->
46 79 {s with morfs=expand_lexicalizations_morfs s.morfs})
... ... @@ -51,15 +84,7 @@ and expand_lexicalizations_morfs morfs = (* uproszczenie polegające na zezwolen
51 84 LexPhrase(pos_lex,(restr,schema)) -> LexPhrase(pos_lex,(restr,expand_lexicalizations_schema schema))
52 85 | morf -> morf in
53 86 match morf with
54   - (* LexPhrase([ADV _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf] *)
55   - (* | LexPhrase([PREP _,_;SUBST _,_],(_,schema)) -> if remove_trivial_args schema <> [] then print_endline (ENIAMwalStringOf.morf morf); [morf] *)
56   - (* | LexPhrase([PREP _,_;GER _,_],(_,schema)) -> if remove_trivial_args schema <> [] then print_endline (ENIAMwalStringOf.morf morf); [morf] *)
57   - (* | LexPhrase([NUM _,_;_],(_,schema)) -> if remove_trivial_args schema <> [] then print_endline (ENIAMwalStringOf.morf morf); [morf] *)
58   - (* | LexPhrase([PREP _,_;NUM _,_;_],(_,schema)) -> if remove_trivial_args schema <> [] then print_endline (ENIAMwalStringOf.morf morf); [morf] *)
59   - (* | LexPhrase([PREP _,_;ADJ _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf]
60   - | LexPhrase([PREP _,_;PPAS _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf]
61   - | LexPhrase([PREP _,_;PACT _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf] *)
62   - (* | Phrase(PrepNumP(prep,case)) -> [LexPhrase([PREP case,Lexeme prep],(Ratrs,[position(*2*) [Phrase(NumP(case))]]))] *)
  87 +(* | Phrase(PrepNumP(prep,case)) -> [LexPhrase([PREP case,Lexeme prep],(Ratrs,[position(*2*) [Phrase(NumP(case))]]))] *)
63 88 | Phrase(PrepNumP(prep,case)) -> [Phrase(PrepNP(prep,case))] (* FIXME: celowe uproszczenie *)
64 89 | LexPhrase([PREP pcase,plex;SUBST(n,c),slex],(Atr1,[{morfs=[LexPhrase([QUB,_],_)]} as s])) ->
65 90 (* print_endline (ENIAMwalStringOf.morf morf); *)
... ... @@ -74,62 +99,30 @@ and expand_lexicalizations_morfs morfs = (* uproszczenie polegające na zezwolen
74 99 [LexPhrase([PREP pcase,plex],(Ratrs,[position [LexPhrase([SUBST(n,c),slex],(Natr,[]))];s(*{s with dir=Backward}*)]))]
75 100 | LexPhrase([PREP pcase,plex;pos,lex],restr) ->
76 101 [LexPhrase([PREP pcase,plex],(Ratrs,[position [LexPhrase([pos,lex],restr)]]))]
77   - | LexPhrase([PREP pcase,plex;NUM(c,g,a),nlex;pos,lex],restr) ->
  102 + | LexPhrase([PREP pcase,plex;NUM(c,g),nlex;pos,lex],restr) ->
78 103 let genders,lexs = split_elexeme lex in
79 104 Xlist.map genders (fun gender ->
80   - LexPhrase([PREP pcase,plex],(Ratrs,[position [LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[(*num*)position [Phrase Null(*Pro*)]]))]]))) @ (*FIXME*)
  105 + LexPhrase([PREP pcase,plex],(Ratrs,[position [LexPhrase([NUM(c,gender),nlex],(Ratrs,[(*num*)position [Phrase Null(*Pro*)]]))]]))) @ (*FIXME*)
81 106 Xlist.map lexs (fun lex ->
82   - LexPhrase([PREP pcase,plex],(Ratrs,[position [LexPhrase([NUM(c,g,a),nlex],(Ratrs,[(*num*)position [LexPhrase([pos,lex],restr)]]))]])))
83   - | LexPhrase([NUM(c,g,a),nlex;pos,lex],restr) ->
  107 + LexPhrase([PREP pcase,plex],(Ratrs,[position [LexPhrase([NUM(c,g),nlex],(Ratrs,[(*num*)position [LexPhrase([pos,lex],restr)]]))]])))
  108 + | LexPhrase([NUM(c,g),nlex;pos,lex],restr) ->
84 109 let genders,lexs = split_elexeme lex in
85 110 Xlist.map genders (fun gender ->
86   - LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[(*num*)position [Phrase Null(*Pro*)]]))) @
  111 + LexPhrase([NUM(c,gender),nlex],(Ratrs,[(*num*)position [Phrase Null(*Pro*)]]))) @
87 112 Xlist.map lexs (fun lex ->
88   - LexPhrase([NUM(c,g,a),nlex],(Ratrs,[(*num*)position [LexPhrase([pos,lex],restr)]])))
  113 + LexPhrase([NUM(c,g),nlex],(Ratrs,[(*num*)position [LexPhrase([pos,lex],restr)]])))
  114 + | LexPhrase([INF(a,n),lex;QUB,Lexeme "się"],restr) -> [LexPhrase([INF(a,n),lex],add_refl_restr restr)]
  115 + | LexPhrase([COMP ctype,clex;pos,lex;QUB,Lexeme "się"],restr) ->
  116 + if Xlist.size (check_lexemes_schema (get_lexemes clex) (snd restr)) = 0 then
  117 + [LexPhrase([pos,lex],add_refl_restr restr)]
  118 + else [LexPhrase([COMP ctype,clex],(Ratrs,[(*std*)position (*Forward*) [LexPhrase([pos,lex],add_refl_restr restr)]]))]
89 119 | LexPhrase([COMP ctype,clex;pos,lex],restr) ->
90   - [LexPhrase([COMP ctype,clex],(Ratrs,[(*std*)position (*Forward*) [LexPhrase([pos,lex],restr)]]))]
91   - | LexPhrase([SUBST(n,c),slex;COMP ctype,clex;pos,lex],restr) ->
92   - [LexPhrase([SUBST(n,c),slex],(Ratrs,[(*std*)position (*Forward*) [LexPhrase([COMP ctype,clex],(Ratrs,[(*std*)position (*Forward*) [LexPhrase([pos,lex],restr)]]))]]))] (* FIXME: poprawić po zrobieniu NCP *)
  120 + if Xlist.size (check_lexemes_schema (get_lexemes clex) (snd restr)) = 0 then
  121 + [LexPhrase([pos,lex],restr)]
  122 + else [LexPhrase([COMP ctype,clex],(Ratrs,[(*std*)position (*Forward*) [LexPhrase([pos,lex],restr)]]))]
93 123 | LexPhrase(_::_::_,_) -> failwith ("expand_lexicalizations_morfs: " ^ ENIAMwalStringOf.morf morf)
94   - (* | LexPhrase([PREP pcase,plex;SUBST(n,c),slex],(Atr1,[gf,cr,ce,[LexPhrase([QUB,lex],arestr)]])) ->
95   - (* print_endline (ENIAMwalStringOf.morf morf); *)
96   - [LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))]]));
97   - LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))];gf,cr,ce,[LexPhrase([QUB,lex],arestr)]]))]
98   - | LexPhrase([PREP(pcase),plex;SUBST(n,c),slex],(Atr1,[gf,cr,ce,[LexPhrase([ADV gr,lex],arestr)]])) ->
99   - (* print_endline (ENIAMwalStringOf.morf morf); *)
100   - [LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))]]));
101   - LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))];gf,cr,ce,[LexPhrase([ADV gr,lex],arestr)]]))]
102   - | LexPhrase([PREP pcase,plex;SUBST(n,c),slex],(Ratr1,[gf,cr,ce,[LexPhrase([ADV gr,lex],arestr)]])) ->
103   - (* print_endline (ENIAMwalStringOf.morf morf); *)
104   - [LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))];gf,cr,ce,[LexPhrase([ADV gr,lex],arestr)]]))]
105   - | LexPhrase([PREP pcase,plex;pos,lex],restr) ->
106   - [LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))]
107   - | LexPhrase([PREP pcase,plex;NUM(c,g,a),nlex;pos,lex],restr) ->
108   - let genders,lexs = split_elexeme lex in
109   - Xlist.map genders (fun gender ->
110   - LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[("OBJ","QUANT-ARG",["T"]),[],[],[Phrase Pro]]))]]))) @
111   - Xlist.map lexs (fun lex ->
112   - LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([NUM(c,g,a),nlex],(Ratrs,[("OBJ","QUANT-ARG",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))]])))
113   - | LexPhrase([NUM(c,g,a),nlex;pos,lex],restr) ->
114   - let genders,lexs = split_elexeme lex in
115   - Xlist.map genders (fun gender ->
116   - LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[("OBJ","QUANT-ARG",["T"]),[],[],[Phrase Pro]]))) @
117   - Xlist.map lexs (fun lex ->
118   - LexPhrase([NUM(c,g,a),nlex],(Ratrs,[("OBJ","QUANT-ARG",["T"]),[],[],[LexPhrase([pos,lex],restr)]])))
119   - | LexPhrase([COMP ctype,clex;pos,lex],restr) ->
120   - [LexPhrase([COMP ctype,clex],(Ratrs,[("C","",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))]
121   - | LexPhrase([SUBST(n,c),slex;COMP ctype,clex;pos,lex],restr) ->
122   - [LexPhrase([SUBST(n,c),slex],(Ratrs,[("OBJ","",["T"]),[],[],[LexPhrase([COMP ctype,clex],(Ratrs,[("C","",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))]]))]
123   - | LexPhrase(_::_::_,_) -> failwith ("expand_lexicalizations_morfs: " ^ ENIAMwalStringOf.morf morf)*)
124 124 | morf -> [morf]))
125 125  
126   -let rec get_lexemes = function
127   - Lexeme s -> [s]
128   - | ORconcat l -> List.flatten (Xlist.map l get_lexemes)
129   - | ORcoord l -> List.flatten (Xlist.map l get_lexemes)
130   - | XOR l -> List.flatten (Xlist.map l get_lexemes)
131   - | Elexeme gender -> failwith "get_lexemes"
132   -
133 126 let winien = StringSet.of_list ["winien"; "rad"; "powinien"; "nierad"; "niekontent"; "kontent"; "gotów"]
134 127 let pred = StringSet.of_list ["żal"; "śmiech"; "znać"; "wstyd"; "wolno"; "widać"; "wiadomo";
135 128 "warto"; "trzeba"; "trza"; "słychać"; "szkoda"; "strach"; "stać"; "sposób"; "potrzeba"; "pora";
... ... @@ -148,7 +141,11 @@ let get_pos lex = function
148 141 | "się" -> ["qub"]
149 142 | _ -> ["subst"])
150 143 | PREP _ -> ["prep"]
151   - | NUM _ -> ["num"]
  144 + | NUM _ ->
  145 + (try
  146 + let _ = int_of_string lex in
  147 + ["intnum"]
  148 + with _ -> ["num"])
152 149 | ADV _ -> ["adv"]
153 150 | ADJ _ -> ["adj"]
154 151 | GER _ -> ["ger"]
... ...
walenty/ENIAMwalStringOf.ml
... ... @@ -93,9 +93,9 @@ let refl = function
93 93 | ReflFalse -> "nosię"
94 94 | ReflUndef -> "_"
95 95  
96   -let acm = function
  96 +(* let acm = function
97 97 Acm s -> s
98   - | AcmUndef -> "_"
  98 + | AcmUndef -> "_" *)
99 99  
100 100 let gf = function
101 101 SUBJ -> "subj"
... ... @@ -108,17 +108,17 @@ let pos = function
108 108 | PPRON3(n,c) -> "PPRON3(" ^ number n ^ "," ^ case c ^ ")"
109 109 | SIEBIE(c) -> "SIEBIE(" ^ case c ^ ")"
110 110 | PREP(c) -> "PREP(" ^ case c ^ ")"
111   - | NUM(c,g,a) -> "NUM(" ^ case c ^ "," ^ gender g ^ "," ^ acm a ^ ")"
  111 + | NUM(c,g) -> "NUM(" ^ case c ^ "," ^ gender g ^ (*"," ^ acm a ^*) ")"
112 112 | ADJ(n,c,g,gr) -> "ADJ(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ grad gr ^ ")"
113 113 | ADV(gr) -> "ADV(" ^ grad gr ^ ")"
114   - | GER(n,c,g,a,neg,r) -> "GER(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ "," ^ refl r ^ ")"
115   - | PACT(n,c,g,a,neg,r) -> "PACT(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ "," ^ refl r ^ ")"
  114 + | GER(n,c,g,a,neg) -> "GER(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg (*^ "," ^ refl r*) ^ ")"
  115 + | PACT(n,c,g,a,neg) -> "PACT(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg (*^ "," ^ refl r*) ^ ")"
116 116 | PPAS(n,c,g,a,neg) -> "PPAS(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ ")"
117   - | INF(a,n,r) -> "INF(" ^ aspect a ^ "," ^ negation n ^ "," ^ refl r ^ ")"
  117 + | INF(a,n) -> "INF(" ^ aspect a ^ "," ^ negation n (*^ "," ^ refl r*) ^ ")"
118 118 | QUB -> "QUB"
119 119 | COMPAR -> "COMPAR"
120 120 | COMP(c) -> "COMP(" ^ comp_type c ^ ")"
121   - | PERS(n,r) -> "PERS(" ^ negation n ^ "," ^ refl r ^ ")"
  121 + | PERS(n) -> "PERS(" ^ negation n ^ (*"," ^ refl r ^*) ")"
122 122 | FIXED -> "FIXED"
123 123  
124 124 let phrase = function
... ... @@ -136,7 +136,7 @@ let phrase = function
136 136 | InfP(a) -> "infp(" ^ aspect a (*^ req r*) ^ ")"
137 137 | AdvP -> "advp"
138 138 | FixedP s -> "fixed(" ^ s ^ ")"
139   - | Num(c,a) -> "num(" ^ case c ^ "," ^ acm a ^ ")"
  139 + (* | Num(c,a) -> "num(" ^ case c ^ "," ^ acm a ^ ")" *)
140 140 | Or -> "or"
141 141 | Qub -> "qub"
142 142 (* | Pro -> "pro"
... ...
walenty/ENIAMwalTEI.ml
... ... @@ -225,11 +225,14 @@ let process_lex_phrase lemma = function
225 225 | PrepNP(prep,case),number,GenderUndef,GradUndef,NegationUndef,ReflUndef -> [PREP case,Lexeme prep;SUBST(number,case),lemma]
226 226 | AdjP(case),number,gender,grad,NegationUndef,ReflUndef -> [ADJ(number,case,gender,grad),lemma]
227 227 | PrepAdjP(prep,case),number,gender,grad,NegationUndef,ReflUndef -> [PREP case,Lexeme prep;ADJ(number,case,gender,grad),lemma]
228   - | InfP(aspect),NumberUndef,GenderUndef,GradUndef,negation,refl -> [INF(aspect,negation,refl),lemma]
  228 + | InfP(aspect),NumberUndef,GenderUndef,GradUndef,negation,ReflTrue -> [INF(aspect,negation),lemma;QUB,Lexeme "się"]
  229 + | InfP(aspect),NumberUndef,GenderUndef,GradUndef,negation,refl -> [INF(aspect,negation),lemma]
229 230 | PpasP(case),number,gender,GradUndef,negation,ReflUndef -> [PPAS(number,case,gender,AspectUndef,negation),lemma]
230 231 | PrepPpasP(prep,case),number,gender,GradUndef,negation,ReflUndef -> [PREP case,Lexeme prep;PPAS(number,case,gender,AspectUndef,negation),lemma]
231   - | PactP(case),number,gender,GradUndef,negation,refl -> [PACT(number,case,gender,AspectUndef,negation,refl),lemma]
232   - | PrepGerP(prep,case),number,GenderUndef,GradUndef,negation,refl -> [PREP case,Lexeme prep;GER(number,case,GenderUndef,AspectUndef,negation,refl),lemma]
  232 + | PactP(case),number,gender,GradUndef,negation,ReflTrue -> [PACT(number,case,gender,AspectUndef,negation),lemma;QUB,Lexeme "się"]
  233 + | PactP(case),number,gender,GradUndef,negation,refl -> [PACT(number,case,gender,AspectUndef,negation),lemma]
  234 + | PrepGerP(prep,case),number,GenderUndef,GradUndef,negation,ReflTrue -> [PREP case,Lexeme prep;GER(number,case,GenderUndef,AspectUndef,negation),lemma;QUB,Lexeme "się"]
  235 + | PrepGerP(prep,case),number,GenderUndef,GradUndef,negation,refl -> [PREP case,Lexeme prep;GER(number,case,GenderUndef,AspectUndef,negation),lemma]
233 236 | Qub,NumberUndef,GenderUndef,GradUndef,NegationUndef,ReflUndef -> [QUB,lemma]
234 237 | AdvP,NumberUndef,GenderUndef,grad,NegationUndef,ReflUndef -> [ADV grad,lemma]
235 238 | phrase,number,gender,grad,negation,reflex ->
... ... @@ -248,10 +251,14 @@ let rec process_lex lex = function
248 251 | PhraseAbbr(Advp mode,[]),[],lemma,Lexeme "" ->
249 252 let poss = process_lex_phrase lemma (AdvP,lex.lex_number,lex.lex_gender,lex.lex_degree,lex.lex_negation,lex.lex_reflex) in
250 253 LexPhrase(poss,lex.lex_modification)
251   - | Phrase (NumP(case)),[],lemma,num_lemma -> LexPhrase([NUM(case,GenderUndef,AcmUndef),num_lemma;SUBST(NumberUndef,CaseUndef),lemma],lex.lex_modification)
252   - | Phrase (PrepNumP(prep,case)),[],lemma,num_lemma -> LexPhrase([PREP case,Lexeme prep;NUM(case,GenderUndef,AcmUndef),num_lemma;SUBST(NumberUndef,CaseUndef),lemma],lex.lex_modification)
253   - | PhraseComp(Cp,(ctype,[Comp comp])),[],lemma,Lexeme "" -> LexPhrase([COMP ctype,Lexeme comp;PERS(lex.lex_negation,lex.lex_reflex),lemma],lex.lex_modification)
254   - | PhraseComp(Cp,(ctype,[Comp comp1;Comp comp2])),[],lemma,Lexeme "" -> LexPhrase([COMP ctype,XOR[Lexeme comp1;Lexeme comp2];PERS(lex.lex_negation,lex.lex_reflex),lemma],lex.lex_modification)
  254 + | Phrase (NumP(case)),[],lemma,num_lemma -> LexPhrase([NUM(case,GenderUndef),num_lemma;SUBST(NumberUndef,CaseUndef),lemma],lex.lex_modification)
  255 + | Phrase (PrepNumP(prep,case)),[],lemma,num_lemma -> LexPhrase([PREP case,Lexeme prep;NUM(case,GenderUndef),num_lemma;SUBST(NumberUndef,CaseUndef),lemma],lex.lex_modification)
  256 + | PhraseComp(Cp,(ctype,[Comp comp])),[],lemma,Lexeme "" ->
  257 + if lex.lex_reflex = ReflTrue then LexPhrase([COMP ctype,Lexeme comp;PERS(lex.lex_negation),lemma;QUB,Lexeme "się"],lex.lex_modification)
  258 + else LexPhrase([COMP ctype,Lexeme comp;PERS(lex.lex_negation),lemma],lex.lex_modification)
  259 + | PhraseComp(Cp,(ctype,[Comp comp1;Comp comp2])),[],lemma,Lexeme "" ->
  260 + if lex.lex_reflex = ReflTrue then LexPhrase([COMP ctype,XOR[Lexeme comp1;Lexeme comp2];PERS(lex.lex_negation),lemma;QUB,Lexeme "się"],lex.lex_modification)
  261 + else LexPhrase([COMP ctype,XOR[Lexeme comp1;Lexeme comp2];PERS(lex.lex_negation),lemma],lex.lex_modification)
255 262 | Phrase phrase,[],lemma,Lexeme "" ->
256 263 let poss = process_lex_phrase lemma (phrase,lex.lex_number,lex.lex_gender,lex.lex_degree,lex.lex_negation,lex.lex_reflex) in
257 264 LexPhrase(poss,lex.lex_modification)
... ...
walenty/ENIAMwalTypes.ml
... ... @@ -31,7 +31,7 @@ type number = Number of string | NumberUndef | NumberAgr
31 31 type gender = Gender of string | GenderUndef | GenderAgr | Genders of string list
32 32 type grad = Grad of string | GradUndef
33 33 type refl = ReflEmpty | ReflTrue | ReflFalse | ReflUndef
34   -type acm = Acm of string | AcmUndef
  34 +(* type acm = Acm of string | AcmUndef *)
35 35  
36 36 type gf = SUBJ | OBJ | ARG
37 37  
... ... @@ -41,17 +41,17 @@ type pos =
41 41 | PPRON3 of number * case
42 42 | SIEBIE of case
43 43 | PREP of case
44   - | NUM of case * gender * acm
  44 + | NUM of case * gender (* acm*)
45 45 | ADJ of number * case * gender * grad
46 46 | ADV of grad
47   - | GER of number * case * gender * aspect * negation * refl
48   - | PACT of number * case * gender * aspect * negation * refl
  47 + | GER of number * case * gender * aspect * negation (** refl*)
  48 + | PACT of number * case * gender * aspect * negation (* refl*)
49 49 | PPAS of number * case * gender * aspect * negation
50   - | INF of aspect * negation * refl
  50 + | INF of aspect * negation (* refl*)
51 51 | QUB
52 52 | COMPAR
53 53 | COMP of comp_type
54   - | PERS of (*number * gender * aspect * person * *)negation * refl
  54 + | PERS of (*number * gender * aspect * person * *)negation (* refl*)
55 55 | FIXED
56 56  
57 57 type phrase =
... ... @@ -69,7 +69,7 @@ type phrase =
69 69 | InfP of aspect
70 70 | AdvP
71 71 | FixedP of string
72   - | Num of case * acm
  72 + (* | Num of case (* acm*) *)
73 73 | Or
74 74 | Qub
75 75 (* | Pro
... ...