diff --git a/lexSemantics/.gitignore b/lexSemantics/.gitignore
new file mode 100644
index 0000000..9daeafb
--- /dev/null
+++ b/lexSemantics/.gitignore
@@ -0,0 +1 @@
+test
diff --git a/lexSemantics/ENIAMlexSemantics.ml b/lexSemantics/ENIAMlexSemantics.ml
index e56c4a7..dfe38f3 100644
--- a/lexSemantics/ENIAMlexSemantics.ml
+++ b/lexSemantics/ENIAMlexSemantics.ml
@@ -1,7 +1,7 @@
 (*
  * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
- * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
- * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
+ * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
+ * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
  *
  * This library is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
diff --git a/lexSemantics/ENIAMlexSemanticsData.ml b/lexSemantics/ENIAMlexSemanticsData.ml
index 37593b6..3b2d2a9 100644
--- a/lexSemantics/ENIAMlexSemanticsData.ml
+++ b/lexSemantics/ENIAMlexSemanticsData.ml
@@ -1,7 +1,7 @@
 (*
  * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
- * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
- * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
+ * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
+ * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
  *
  * This library is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
diff --git a/lexSemantics/ENIAMlexSemanticsTypes.ml b/lexSemantics/ENIAMlexSemanticsTypes.ml
index aa01c73..4f93e4e 100644
--- a/lexSemantics/ENIAMlexSemanticsTypes.ml
+++ b/lexSemantics/ENIAMlexSemanticsTypes.ml
@@ -1,7 +1,7 @@
 (*
  * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
- * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
- * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
+ * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
+ * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
  *
  * This library is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
diff --git a/lexSemantics/ENIAMvalence.ml b/lexSemantics/ENIAMvalence.ml
new file mode 100644
index 0000000..8fb851f
--- /dev/null
+++ b/lexSemantics/ENIAMvalence.ml
@@ -0,0 +1,680 @@
+(*
+ * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
+ * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
+ * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
+ *
+ * This library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *)
+
+open ENIAMwalTypes
+open Xstd
+
+(* Maps a detailed part-of-speech tag to its coarse class ("noun", "adj", "verb"
+   or "pron"); every other tag is returned unchanged. *)
+let simplify_pos = function
+    "subst" -> "noun"
+  | "depr" -> "noun"
+  | "psubst" -> "noun"
+  | "pdepr" -> "noun"
+  | "adj" -> "adj"
+  | "adjc" -> "adj"
+  | "adjp" -> "adj"
+  | "ger" -> "verb"
+  | "pact" -> "verb"
+  | "ppas" -> "verb"
+  | "fin" -> "verb"
+  | "bedzie" -> "verb"
+  | "praet" -> "verb"
+  | "winien" -> "verb"
+  | "impt" -> "verb"
+  | "imps" -> "verb"
+  | "inf" -> "verb"
+  | "pcon" -> "verb"
+  | "pant" -> "verb"
+  | "pred" -> "verb"
+  | "ppron12" -> "pron"
+  | "ppron3" -> "pron"
+  | "siebie" -> "pron"
+  | s -> s
+
+(* Concrete complementizers standing for Zeby under the given negation. *)
+let transform_zeby = function
+    Aff -> [Comp "że"]
+  | Negation -> [Comp "że";Comp "żeby"]
+  | NegationUndef -> [Comp "że";Comp "żeby"]
+
+(* Concrete complementizers standing for Gdy in the given mood; the empty mood gives
+   both variants and an unrecognised mood raises Failure. *)
+let transform_gdy = function
+    "indicative" -> [Comp "gdy"]
+  | "imperative" -> [Comp "gdy"]
+  | "conditional" -> [Comp "gdyby"]
+  | "gerundial" -> [Comp "gdy"]
+  | "" -> [Comp "gdy";Comp "gdyby"]
+  | s -> failwith ("transform_gdy: " ^ s)
+
+(* Replaces the abstract complementizers Zeby and Gdy with concrete ones. *)
+let transform_comp negation mood = function
+    Comp comp -> [Comp comp]
+  | Zeby -> transform_zeby negation
+  | Gdy -> transform_gdy mood
+  | CompUndef -> [CompUndef]
+
+(* Cases realising the structural case: genitive in the "gerundial" mood, otherwise
+   accusative and/or genitive depending on negation. *)
+let transform_str mood negation =
+  if mood = "gerundial" then [Case "gen"] else
+  match negation with
+    Aff -> [Case "acc"]
+  | Negation -> [Case "gen"]
+  | NegationUndef -> [Case "acc";Case "gen"]
+
+(* Phrase realisations in schemata of nouns. Here and in the sibling transform_*_phrase
+   and transform_*_pos functions an unlisted realisation is printed and kept as is. *)
+let transform_np_phrase lemma = function
+    NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)]
+  | NP(CaseAgr) -> [NP(CaseAgr)(*;NumP(CaseAgr)*)]
+  | NCP(Case c,ctype,comp) -> [NCP(Case c,ctype,comp)]
+  | AdjP(Case _) as morf -> [morf] (* only 'zagłada adjp(postp)' *)
+  | AdjP(CaseAgr) -> [AdjP(AllAgr)]
+  | AdjP(Str) -> [AdjP(AllAgr)] (* probably an error in Walenty; only 'barwa', 'bieda', 'głód', 'kolor', 'nędza', 'śmierć', 'usta' *)
+  | CP(ctype,comp) as morf -> [morf]
+  | PrepNP _ as morf -> [morf]
+  | PrepAdjP _ as morf -> [morf] (* looks like a systematic error in Walenty: xp(abl[prepadjp(z,gen)]) *)
+  | ComprepNP _ as morf -> [morf]
+  | ComparP _ as morf -> [morf]
+  | PrepNCP _ as morf -> [morf]
+  | AdvP as morf -> [morf] (* occurs only for the lemmas: cyrk, trwałość x2, zagłada *)
+  | FixedP _ as morf -> [morf]
+  | Or as morf -> [morf]
+  (* | Pro as morf -> [morf] *)
+  | Null as morf -> [morf]
+  | phrase -> print_endline ("transform_np_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase]
+
+(* Lexicalised (part-of-speech) realisations in schemata of nouns. *)
+let transform_np_pos lemma = function
+  | SUBST(_,Case _) as morf -> [morf]
+  | PPRON3(_,Case _) as morf -> [morf]
+  | SUBST(_,CaseAgr) as morf -> [morf]
+  | SUBST(n,Str) -> [ADJ(n,AllAgr,GenderUndef,Grad "pos")] (* error in Walenty: 'zła godzina' *)
+  | ADJ(_,Case _,_,_) as morf -> [morf]
+  | ADJ(n,CaseAgr,GenderAgr,gr) -> [ADJ(n,AllAgr,GenderAgr,gr)]
+  | PACT(n,CaseAgr,g,a,neg) -> [PACT(n,AllAgr,g,a,neg)]
+  | PPAS(_,Case _,_,_,_) as morf -> [morf]
+  | PPAS(n,CaseAgr,g,a,neg) -> [PPAS(n,AllAgr,g,a,neg)]
+  | ADJ(n,Str,g,gr) -> [ADJ(n,AllAgr,g,gr)]
+  | PPAS(n,Str,g,a,neg) -> [PPAS(n,AllAgr,g,a,neg)]
+  | PREP(Case _) as morf -> [morf]
+  | ADV _ as morf -> [morf] (* adverbial realisations end up here *)
+  | COMP _ as morf -> [morf]
+  | QUB as morf -> [morf]
+  | PERS _ as morf -> [morf]
+  | pos -> print_endline ("transform_np_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos]
+
+(* Phrase realisations in schemata of adjectives. *)
+let transform_adj_phrase lemma = function
+    NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)]
+  | NP(Part) -> [NP(Case "gen");NP(Case "acc")(*;NumP(Case "gen");NumP(Case "acc")*)] (* single occurrence: 'krewny' *)
+  | NCP(Case c,ctype,comp) -> [NCP(Case c,ctype,comp)]
+  | AdjP(CaseAgr) -> [AdjP(AllAgr)] (* single occurrence: 'cały szczęśliwy'; type raising might be useful *)
+  | CP(ctype,comp) as morf -> [morf]
+  | PrepNP _ as morf -> [morf]
+  | PrepAdjP _ as morf -> [morf]
+  | ComprepNP _ as morf -> [morf]
+  | ComparP _ as morf -> [morf]
+  | PrepNCP _ as morf -> [morf]
+  | InfP _ as morf -> [morf]
+  | AdvP as morf -> [morf]
+  (* | FixedP _ as morf -> [morf]*)
+  | Or as morf -> [morf] (* single occurrence: 'jednoznaczny' *)
+  (* | Pro as morf -> [morf] *)
+  | Null as morf -> [morf]
+  | morf -> print_endline ("transform_adj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf]
+
+(* Lexicalised realisations in schemata of adjectives. *)
+let transform_adj_pos lemma = function
+  | ADJ(n,CaseAgr,g,gr) -> [ADJ(n,AllAgr,g,gr)]
+  | PREP(Case _) as morf -> [morf]
+  | ADV _ as morf -> [morf]
+  | QUB as morf -> [morf]
+  | morf -> print_endline ("transform_adj_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos morf); [morf]
+
+(* Phrase realisations in schemata of adverbs. *)
+let transform_adv_phrase lemma = function
+    NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)]
+  | NCP(Case c,ctype,comp) -> [NCP(Case c,ctype,comp)]
+  | CP(ctype,comp) as morf -> [morf]
+  | PrepNP _ as morf -> [morf]
+  | PrepAdjP _ as morf -> [morf]
+  | ComprepNP _ as morf -> [morf]
+  | ComparP _ as morf -> [morf]
+  | PrepNCP _ as morf -> [morf]
+  | InfP _ as morf -> [morf]
+  | AdvP as morf -> [morf]
+(*  | Or as morf -> [morf]*)
+  (* | Pro as morf -> [morf] *)
+  | Null as morf -> [morf]
+(*  | AdjP(CaseAgr) as morf -> [morf]*)
+  (* | FixedP _ as morf -> [morf]*)
+  | morf -> print_endline ("transform_adv_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf]
+
+(* Lexicalised realisations in schemata of adverbs. *)
+let transform_adv_pos lemma = function
+    SUBST(_,Case _) as morf -> [morf]
+  (* | ADJ(_,CaseAgr,_,_) as morf -> [morf]*)
+  | COMP _ as morf -> [morf]
+  | PREP(Case _) as morf -> [morf]
+  | COMPAR _ as morf -> [morf]
+  | ADV _ as morf -> [morf] (* adverbial realisations end up here *)
+  | morf -> print_endline ("transform_adv_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos morf); [morf]
+
+(* No phrase realisation is listed for prepositions: each one is printed and kept. *)
+let transform_prep_phrase lemma = function
+  | phrase -> print_endline ("transform_prep_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase]
+
+(* Lexicalised realisations in schemata of prepositions. *)
+let transform_prep_pos lemma = function
+  | SUBST(_,Case _) as morf -> [morf]
+  | SIEBIE(Case _) as morf -> [morf]
+  | PPRON12(_,Case _) as morf -> [morf]
+  | PPRON3(_,Case _) as morf -> [morf]
+  | SUBST(n,Str) -> [SUBST(n,CaseAgr)]
+  | NUM(Case _,_) as morf -> [morf]
+  | ADJ(_,Case _,_,_) as morf -> [morf]
+  | GER(_,Case _,_,_,_) as morf -> [morf]
+  | PPAS(_,Case _,_,_,_) as morf -> [morf]
+(*  | ADV _ as morf -> [morf]
+  | QUB as morf -> [morf]*)
+  | pos -> print_endline ("transform_prep_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos]
+
+(* Phrase realisations in schemata of compound prepositions. *)
+let transform_comprep_phrase lemma = function
+    NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)]
+  | NCP(Case c,ctype,comp) -> [NCP(Case c,ctype,comp)]
+  | PrepNP _ as morf -> [morf]
+  | PrepNCP _ as morf -> [morf]
+  | phrase -> print_endline ("transform_comprep_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase]
+
+(* Lexicalised realisations in schemata of compound prepositions. *)
+let transform_comprep_pos lemma = function
+  | SUBST(_,Case _) as morf -> [morf]
+(*  | SUBST(n,Str) -> [SUBST(n,CaseAgr)]*)
+  | NUM(Case _,_) as morf -> [morf]
+(*  | ADJ(_,Case _,_,_) as morf -> [morf]
+  | GER(_,Case _,_,_,_,_) as morf -> [morf]
+  | PPAS(_,Case _,_,_,_) as morf -> [morf]
+  | ADV _ as morf -> [morf]
+  | QUB as morf -> [morf]*)
+  | pos -> print_endline ("transform_comprep_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos]
+
+(* Phrase realisations in schemata of comparative markers. *)
+let transform_compar_phrase lemma = function
+  | NP(Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> NP(Case case)) (* FIXME: check who controls! *) (* FIXME: agree with the comparative *)
+  | FixedP _ as morf -> [morf]
+  | phrase -> print_endline ("transform_compar_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase]
+
+(* Lexicalised realisations in schemata of comparative markers. *)
+let transform_compar_pos lemma = function
+  | SUBST(_,Case _) as morf -> [morf]
+  | ADJ(_,Case _,_,_) as morf -> [morf]
+  | PREP(Case _) as morf -> [morf]
+  | SUBST(Number n,Str) -> [SUBST(Number n,CaseUndef)]
+  | SUBST(NumberAgr,Str) -> [SUBST(NumberUndef,CaseUndef)]
+  | SUBST(NumberUndef,Str) -> [SUBST(NumberUndef,CaseUndef)]
+  | PPAS(_,Case _,_,_,_) as morf -> [morf]
+  | PPAS(NumberAgr,Str,GenderAgr,a,neg) -> [PPAS(NumberUndef,CaseUndef,GenderUndef,a,neg)] (* FIXME: the same situation as for "jako" *)
+  | PPAS(NumberAgr,CaseAgr,GenderAgr,a,neg) -> [PPAS(NumberUndef,CaseUndef,GenderUndef,a,neg)] (* FIXME: the same situation as for "jako" *)
+  | ADJ(NumberAgr,Str,GenderAgr,gr) -> [ADJ(NumberUndef,CaseUndef,GenderUndef,gr)] (* FIXME: the same situation as for "jako" *)
+  | ADJ(NumberAgr,CaseAgr,GenderAgr,gr) -> [ADJ(NumberUndef,CaseUndef,GenderUndef,gr)] (* FIXME: the same situation as for "jako" *)
+  | NUM(Case _,_) as morf -> [morf]
+  | pos -> print_endline ("transform_compar_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos]
+
+(* No phrase realisation is listed for complementizers: each one is printed and kept. *)
+let transform_comp_phrase lemma = function
+  | phrase -> print_endline ("transform_comp_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase]
+
+(* Lexicalised realisations in schemata of complementizers. *)
+let transform_comp_pos lemma = function
+  | PERS _ as morf -> [morf]
+  | pos -> print_endline ("transform_comp_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos]
+
+(* No phrase realisation is listed for particles: each one is printed and kept. *)
+let transform_qub_phrase lemma = function
+  | phrase -> print_endline ("transform_qub_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase]
+
+(* Lexicalised realisations in schemata of particles. *)
+let transform_qub_pos lemma = function
+  | QUB as morf -> [morf]
+  | pos -> print_endline ("transform_qub_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos]
+
+(* No phrase realisation is listed for "siebie": each one is printed and kept. *)
+let transform_siebie_phrase lemma = function
+  | phrase -> print_endline ("transform_siebie_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase]
+
+(* Lexicalised realisations in schemata of "siebie". *)
+let transform_siebie_pos lemma = function
+  | ADJ(NumberAgr,CaseAgr,GenderAgr,gr) -> [ADJ(NumberAgr,AllAgr,GenderAgr,gr)]
+  | pos -> print_endline ("transform_siebie_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos]
+
+(* Subject phrases of personal verb forms: the structural case becomes NomAgr. *)
+let transform_pers_subj_phrase lemma negation mood = function (* FIXME: prepnp(na,loc) *)
+  | NP(Str) -> [NP(NomAgr)(*;NumP(NomAgr)*)]
+  | NP(Part) -> [NP(Case "gen");NP(Case "acc")(*;NumP(Case "gen");NumP(Case "acc")*)] (* only in 'nalewać', 'nalać', 'ponalewać', 'najechać','uzbierać' *)
+  | NCP(Str,ctype,comp) -> [NCP(NomAgr,ctype,comp)]
+  | CP(ctype,comp) as morf -> [morf]
+  | InfP _ as morf -> [morf]
+  | Or as morf -> [morf]
+  (* | Pro -> [ProNG] *)
+  | Null -> [Null]
+  | morf -> print_endline ("transform_pers_subj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf]
+
+(* Lexicalised subjects of personal verb forms: structural and nominative case become NomAgr. *)
+let transform_pers_subj_pos lemma negation mood = function
+  (* COMP _ as morf -> [morf]*)
+  | SUBST(n,Str) -> [SUBST(n,NomAgr)]
+  | SUBST(n,Case "nom") -> [SUBST(n,NomAgr)] (* looks like an error in Walenty, but it does not matter *)
+  | NUM(Str,g) -> [NUM(NomAgr,g)]
+  | NUM(Case "nom",g) -> [NUM(NomAgr,g)]
+(*  | ADJ(n,Str,g,gr) -> [ADJ(n,NomAgr,g,gr)]*)
+  | morf -> print_endline ("transform_pers_subj_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos morf); [morf]
+
+(* Subject phrases of gerunds: the structural case becomes genitive or a "przez" phrase. *)
+let transform_ger_subj_phrase lemma negation mood = function
+  | NP(Str) -> [NP(Case "gen");PrepNP("przez",Case "acc")(*;NumP(Case "gen")*)(*;PrepNumP("przez",Case "acc")*)] (* FIXME: is przez:acc possible? *)
+  | NP(Part) -> [NP(Case "gen")(*;NP(Case "acc")*)(*;NumP(Case "gen");NumP(Case "acc")*)]
+  | NCP(Str,ctype,comp) -> [NCP(Case "gen",ctype,comp);PrepNCP("przez",Case "acc",ctype,comp)] (* FIXME: is przez:acc possible? *)
+  | CP(ctype,comp) as morf -> [morf]
+  | InfP _ as morf -> [morf] (* FIXME: is this possible? *)
+  | Or as morf -> [morf]
+  (* | Pro -> if control then [Pro] else [Null] *)
+  | Null -> [Null]
+  | morf -> print_endline ("transform_ger_subj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf]
+
+(* Lexicalised subjects of gerunds: structural and nominative case become genitive. *)
+let transform_ger_subj_pos lemma negation mood = function (* FIXME: ADV(_) *)
+  (* COMP _ as morf -> [morf] (* FIXME: is this possible? *)*)
+  | SUBST(n,Str) -> [SUBST(n,Case "gen")]
+  | SUBST(n,Case "nom") -> [SUBST(n,Case "gen")] (* looks like an error in Walenty, but it does not matter *)
+  | NUM(Str,g) -> [NUM(Case "gen",g)]
+  | NUM(Case "nom",g) -> [NUM(Case "gen",g)]
+(*  | ADJ(n,Str,g,gr) -> [ADJ(n,Case "gen",g,gr)]*)
+  | morf -> print_endline ("transform_ger_subj_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos morf); [morf] (* the report names this function, not transform_pers_subj_pos *)
+
+(* Subject phrases of passive participles: the structural case becomes a "przez" phrase. *)
+let transform_ppas_subj_phrase lemma negation mood control = function
+  | NP(Str) -> [PrepNP("przez",Case "acc")(*;PrepNumP("przez",Case "acc")*)]
+  | NCP(Str,ctype,comp) -> [PrepNCP("przez",Case "acc",ctype,comp)]
+  | CP(ctype,comp) as morf -> [morf]
+  (* | Pro -> if control then [Pro] else [Null] *)
+  | morf -> print_endline ("transform_ppas_subj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf]
+
+(* Non-subject phrases of verbal forms; the structural case is resolved by transform_str. *)
+let transform_pers_phrase lemma negation mood = function
+  | NP(Str) -> List.flatten (Xlist.map (transform_str mood negation) (fun case -> [NP case(*;NumP(case)*)]))
+  | NP(Part) -> [NP(Case "gen")] @ (if mood = "gerundial" then [] else [NP(Case "acc")(*;NumP(Case "gen");NumP(Case "acc")*)])
+  | NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)]
+  | NCP(Str,ctype,comp) -> List.flatten (Xlist.map (transform_str mood negation) (fun case -> [NCP(case,ctype,comp)]))
+  | NCP(Part,ctype,comp) -> List.flatten (Xlist.map (transform_str mood negation) (fun case -> [NCP(case,ctype,comp)]))
+  | NCP(Case case,ctype,comp) -> [NCP(Case case,ctype,comp)]
+  | AdjP(Str) -> Xlist.map (transform_str mood negation) (fun case -> AdjP case) (* FIXME: number and gender agreement is skipped - feasible by means of control *)
+  | AdjP CaseAgr as morf -> if mood = "gerundial" then [AdjP AllAgr] else (print_endline ("transform_pers_phrase2: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf])
+  | AdjP(Case _) as morf -> [morf] (* FIXME: number and gender agreement is skipped - feasible by means of control *)
+  | CP(ctype,comp) as morf -> [morf]
+  | PrepNP _ as morf -> [morf]
+  | PrepAdjP _ as morf -> [morf] (* FIXME: number and gender agreement is skipped - feasible by means of control *)
+  | ComprepNP _ as morf -> [morf]
+  | ComparP _ as morf -> [morf]
+  | PrepNCP _ as morf -> [morf]
+  | InfP _ as morf -> [morf]
+  | AdvP -> if mood = "gerundial" then [AdjP AllAgr] else [AdvP]
+  | FixedP _ as morf -> [morf]
+  | Or as morf -> [morf]
+  (* | Pro as morf -> [morf] *)
+  | Null as morf -> [morf]
+  | morf -> print_endline ("transform_pers_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf]
+
+(* Lexicalised non-subject realisations of verbal forms; structural case goes through transform_str. *)
+let transform_pers_pos lemma negation mood = function
+  | SUBST(n,Str) -> Xlist.map (transform_str mood negation) (fun case -> SUBST(n,case))
+  | PPRON12(n,Str) -> Xlist.map (transform_str mood negation) (fun case -> PPRON12(n,case))
+  | PPRON3(n,Str) -> Xlist.map (transform_str mood negation) (fun case -> PPRON3(n,case))
+  | SIEBIE(Str) -> Xlist.map (transform_str mood negation) (fun case -> SIEBIE(case))
+  | NUM(Str,g) -> Xlist.map (transform_str mood negation) (fun case -> NUM(case,g))
+  | ADJ(n,Str,g,gr) -> Xlist.map (transform_str mood negation) (fun case -> ADJ(n,case,g,gr))
+(*  | PPAS(n,Str,g,a,neg) -> Xlist.map (transform_str negation) (fun case -> PPAS(n,Str,g,a,neg))*)
+  | SUBST(n,Part) -> [SUBST(n,Case "gen");SUBST(n,Case "acc")]
+  | ADJ(n,Part,g,gr) -> [ADJ(n,Case "gen",g,gr);ADJ(n,Case "acc",g,gr)]
+  | ADJ(n,CaseAgr,g,gr) as morf -> if lemma = "siedzieć" then [morf] else (print_endline ("transform_pers_pos2: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos morf); [morf]) (* FIXME *)
+  | SUBST(_,Case _) as morf -> [morf]
+  | PPRON12(_,Case _) as morf -> [morf]
+  | PPRON3(_,Case _) as morf -> [morf]
+  | SIEBIE(Case _) as morf -> [morf]
+  | NUM(Case _,_) as morf -> [morf]
+  | PREP _ as morf -> [morf]
+  | ADJ(_,Case _,_,_) as morf -> [morf]
+  | PPAS(_,Case _,_,_,_) as morf -> [morf]
+(*  | SUBST(n,CaseAgr) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> SUBST(n,Case case)) (* FIXME: check who controls! *)
+  | ADJ(n,CaseAgr,g,gr) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> ADJ(n,Case case,g,gr)) (* FIXME: check who controls! *)*)
+  | COMPAR _ as morf -> [morf]
+  | COMP _ as morf -> [morf]
+  | INF _ as morf -> [morf]
+  | QUB as morf -> [morf]
+  | ADV grad -> (*if mood = "gerundial" then [ADJ(NumberAgr,AllAgr,GenderAgr,grad)] else*) [ADV grad] (* FIXME: this will not fix the lemma *)
+  | PERS _ as morf -> [morf]
+  | morf -> print_endline ("transform_pers_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos morf); [morf]
+
+(* Expands the complementizer of CP, NCP and PrepNCP (also under E) with transform_comp. *)
+let rec transform_comps negation mood = function
+  | CP(ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> CP(ctype,comp))
+  | NCP(case,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> NCP(case,ctype,comp))
+  | PrepNCP(prep,case,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> PrepNCP(prep,case,ctype,comp))
+  | E phrase -> Xlist.map (transform_comps negation mood phrase) (fun phrase -> E phrase)
+  | morf -> [morf]
+
+(* Lexemes treated as comparative markers rather than as prepositions. *)
+let compars = StringSet.of_list ["jak"; "jako"; "niż"; "niczym" ;"niby"; "co"; "zamiast"]
+
+let is_compar lex = StringSet.mem compars lex
+
+(* Turns prepositional realisations headed by a lexeme from compars into comparative ones and
+   expands the structural case where a rule is given; other non-concrete cases raise Failure. *)
+(* FIXME: case, number and gender agreement is skipped - feasible by means of control *)
+let transform_preps morf =
+  let morf = match morf with
+    | LexArg(id,lex,PREP c) -> if is_compar lex then LexArg(id,lex,COMPAR c) else LexArg(id,lex,PREP c)
+    | SimpleLexArg(lex,PREP c) -> if is_compar lex then SimpleLexArg(lex,COMPAR c) else SimpleLexArg(lex,PREP c)
+    | PrepNP(prep,c) -> if is_compar prep then ComparP(prep,c) else PrepNP(prep,c)
+    | PrepAdjP(prep,c) -> if is_compar prep then ComparP(prep,c) else PrepAdjP(prep,c)
+    | PrepNCP(prep,case,ctype,comp) as morf -> if is_compar prep then failwith "transform_preps" else morf
+    | morf -> morf in
+  match morf with
+  | ComparP(prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> ComparP(prep,Case case))
+  | ComparP _ -> failwith "transform_preps"
+  | LexArg(id,lex,COMPAR Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> LexArg(id,lex,COMPAR (Case case)))
+  | SimpleLexArg(lex,COMPAR Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> SimpleLexArg(lex,COMPAR (Case case)))
+  | LexArg(id,lex,COMPAR (Case _)) as morf -> [morf]
+  | SimpleLexArg(lex,COMPAR (Case _)) as morf -> [morf]
+  | LexArg(id,lex,COMPAR _) -> failwith "transform_preps"
+  | SimpleLexArg(lex,COMPAR _) -> failwith "transform_preps"
+  | PrepNP("per",Str) -> [PrepNP("per",Case "nom");PrepNP("per",Case "voc")] (* FIXME: voc to be corrected in the lexicon *)
+  | PrepNP(_,Case _) as morf -> [morf]
+  | PrepAdjP(_,Case _) as morf -> [morf]
+  | PrepNCP(_,Case _,_,_) as morf -> [morf]
+  | PrepNP _ -> failwith "transform_preps"
+  | PrepAdjP _ -> failwith "transform_preps"
+  | PrepNCP _ -> failwith "transform_preps"
+  | LexArg(id,"w",PREP Str) -> [LexArg(id,"w",PREP (Case "acc"));LexArg(id,"w",PREP (Case "loc"));]
+  | SimpleLexArg("w",PREP Str) -> [SimpleLexArg("w",PREP (Case "acc"));SimpleLexArg("w",PREP (Case "loc"))]
+  | LexArg(id,lex,PREP (Case _)) as morf -> [morf]
+  | SimpleLexArg(lex,PREP (Case _)) as morf -> [morf]
+  | LexArg(id,lex,PREP _) -> failwith "transform_preps"
+  | SimpleLexArg(lex,PREP _) -> failwith "transform_preps"
+  | morf -> [morf]
+
+(* Schema of a personal verb form. Every position first goes through transform_comps and
+   transform_preps; subject and non-subject positions are then transformed separately. *)
+let transform_pers_schema lemma negation mood schema =
+  Xlist.map schema (fun s ->
+    {s with morfs =
+      let morfs = List.flatten (Xlist.map s.morfs (transform_comps negation mood)) in
+      let morfs = List.flatten (Xlist.map morfs transform_preps) in
+      if s.gf = SUBJ then List.flatten (Xlist.map morfs (function
+        | E phrase -> Xlist.map (transform_pers_subj_phrase lemma negation mood phrase) (fun phrase -> E phrase)
+        | LexArg(id,lex,pos) -> Xlist.map (transform_pers_subj_pos lemma negation mood pos) (fun pos -> LexArg(id,lex,pos))
+        | SimpleLexArg(lex,pos) -> Xlist.map (transform_pers_subj_pos lemma negation mood pos) (fun pos -> SimpleLexArg(lex,pos))
+        | phrase -> transform_pers_subj_phrase lemma negation mood phrase))
+      else List.flatten (Xlist.map morfs (function (* the expanded morfs, not s.morfs, as in transform_nosubj_schema *)
+        | LexArg(id,lex,pos) -> Xlist.map (transform_pers_pos lemma negation mood pos) (fun pos -> LexArg(id,lex,pos))
+        | SimpleLexArg(lex,pos) -> Xlist.map (transform_pers_pos lemma negation mood pos) (fun pos -> SimpleLexArg(lex,pos))
+        | phrase -> transform_pers_phrase lemma negation mood phrase))})
+
+(* Like transform_pers_schema, but the subject position is reduced to [Null]. *)
+let transform_nosubj_schema lemma negation mood schema =
+  Xlist.map schema (fun s ->
+    {s with morfs =
+      let morfs = List.flatten (Xlist.map s.morfs (transform_comps negation mood)) in
+      let morfs = List.flatten (Xlist.map morfs transform_preps) in
+      if s.gf = SUBJ then [Null]
+      else List.flatten (Xlist.map morfs (function
+        | LexArg(id,lex,pos) -> Xlist.map (transform_pers_pos lemma negation mood pos) (fun pos -> LexArg(id,lex,pos))
+        | SimpleLexArg(lex,pos) -> Xlist.map (transform_pers_pos lemma negation mood pos) (fun pos -> SimpleLexArg(lex,pos))
+        | phrase -> transform_pers_phrase lemma negation mood phrase))})
+
+(* let transform_ger_adv_lex = function
+  | s -> print_endline ("transform_ger_adv_lex: " ^ s); s
+
+let transform_ger_adv_pos = function
+  | LexArg(id,lex,ADV grad) -> LexArg(id,transform_ger_adv_lex lex,ADJ(NumberAgr,AllAgr,GenderAgr,grad))
+  | SimpleLexArg(lex,ADV grad) -> SimpleLexArg(transform_ger_adv_lex lex,ADJ(NumberAgr,AllAgr,GenderAgr,grad))
+  | morf -> morf *)
+
+(* Schema of a gerund; the mood is fixed to "gerundial". *)
+let transform_ger_schema lemma negation schema = (* FIXME: I assume that ger resets mood; is that true? *)
+  Xlist.map schema (fun s ->
+    {s with morfs =
+      let morfs = List.flatten (Xlist.map s.morfs (transform_comps negation "gerundial")) in
+      let morfs = List.flatten (Xlist.map morfs transform_preps) in
+      (* let morfs = Xlist.map morfs transform_ger_adv_pos in *)
+      if s.gf = SUBJ then List.flatten (Xlist.map morfs (function
+        | E phrase -> Xlist.map (transform_ger_subj_phrase lemma negation "gerundial" phrase) (fun phrase -> E phrase)
+        | LexArg(id,lex,pos) -> Xlist.map (transform_ger_subj_pos lemma negation "gerundial" pos) (fun pos -> LexArg(id,lex,pos))
+        | SimpleLexArg(lex,pos) -> Xlist.map (transform_ger_subj_pos lemma negation "gerundial" pos) (fun pos -> SimpleLexArg(lex,pos))
+        | phrase -> transform_ger_subj_phrase lemma negation "gerundial" phrase))
+      else List.flatten (Xlist.map morfs (function (* the expanded morfs, not s.morfs, as in transform_nosubj_schema *)
+        | LexArg(id,lex,pos) -> Xlist.map (transform_pers_pos lemma negation "gerundial" pos) (fun pos -> LexArg(id,lex,pos))
+        | SimpleLexArg(lex,pos) -> Xlist.map (transform_pers_pos lemma negation "gerundial" pos) (fun pos -> SimpleLexArg(lex,pos))
+        | phrase -> transform_pers_phrase lemma negation "gerundial" phrase))})
+
+(* Schema of a passive participle: the object position becomes [Null] and the subject a "przez"
+   phrase. Raises Not_found when there is no object position or the subject cannot be converted. *)
+let transform_ppas_schema lemma negation mood schema =
+  if not (Xlist.fold schema false (fun b p -> if p.gf = OBJ then true else b)) then raise Not_found else
+  Xlist.map schema (fun s ->
+    let morfs = List.flatten (Xlist.map s.morfs (transform_comps negation mood)) in
+    let morfs = List.flatten (Xlist.map morfs transform_preps) in
+    {s with morfs =
+      if s.gf = OBJ then [Null] else
+      if s.gf = SUBJ then List.flatten (Xlist.map morfs (function
+        | E phrase -> raise Not_found (* only 'obladzać' and 'oblodzić', probably an error *)
+        | LexArg(id,lex,SUBST(n,Str)) -> raise Not_found (* FIXME!!! *)
+        | SimpleLexArg(lex,SUBST(n,Str)) -> raise Not_found (* FIXME!!! *)
+        | phrase -> transform_ppas_subj_phrase lemma negation mood (s.cr <> [] || s.ce <> []) phrase))
+      else List.flatten (Xlist.map morfs (function (* the expanded morfs, not s.morfs, as in transform_nosubj_schema *)
+        | LexArg(id,lex,pos) -> Xlist.map (transform_pers_pos lemma negation mood pos) (fun pos -> LexArg(id,lex,pos))
+        | SimpleLexArg(lex,pos) -> Xlist.map (transform_pers_pos lemma negation mood pos) (fun pos -> SimpleLexArg(lex,pos))
+        | phrase -> transform_pers_phrase lemma negation mood phrase))})
+
+(* Schema of a numeral for the accommodation type acm ("rec" or "congr"); anything other
+   than Null or a lexical SUBST(NumberUndef,CaseUndef) argument raises Failure. *)
+let transform_num_schema acm schema =
+  Xlist.map schema (fun s ->
+    {s with morfs=List.flatten (Xlist.map s.morfs (function
+      | Null -> [Null]
+      | LexArg(id,lex,SUBST(NumberUndef,CaseUndef)) ->
+          (match acm with
+            "rec" -> [LexArg(id,lex,SUBST(NumberUndef,GenAgr))]
+          | "congr" -> [LexArg(id,lex,SUBST(NumberUndef,AllAgr))]
+          | _ -> failwith "transform_num_schema")
+      | SimpleLexArg(lex,SUBST(NumberUndef,CaseUndef)) ->
+          (match acm with
+            "rec" -> [SimpleLexArg(lex,SUBST(NumberUndef,GenAgr))]
+          | "congr" -> [SimpleLexArg(lex,SUBST(NumberUndef,AllAgr))]
+          | _ -> failwith "transform_num_schema")
+      | morf -> failwith ("transform_num_schema: " ^ ENIAMwalStringOf.phrase morf)))})
+
+(* Transforms a schema of a non-verbal lemma with the pair of functions selected by pos;
+   an unsupported pos raises Failure naming that pos. *)
+let transform_schema pos lemma schema =
+  let phrase_fun,pos_fun = match pos with
+      "subst" -> transform_np_phrase,transform_np_pos
+    | "adj" -> transform_adj_phrase,transform_adj_pos
+    | "adv" -> transform_adv_phrase,transform_adv_pos
+    | "prep" -> transform_prep_phrase,transform_prep_pos
+    | "comprep" -> transform_comprep_phrase,transform_comprep_pos
+    | "compar" -> transform_compar_phrase,transform_compar_pos
+    | "comp" -> transform_comp_phrase,transform_comp_pos
+    | "qub" -> transform_qub_phrase,transform_qub_pos
+    | "siebie" -> transform_siebie_phrase,transform_siebie_pos
+    | _ -> failwith ("transform_schema: " ^ pos)
+    in
+  Xlist.map schema (fun s ->
+    let morfs = List.flatten (Xlist.map s.morfs (transform_comps NegationUndef "")) in (* FIXME: dependence on the conditional mood and negation *)
+    {s with morfs=List.flatten (Xlist.map morfs (function
+        LexArg(id,lex,pos) -> Xlist.map (pos_fun lemma pos) (fun pos -> LexArg(id,lex,pos))
+      | SimpleLexArg(lex,pos) -> Xlist.map (pos_fun lemma pos) (fun pos -> SimpleLexArg(lex,pos))
+      | phrase -> phrase_fun lemma phrase))})
+
+(* Drops positions whose realisations are exactly [Null;AdjP(CaseAgr)] or [Null;AdjP(Part)]. *)
+let rec remove_adj_agr = function
+    [] -> []
+  | {morfs=[Null;AdjP(CaseAgr)]} :: l -> remove_adj_agr l
+  | {morfs=[Null;AdjP(Part)]} :: l -> remove_adj_agr l
+  | s :: l -> (*print_endline (ENIAMwalStringOf.schema [s]);*) s :: (remove_adj_agr l)
+
+(* Role and role attribute of the first position whose function is gf; Not_found if there is none. *)
+let rec get_role gf = function
+    [] -> raise Not_found
+  | s :: l -> if s.gf = gf then s.role,s.role_attr else get_role gf l
+
+(* Enumerates the concrete negation values covered by the given one. *)
+let expand_negation = function
+    Negation -> [Negation]
+  | Aff -> [Aff]
+  | NegationUndef -> [Negation;Aff]
+
+(* Enumerates the concrete aspect values covered by the given one. *)
+let expand_aspect = function
+    Aspect s -> [Aspect s]
+  | AspectUndef -> [Aspect "imperf";Aspect "perf"]
+
+(* Selector restricting the aspect; empty when the aspect is undefined. *)
+let aspect_sel = function
+    Aspect s -> [ENIAM_LCGlexiconTypes.Aspect,ENIAM_LCGlexiconTypes.Eq,[s]]
+  | AspectUndef -> []
+
+open ENIAM_LCGlexiconTypes
+
+(* Builds the list of (selectors, transformed schema) variants of a schema for the given
+   part of speech; inconsistent attributes or an unknown pos raise Failure. *)
+let transform_entry pos lemma negation pred aspect schema =
+  if pos = "subst" || pos = "depr" then (
+    if negation <> NegationUndef || pred <> PredFalse || aspect <> AspectUndef then failwith ("transform_entry 1");
+    [[],transform_schema "subst" lemma schema]) else
+  if pos = "adj" || pos = "adjc" || pos = "adjp" then (
+    if negation <> NegationUndef || aspect <> AspectUndef then failwith ("transform_entry 2");
+    let sel = match pred with PredTrue -> [Case,Eq,["pred"]] | _ -> [] in
+    [sel,transform_schema "adj" lemma schema]) else
+  if pos = "adv" || pos = "prep" || pos = "comprep" || pos = "comp" || pos = "compar" || pos = "qub" || pos = "siebie" then (
+    if negation <> NegationUndef || (*pred <> PredFalse ||*) aspect <> AspectUndef then failwith ("transform_entry 3"); (* FIXME: adverb types *)
+    [[],transform_schema pos lemma schema]) else
+  if pred <> PredFalse then failwith ("transform_entry 4") else
+  if pos = "num" || pos = "intnum" then (
+    if negation <> NegationUndef || aspect <> AspectUndef then failwith ("transform_entry 5");
+    Xlist.map ["congr";"rec"] (fun acm ->
+      [Acm,Eq,[acm]],transform_num_schema acm schema)) else
+  List.flatten (Xlist.map (expand_negation negation) (fun negation ->
+    let sel = [Negation,Eq,[ENIAMwalStringOf.negation negation]] @ aspect_sel aspect in
+    if pos = "fin" || pos = "bedzie" then
+      [sel @ [Mood,Eq,["indicative"]],transform_pers_schema lemma negation "indicative" schema;
+       sel @ [Mood,Eq,["imperative"]],transform_pers_schema lemma negation "imperative" schema] else
+    if pos = "praet" || pos = "winien" then
+      [sel @ [Mood,Eq,["indicative"]],transform_pers_schema lemma negation "indicative" schema;
+       sel @ [Mood,Eq,["conditional"]],transform_pers_schema lemma negation "conditional" schema] else
+    if pos = "impt" then
+      [sel @ [Mood,Eq,["imperative"]],transform_nosubj_schema lemma negation "imperative" schema] else
+    if pos = "imps" then
+      [sel @ [Mood,Eq,["indicative"]],transform_nosubj_schema lemma negation "indicative" schema] else
+    if pos = "pred" then
+      [sel @ [Mood,Eq,["indicative"]],transform_pers_schema lemma negation "indicative" schema] else
+    if pos = "pcon" || pos = "pant" || pos = "inf" || pos = "pact" then
+      (* let role,role_attr = try get_role SUBJ schema with Not_found -> "Initiator","" in *)
+      [sel, transform_nosubj_schema lemma negation "indicative" schema] else
+    if pos = "ppas" then
+      try
+        (* let role,role_attr = try get_role OBJ schema with Not_found -> "Theme","" in *)
+        [sel, transform_ppas_schema lemma negation "indicative" schema]
+      with Not_found -> [] else
+    if pos = "ger" then
+      [sel,transform_ger_schema lemma negation schema] else
+    failwith ("transform_entry: " ^ pos)))
+
+(* Transforms the schema carried by a lexical entry; unsupported entries are printed and kept. *)
+let transform_lex_entry pos lemma = function
+    SimpleLexEntry(lemma,pos) -> [[],SimpleLexEntry(lemma,pos)]
+  | LexEntry(id,lemma,pos,NoRestr,schema) ->
+      Xlist.map (transform_entry pos lemma NegationUndef PredFalse AspectUndef schema) (fun (sel,schema) ->
+        sel,LexEntry(id,lemma,pos,NoRestr,schema))
+  | ComprepNPEntry(s,NoRestr,schema) ->
+      Xlist.map (transform_entry "comprep" s NegationUndef PredFalse AspectUndef schema) (fun (sel,schema) ->
+        sel,ComprepNPEntry(s,NoRestr,schema))
+  | LexEntry(id,lemma,pos,_,[]) as entry -> [[],entry] (* FIXME *)
+  | entry -> print_endline ("transform_lex_entry:" ^ ENIAMwalStringOf.lex_entry entry); [[],entry]
+
+(*let reduce_frame_negation lexemes = function
+    Negation -> StringMap.mem lexemes "nie"
+  | _ -> true
+
+let reduce_frame_mood lexemes = function
+    "conditional" -> StringMap.mem lexemes "by"
+  | _ -> true
+
+let reduce_frame_aux lexemes = function
+    NoAux -> true
+  | PastAux -> (try let poss = StringMap.find lexemes "być" in StringSet.mem poss "praet" with Not_found -> false)
+  | FutAux -> (try let poss = StringMap.find lexemes "być" in StringSet.mem poss "bedzie" with Not_found -> false)
+  | ImpAux -> StringMap.mem lexemes "niech" || StringMap.mem lexemes "niechaj" || StringMap.mem lexemes "niechże" || StringMap.mem lexemes "niechajże"
+
+let reduce_frame_atrs pos lexemes = function
+    Frame(NounAtrs _,_) -> true
+  | Frame(AdjAtrs _,_) -> true
+  | Frame(EmptyAtrs _,_) -> true
+  | Frame(PersAtrs(_,_,negation,mood,_,aux,_),_) -> reduce_frame_negation lexemes negation && reduce_frame_mood lexemes mood && reduce_frame_aux lexemes aux
+  | Frame(NonPersAtrs(_,_,_,_,negation,_),_) -> if pos = "pact" || pos = "ppas" then true else reduce_frame_negation lexemes negation
+  | Frame(GerAtrs(_,_,negation,_),_) -> reduce_frame_negation lexemes negation
+  | Frame(_,_) as frame -> failwith ("reduce_frame_atrs: " ^ ENIAMwalStringOf.frame "" frame)
+  | LexFrame _ -> true
+  | ComprepFrame _ -> true
+
+let rec reduce_frame_atrs_list pos lexemes = function
+    [] -> []
+  | frame :: l -> (if reduce_frame_atrs pos lexemes frame then [frame] else []) @ reduce_frame_atrs_list pos lexemes l
+*)
+
+(* Smoke test run at module initialisation: transforms every schema and lexical entry
+   obtained from ENIAMwalParser and discards the results. *)
+let () =
+  let schemata = ENIAMwalReduce.merge_entries ENIAMwalParser.phrases ENIAMwalParser.schemata in
+  Entries.iter schemata (fun pos lemma (opinion,neg,pred,aspect,schema) ->
+    match pos with
+      "noun" -> ignore (transform_entry "subst" lemma neg pred aspect schema)
+    | "adj" -> ignore (transform_entry "adj" lemma neg pred aspect schema)
+    | "adv" -> ignore (transform_entry "adv" lemma neg pred aspect schema)
+    | "verb" ->
+        ignore (transform_entry "fin" lemma neg pred aspect schema);
+        ignore (transform_entry "praet" lemma neg pred aspect schema);
+        ignore (transform_entry "impt" lemma neg pred aspect schema);
+        ignore (transform_entry "imps" lemma neg pred aspect schema);
+        ignore (transform_entry "ger" lemma neg pred aspect schema);
+        ignore (transform_entry "pact" lemma neg pred aspect schema);
+        ignore (transform_entry "ppas" lemma neg pred aspect schema);
+        ignore (transform_entry "inf" lemma neg pred aspect schema);
+        ignore (transform_entry "pcon" lemma neg pred aspect schema);
+        ignore (transform_entry "pant" lemma neg pred aspect schema);
+        ignore (transform_entry "bedzie" lemma neg pred aspect schema);
+        ignore (transform_entry "winien" lemma neg pred aspect schema);
+        ignore (transform_entry "pred" lemma neg pred aspect schema);
+        ()
+    | _ -> failwith "unknown pos");
+  ignore (Entries.map ENIAMwalParser.entries transform_lex_entry);
+  ()
diff --git a/lexSemantics/ENIAMwalFrames.ml b/lexSemantics/ENIAMwalFrames.ml
index 19efdd4..c6da5fa 100644
--- a/lexSemantics/ENIAMwalFrames.ml
+++ b/lexSemantics/ENIAMwalFrames.ml
@@ -1,7 +1,7 @@
 (*
- * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty".
- * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
- * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
+ * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
+ * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences * * This library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -525,752 +525,6 @@ let remove_pro_args schema = (* FIXME: sprawdzić czy Pro i Null są zawsze na p | s -> s :: schema)) -let simplify_pos = function - "subst" -> "noun" - | "depr" -> "noun" - | "psubst" -> "noun" - | "pdepr" -> "noun" - | "adj" -> "adj" - | "adjc" -> "adj" - | "ger" -> "verb" - | "pact" -> "verb" - | "ppas" -> "verb" - | "fin" -> "verb" - | "bedzie" -> "verb" - | "praet" -> "verb" - | "winien" -> "verb" - | "impt" -> "verb" - | "imps" -> "verb" - | "inf" -> "verb" - | "pcon" -> "verb" - | "pant" -> "verb" - | "pred" -> "verb" - | "ppron12" -> "pron" - | "ppron3" -> "pron" - | "siebie" -> "pron" - | s -> s - -let transform_zeby = function - Aff -> [Comp "że"] - | Negation -> [Comp "że";Comp "żeby"] - | NegationUndef -> [Comp "że";Comp "żeby"] - | _ -> failwith "transform_zeby" - -let transform_gdy = function - "indicative" -> [Comp "gdy"] - | "imperative" -> [Comp "gdy"] - | "conditional" -> [Comp "gdyby"] - | "gerundial" -> [Comp "gdy"] - | "" -> [Comp "gdy";Comp "gdyby"] - | s -> failwith ("transform_gdy: " ^ s) - -let transform_comp negation mood = function - Comp comp -> [Comp comp] - | Zeby -> transform_zeby negation - | Gdy -> transform_gdy mood - | CompUndef -> [CompUndef](*failwith "transform_comp"*) - -let transform_str = function - Aff -> [Case "acc"] - | Negation -> [Case "gen"] - | NegationUndef -> [Case "acc";Case "gen"] - | _ -> failwith "transform_str" - -(* FIXME: wstawić wszędzie adj jako wariant PrepNP, ComprepNP i NP *) -let transform_np_phrase = function - NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)] - | NP(CaseAgr) -> [NP(CaseAgr)(*;NumP(CaseAgr)*)] - | AdjP(Case _) as morf -> [morf] - | 
AdjP(CaseAgr) -> [AdjP(AllAgr)] - | AdjP(AllAgr) -> [AdjP(AllAgr)] - | AdjP(Str) -> [AdjP(AllAgr)] - | PrepNP(sem,prep,Case case) -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)] -(* | PrepNumP(_,Case _) as morf -> [morf] *) - | ComprepNP _ as morf -> [morf] - | ComparNP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> ComparNP(sem,prep,Case case)) - | ComparPP _ as morf -> [morf] - | CP(ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> CP(ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji*) - | NCP(Case c,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> NCP(Case c,ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji*) - | PrepNCP(sem,prep,Case case,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> PrepNCP(sem,prep,Case case,ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji *) - | PrepAdjP(sem,_,Case _) as morf -> [morf] (* to wygląda seryjny błąd w Walentym xp(abl[prepadjp(z,gen)]) *) - | PrepNP(sem,prep,Str) -> List.flatten (Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)])) (* FIXME: sprawdzić kto kontroluje! *) (* FIXME: pomijam uzgodnienie liczby i rodzaju *) (* zakładam, że nie jest kontrolowany przez SUBJ w czasowikach z OBJ *) - | PrepAdjP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> PrepAdjP(sem,prep,Case case)) (* FIXME: sprawdzić kto kontroluje! 
*) (* FIXME: pomijam uzgodnienie liczby i rodzaju *) (* zakładam, że nie jest kontrolowany przez SUBJ w czasowikach z OBJ *) - | AdvP as morf -> [morf] (* FIXME: tu trafiają przysłówkowe realizacje, trzeba by je przetłumaczyć na przymiotniki *) - | FixedP _ as morf -> [morf] - | PrepP as morf -> [morf] - | Or as morf -> [morf] - | Pro as morf -> [morf] - | Null as morf -> [morf] - | phrase -> print_endline ("transform_np_phrase: " ^ ENIAMwalStringOf.phrase phrase); [phrase] - -let transform_np_pos = function - | SUBST(_,Case _) as morf -> [morf] - | SUBST(_,CaseAgr) as morf -> [morf] - | ADJ(_,Case _,_,_) as morf -> [morf] - | ADJ(n,CaseAgr,g,gr) -> [ADJ(n,AllAgr,g,gr)] - | PACT(n,CaseAgr,g,a,neg,r) -> [PACT(n,AllAgr,g,a,neg,r)] - | PPAS(_,Case _,_,_,_) as morf -> [morf] - | PPAS(n,CaseAgr,g,a,neg) -> [PPAS(n,AllAgr,g,a,neg)] - | ADJ(n,Str,g,gr) -> [ADJ(n,AllAgr,g,gr)] - | PPAS(n,Str,g,a,neg) -> [PPAS(n,AllAgr,g,a,neg)] - | PREP(Case _) as morf -> [morf] - | ADV _ as morf -> [morf] (* FIXME: tu trafiają przysłówkowe realizacje, trzeba by je przetłumaczyć na przymiotniki *) - | COMP _ as morf -> [morf] - | QUB as morf -> [morf] - | pos -> print_endline ("transform_np_pos: " ^ ENIAMwalStringOf.pos pos); [pos] - -let transform_adj_phrase = function - NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)] - | NP(Part) -> [NP(Case "gen");NP(Case "acc")(*;NumP(Case "gen");NumP(Case "acc")*)] - | AdjP(CaseAgr) -> [AdjP(AllAgr)] (* jedno wystąpienie 'cały szczęśliwy', może się przydać podniesienie typu *) - | PrepNP(sem,prep,Case case) -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)] - | ComprepNP _ as morf -> [morf] - | ComparNP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> ComparNP(sem,prep,Case case)) - | ComparPP _ as morf -> [morf] - | CP(ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> CP(ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji*) - | NCP(Case c,ctype,comp) -> 
Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> NCP(Case c,ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji*) - | PrepNCP(sem,prep,Case case,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> PrepNCP(sem,prep,Case case,ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji *) - | PrepAdjP(sem,_,Case _) as morf -> [morf] - | PrepNP(sem,prep,Str) -> List.flatten (Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)])) (* FIXME: sprawdzić kto kontroluje! *) (* FIXME: pomijam uzgodnienie liczby i rodzaju *) (* zakładam, że nie jest kontrolowany przez SUBJ w czasowikach z OBJ *) - | PrepAdjP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> PrepAdjP(sem,prep,Case case)) (* FIXME: sprawdzić kto kontroluje! *) (* FIXME: pomijam uzgodnienie liczby i rodzaju *) (* zakładam, że nie jest kontrolowany przez SUBJ w czasowikach z OBJ *) - | InfP _ as morf -> [morf] - | AdvP as morf -> [morf] - | FixedP _ as morf -> [morf] - | PrepP as morf -> [morf] - | Or as morf -> [morf] - | Pro as morf -> [morf] - | Null as morf -> [morf] - | morf -> print_endline ("transform_adj_phrase: " ^ ENIAMwalStringOf.phrase morf); [morf] - -let transform_adj_pos = function - | SUBST(_,Case _) as morf -> [morf] - | ADJ(n,CaseAgr,g,gr) -> [ADJ(n,AllAgr,g,gr)] - | PREP(Case _) as morf -> [morf] - | ADV _ as morf -> [morf] - | morf -> print_endline ("transform_adj_pos: " ^ ENIAMwalStringOf.pos morf); [morf] - -let transform_prep_pos = function - | SUBST(_,Case _) as morf -> [morf] - | SUBST(n,Str) -> [SUBST(n,CaseAgr)] - | NUM(Case _,_,_) as morf -> [morf] - | ADJ(_,Case _,_,_) as morf -> [morf] - | GER(_,Case _,_,_,_,_) as morf -> [morf] - | PPAS(_,Case _,_,_,_) as morf -> [morf] - | ADV _ as morf -> [morf] - | QUB as morf -> [morf] - | pos -> print_endline ("transform_prep_pos: " ^ ENIAMwalStringOf.pos pos); [pos] - 
-let transform_compar_phrase = function - NP(Str) -> [NP CaseUndef(*;NumP(CaseUndef)*)] (* FIXME: ta sama sytuacja co w "jako" *) - | FixedP _ as morf -> [morf] - | phrase -> print_endline ("transform_compar_phrase: " ^ ENIAMwalStringOf.phrase phrase); [phrase] - -let transform_compar_pos = function - | SUBST(_,Case _) as morf -> [morf] - | ADJ(_,Case _,_,_) as morf -> [morf] - | PREP(Case _) as morf -> [morf] - | PPAS(_,Case _,_,_,_) as morf -> [morf] - | SUBST(Number n,Str) -> [SUBST(Number n,CaseUndef)] - | SUBST(NumberAgr,Str) -> [SUBST(NumberUndef,CaseUndef)] - | SUBST(NumberUndef,Str) -> [SUBST(NumberUndef,CaseUndef)] - | PPAS(NumberAgr,Str,GenderAgr,a,neg) -> [PPAS(NumberUndef,CaseUndef,GenderUndef,a,neg)] (* FIXME: ta sama sytuacja co w "jako" *) - | PPAS(NumberAgr,CaseAgr,GenderAgr,a,neg) -> [PPAS(NumberUndef,CaseUndef,GenderUndef,a,neg)] (* FIXME: ta sama sytuacja co w "jako" *) - | ADJ(NumberAgr,Str,GenderAgr,gr) -> [ADJ(NumberUndef,CaseUndef,GenderUndef,gr)] (* FIXME: ta sama sytuacja co w "jako" *) - | ADJ(NumberAgr,CaseAgr,GenderAgr,gr) -> [ADJ(NumberUndef,CaseUndef,GenderUndef,gr)] (* FIXME: ta sama sytuacja co w "jako" *) - | NUM(Case _,_,_) as morf -> [morf] - | pos -> print_endline ("transform_compar_pos: " ^ ENIAMwalStringOf.pos pos); [pos] - -let transform_adv_phrase = function - NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)] - | PrepNP(sem,prep,Case case) -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)] - | PrepNCP(sem,prep,Case case,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> PrepNCP(sem,prep,Case case,ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji *) - | ComprepNP _ as morf -> [morf] - | CP(ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> CP(ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji*) - | InfP _ as morf -> [morf] - | AdvP as morf -> [morf] - | Or as morf -> [morf] - | Pro as morf -> 
[morf] - | Null as morf -> [morf] - | PrepAdjP(sem,_,Case _) as morf -> [morf] - | PrepNP(sem,prep,Str) -> List.flatten (Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)])) (* FIXME: sprawdzić kto kontroluje! *) (* FIXME: pomijam uzgodnienie liczby i rodzaju *) (* zakładam, że nie jest kontrolowany przez SUBJ w czasowikach z OBJ *) - | PrepAdjP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> PrepAdjP(sem,prep,Case case)) (* FIXME: sprawdzić kto kontroluje! *) (* FIXME: pomijam uzgodnienie liczby i rodzaju *) (* zakładam, że nie jest kontrolowany przez SUBJ w czasowikach z OBJ *) - | ComparNP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> ComparNP(sem,prep,Case case)) - | ComparPP _ as morf -> [morf] -(* | AdjP(CaseAgr) as morf -> [morf] *) -(* | NCP(Case c,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> NCP(Case c,ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji*) - | PrepNCP(prep,Case case,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> PrepNCP(prep,Case case,ctype,comp)) (* FIXME zależność od trybu warunkowego*) (* FIXME zależność od negacji *) - | FixedP _ as morf -> [morf]*) - | morf -> print_endline ("transform_adv_phrase: " ^ ENIAMwalStringOf.phrase morf); [morf] - -let transform_adv_pos = function -(* | SUBST(_,Case _) as morf -> [morf] - | ADJ(_,CaseAgr,_,_) as morf -> [morf]*) - COMP _ as morf -> [morf] - | PREP(Case _) as morf -> [morf] - | ADV _ as morf -> [morf] - | morf -> print_endline ("transform_adv_pos: " ^ ENIAMwalStringOf.pos morf); [morf] - -(*| Prepnp("jako",Str) as morf -> morf - | Prepnp("jak",Str) as morf -> morf - | Prepnp("niczym",Str) as morf -> morf - | Prepadjp("jako",Str) as morf -> morf - | Prepadjp("jak",Str) as morf -> morf - | Prepadjp("niczym",Str) as morf -> morf - | Compar "jako" as morf -> morf - | Compar "jak" as morf -> 
morf - | Compar "niczym" as morf -> morf - | Compar "niż" as morf -> morf*) - -let transform_pers_subj_phrase negation mood = function (* FIXME: prepnp(na,loc) *) - | NP(Str) -> [NP(NomAgr)(*;NumP(NomAgr)*)] - | NCP(Str,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> NCP(NomAgr,ctype,comp)) - | CP(ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> CP(ctype,comp)) - | InfP _ as morf -> [morf] - | Or as morf -> [morf] - | NP(Part) -> [NP(Case "gen")(*;NP(Case "acc")*)(*;NumP(Case "gen");NumP(Case "acc")*)] - | Pro -> [ProNG] - | morf -> print_endline ("transform_pers_subj_phrase: " ^ ENIAMwalStringOf.phrase morf); [morf] - -let transform_pers_subj_pos negation mood = function - COMP _ as morf -> [morf] - | SUBST(n,Str) -> [SUBST(n,NomAgr)] - | SUBST(n,Case "nom") -> [SUBST(n,NomAgr)] (* wygląda na błąd Walentego, ale nie ma znaczenia *) - | NUM(Str,g,AcmUndef) -> [NUM(NomAgr,g,AcmUndef)] - | ADJ(n,Str,g,gr) -> [ADJ(n,NomAgr,g,gr)] - | morf -> print_endline ("transform_ger_subj_pos: " ^ ENIAMwalStringOf.pos morf); [morf] - -let transform_ger_subj_phrase negation mood control = function - | NP(Str) -> [NP(Case "gen");PrepNP(NoSem,"przez",Case "acc")(*;NumP(Case "gen")*)(*;PrepNumP("przez",Case "acc")*)] (* FIXME: czy przez:acc jest możliwe? *) - | NCP(Str,ctype,comp) -> List.flatten (Xlist.map (transform_comp negation mood comp) (fun comp -> [NCP(Case "gen",ctype,comp);PrepNCP(NoSem,"przez",Case "acc",ctype,comp)])) (* FIXME: czy przez:acc jest możliwe? *) - | CP(ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> CP(ctype,comp)) (* FIXME: czy to jest możliwe? *) - | InfP _ as morf -> [morf] (* FIXME: czy to jest możliwe? 
*) - | Or as morf -> [morf] - | NP(Part) -> [NP(Case "gen")(*;NP(Case "acc")*)(*;NumP(Case "gen");NumP(Case "acc")*)] - | Pro -> if control then [Pro] else [Null] - | morf -> print_endline ("transform_ger_subj_phrase: " ^ ENIAMwalStringOf.phrase morf); [morf] - -let transform_ger_subj_pos negation mood = function (* FIXME: ADV(_) *) - COMP _ as morf -> [morf] (* FIXME: czy to jest możliwe? *) - | SUBST(n,Str) -> [SUBST(n,Case "gen")] - | SUBST(n,Case "nom") -> [SUBST(n,Case "gen")] (* wygląda na błąd Walentego, ale nie ma znaczenia *) - | NUM(Str,g,AcmUndef) -> [NUM(Case "gen",g,AcmUndef)] - | ADJ(n,Str,g,gr) -> [ADJ(n,Case "gen",g,gr)] - | morf -> print_endline ("transform_pers_subj_pos: " ^ ENIAMwalStringOf.pos morf); [morf] - -let transform_ppas_subj_phrase negation mood control = function - | NP(Str) -> [PrepNP(NoSem,"przez",Case "acc")(*;PrepNumP("przez",Case "acc")*)] - | NCP(Str,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> PrepNCP(NoSem,"przez",Case "acc",ctype,comp)) - | CP(ctype,comp) -> [Null] (* zakładam, że w ramie jest też NCP *) - | Pro -> if control then [Pro] else [Null] - | morf -> print_endline ("transform_ppas_subj_phrase: " ^ ENIAMwalStringOf.phrase morf); [morf] - -let transform_pers_phrase negation mood = function - | NP(Str) -> List.flatten (Xlist.map (transform_str negation) (fun case -> [NP case(*;NumP(case)*)])) - | AdjP(Str) -> Xlist.map (transform_str negation) (fun case -> AdjP case) (* FIXME: pomijam uzgadnianie liczby i rodzaju - wykonalne za pomocą kontroli *) - | NCP(Str,ctype,comp) -> List.flatten (Xlist.map (transform_str negation) (fun case -> Xlist.map (transform_comp negation mood comp) (fun comp -> NCP(case,ctype,comp)))) - | NP(Part) -> [NP(Case "gen");NP(Case "acc")(*;NumP(Case "gen");NumP(Case "acc")*)] - | NCP(Part,ctype,comp) -> List.flatten (Xlist.map (transform_comp negation mood comp) (fun comp -> [NCP(Case "gen",ctype,comp);NCP(Case "acc",ctype,comp)])) - | NP(Case case) -> [NP(Case 
case)(*;NumP(Case case)*)] - | PrepNP(sem,prep,Case case) -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)] -(* | PrepNumP(_,Case _) as morf -> [morf] *) - | ComprepNP _ as morf -> [morf] - | NCP(Case case,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> NCP(Case case,ctype,comp)) - | PrepNCP(sem,prep,Case case,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> PrepNCP(sem,prep,Case case,ctype,comp)) - | AdjP(Case _) as morf -> [morf] (* FIXME: pomijam uzgadnianie liczby i rodzaju - wykonalne za pomocą kontroli *) - | PrepAdjP(sem,_,Case _) as morf -> [morf] (* FIXME: pomijam uzgadnianie liczby i rodzaju - wykonalne za pomocą kontroli *) - | PrepNP(sem,prep,Str) -> List.flatten (Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)])) (* FIXME: sprawdzić kto kontroluje! *) (* FIXME: pomijam uzgodnienie liczby i rodzaju *) (* zakładam, że nie jest kontrolowany przez SUBJ w czasowikach z OBJ *) - | PrepAdjP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> PrepAdjP(sem,prep,Case case)) (* FIXME: sprawdzić kto kontroluje! 
*) (* FIXME: pomijam uzgodnienie liczby i rodzaju *) (* zakładam, że nie jest kontrolowany przez SUBJ w czasowikach z OBJ *) - | ComparNP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> ComparNP(sem,prep,Case case)) - | ComparPP _ as morf -> [morf] - | CP(ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> CP(ctype,comp)) - | InfP _ as morf -> [morf] - | PadvP as morf -> [morf] - | AdvP -> if mood = "gerundial" then [AdjP AllAgr] else [AdvP] - | FixedP _ as morf -> [morf] - | PrepP as morf -> [morf] - | Or as morf -> [morf] - | Lex "się" as morf -> [morf] -(* | Refl as morf -> [morf] *) -(* | Recip as morf -> [morf] *) - | Pro as morf -> [morf] - | Null as morf -> [morf] - | morf -> print_endline ("transform_pers_phrase: " ^ ENIAMwalStringOf.phrase morf); [morf] - -let transform_pers_pos negation mood = function - | SUBST(n,Str) -> Xlist.map (transform_str negation) (fun case -> SUBST(n,case)) - | NUM(Str,g,a) -> Xlist.map (transform_str negation) (fun case -> NUM(case,g,a)) - | ADJ(n,Str,g,gr) -> Xlist.map (transform_str negation) (fun case -> ADJ(n,case,g,gr)) - | PPAS(n,Str,g,a,neg) -> Xlist.map (transform_str negation) (fun case -> PPAS(n,Str,g,a,neg)) - | SUBST(n,Part) -> [SUBST(n,Case "gen");SUBST(n,Case "acc")] - | SUBST(_,Case _) as morf -> [morf] - | NUM(Case _,_,_) as morf -> [morf] - | PREP(Case _) as morf -> [morf] - | ADJ(_,Case _,_,_) as morf -> [morf] - | PREP(Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> PREP(Case case)) (* FIXME: sprawdzić kto kontroluje! *) (* FIXME: pomijam uzgodnienie liczby i rodzaju *) (* zakładam, że nie jest kontrolowany przez SUBJ w czasowikach z OBJ *) - | SUBST(n,CaseAgr) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> SUBST(n,Case case)) (* FIXME: sprawdzić kto kontroluje! *) - | ADJ(n,CaseAgr,g,gr) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> ADJ(n,Case case,g,gr)) (* FIXME: sprawdzić kto kontroluje! 
*) - | COMPAR as morf -> [morf] - | COMP _ as morf -> [morf] - | INF _ as morf -> [morf] - | ADV grad -> if mood = "gerundial" then [ADJ(NumberAgr,AllAgr,GenderAgr,grad)] else [ADV grad] - | morf -> print_endline ("transform_pers_pos: " ^ ENIAMwalStringOf.pos morf); [morf] - -let transform_pers_schema negation mood schema = - Xlist.map schema (fun s -> - {s with morfs = - if s.gf = SUBJ then List.flatten (Xlist.map s.morfs (function - Phrase phrase -> Xlist.map (transform_pers_subj_phrase negation mood phrase) (fun phrase -> Phrase phrase) - | E phrase -> Xlist.map (transform_pers_subj_phrase negation mood phrase) (fun phrase -> E phrase) - | LexArg(id,pos,lex) -> Xlist.map (transform_pers_subj_pos negation mood pos) (fun pos -> LexArg(id,pos,lex)) - | _ -> failwith "transform_fin_schema")) - else List.flatten (Xlist.map s.morfs (function - Phrase phrase -> Xlist.map (transform_pers_phrase negation mood phrase) (fun phrase -> Phrase phrase) - | E phrase -> [Phrase Null] (*E(List.flatten (Xlist.map phrases (transform_pers_phrase negation mood)))*) (* FIXME *) - | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation mood pos) (fun pos -> LexArg(id,pos,lex)) - | _ -> failwith "transform_fin_schema"))}) - -let transform_impt_schema negation mood schema = - Xlist.map schema (fun s -> - {s with morfs = - if s.gf = SUBJ then [Phrase ProNG] - else List.flatten (Xlist.map s.morfs (function - Phrase phrase -> Xlist.map (transform_pers_phrase negation mood phrase) (fun phrase -> Phrase phrase) - | E phrase -> [Phrase Null] (*E(List.flatten (Xlist.map phrases (transform_pers_phrase negation mood)))*) (* FIXME *) - | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation mood pos) (fun pos -> LexArg(id,pos,lex)) - | _ -> failwith "transform_impt_schema"))}) - -let transform_imps_schema negation mood schema = - Xlist.map schema (fun s -> - {s with morfs = - if s.gf = SUBJ then [Phrase Pro] - else List.flatten (Xlist.map s.morfs (function - Phrase phrase -> 
Xlist.map (transform_pers_phrase negation mood phrase) (fun phrase -> Phrase phrase) - | E phrase -> [Phrase Null] (*E(List.flatten (Xlist.map phrases (transform_pers_phrase negation mood)))*) (* FIXME *) - | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation mood pos) (fun pos -> LexArg(id,pos,lex)) - | _ -> failwith "transform_imps_chema"))}) - -let transform_ger_schema negation schema = (* FIXME: zakładam, że ger zeruje mood, czy to prawda? *) - Xlist.map schema (fun s -> - {s with morfs = - if s.gf = SUBJ then List.flatten (Xlist.map s.morfs (function - Phrase phrase -> Xlist.map (transform_ger_subj_phrase negation "gerundial" (s.cr <> [] || s.ce <> []) phrase) (fun phrase -> Phrase phrase) - | E phrase -> Xlist.map (transform_ger_subj_phrase negation "gerundial" (s.cr <> [] || s.ce <> []) phrase) (fun phrase -> E phrase) - | LexArg(id,pos,lex) -> Xlist.map (transform_ger_subj_pos negation "gerundial" pos) (fun pos -> LexArg(id,pos,lex)) - | _ -> failwith "transform_fin_schema")) - else List.flatten (Xlist.map s.morfs (function - Phrase phrase -> Xlist.map (transform_pers_phrase negation "gerundial" phrase) (fun phrase -> Phrase phrase) - | E phrase -> [Phrase Null] (*E(List.flatten (Xlist.map phrases (transform_pers_phrase negation mood)))*) (* FIXME *) - | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation "gerundial" pos) (fun pos -> LexArg(id,pos,lex)) - | _ -> failwith "transform_fin_schema"))}) - -let transform_padv_schema negation mood pro schema = - Xlist.map schema (fun s -> - {s with morfs = - if s.gf = SUBJ then if s.ce = [] then if pro then [Phrase Pro] else [Phrase Null] else [Phrase Null] else - List.flatten (Xlist.map s.morfs (function - Phrase phrase -> Xlist.map (transform_pers_phrase negation mood phrase) (fun phrase -> Phrase phrase) - | E phrase -> [Phrase Null] (*E(List.flatten (Xlist.map phrases (transform_pers_phrase negation mood)))*) (* FIXME *) - | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation mood 
pos) (fun pos -> LexArg(id,pos,lex)) - | _ -> failwith "transform_fin_schema"))}) - -let transform_pact_schema negation mood schema = - Xlist.map schema (fun s -> - {s with morfs = - if s.gf = SUBJ then [Phrase Null] - else List.flatten (Xlist.map s.morfs (function - Phrase phrase -> Xlist.map (transform_pers_phrase negation mood phrase) (fun phrase -> Phrase phrase) - | E phrase -> [Phrase Null] (*E(List.flatten (Xlist.map phrases (transform_pers_phrase negation mood)))*) (* FIXME *) - | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation mood pos) (fun pos -> LexArg(id,pos,lex)) - | _ -> failwith "transform_pact_schema"))}) - -let transform_ppas_schema negation mood schema = - Xlist.map schema (fun s -> - {s with morfs = - if s.gf = OBJ then [Phrase Null] else - if s.gf = SUBJ then List.flatten (Xlist.map s.morfs (function - Phrase phrase -> Xlist.map (transform_ppas_subj_phrase negation mood (s.cr <> [] || s.ce <> []) phrase) (fun phrase -> Phrase phrase) - | E phrase -> Xlist.map (transform_ppas_subj_phrase negation mood (s.cr <> [] || s.ce <> []) phrase) (fun phrase -> E phrase) - | LexArg(id,SUBST(n,Str),lex) -> raise Not_found (* FIXME!!! 
*) - | _ -> failwith "transform_ppas_schema")) - else List.flatten (Xlist.map s.morfs (function - Phrase phrase -> Xlist.map (transform_pers_phrase negation mood phrase) (fun phrase -> Phrase phrase) - | E phrase -> [Phrase Null] (*E(List.flatten (Xlist.map phrases (transform_pers_phrase negation mood)))*) (* FIXME *) - | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation mood pos) (fun pos -> LexArg(id,pos,lex)) - | _ -> failwith "transform_ppas_schema"))}) - -let add_padv schema = - List.flatten (Xlist.map schema (fun s -> - if s.gf = SUBJ then - match s.cr with - [] -> [{s with cr=["3"]}; let s = adjunct_schema_field "" Both [Phrase Null;Phrase PadvP] in {s with ce=["3"]}] - | [cr] -> [s; let s = adjunct_schema_field "" Both [Phrase Null;Phrase PadvP] in {s with ce=[cr]}] - | _ -> failwith "add_padv" - else [s])) - -let transform_np_schema schema = - Xlist.map schema (fun s -> - {s with morfs=List.flatten (Xlist.map s.morfs (function - Phrase phrase -> Xlist.map (transform_np_phrase phrase) (fun phrase -> Phrase phrase) -(* | LexArg(id,ADV _,lex) as morf -> print_endline (ENIAMwalStringOf.morf morf); [morf] *) - | LexArg(id,pos,lex) -> Xlist.map (transform_np_pos pos) (fun pos -> LexArg(id,pos,lex)) - | Multi[AdjP AllAgr] -> [Multi[AdjP AllAgr]] - | _ -> failwith "transform_np_schema"))}) - -let transform_num_schema acm schema = - Xlist.map schema (fun s -> - {s with morfs=List.flatten (Xlist.map s.morfs (function (* kierunek argumentu został dodany w expand_lexicalizations_morfs *) - | Phrase Pro -> [Phrase Pro] - | LexArg(id,SUBST(NumberUndef,CaseUndef),lex) -> - (match acm with - Acm "rec" -> [LexArg(id,SUBST(NumberUndef,GenAgr),lex)] - | Acm "congr" -> [LexArg(id,SUBST(NumberUndef,AllAgr),lex)] - | _ -> failwith "transform_num_schema") - | morf -> failwith ("transform_num_schema: " ^ ENIAMwalStringOf.morf morf)))}) - -let transform_adj_schema schema = - Xlist.map schema (fun s -> - {s with morfs=List.flatten (Xlist.map s.morfs (function - Phrase 
phrase -> Xlist.map (transform_adj_phrase phrase) (fun phrase -> Phrase phrase) - | LexArg(id,pos,lex) -> Xlist.map (transform_adj_pos pos) (fun pos -> LexArg(id,pos,lex)) - | _ -> failwith "transform_adj_schema"))}) - -let transform_adv_schema schema = - Xlist.map schema (fun s -> - {s with morfs=List.flatten (Xlist.map s.morfs (function - Phrase phrase -> Xlist.map (transform_adv_phrase phrase) (fun phrase -> Phrase phrase) - | LexArg(id,pos,lex) -> Xlist.map (transform_adv_pos pos) (fun pos -> LexArg(id,pos,lex)) - | _ -> failwith "transform_adv_schema"))}) - -let transform_prep_schema schema = - Xlist.map schema (fun s -> - {s with morfs=List.flatten (Xlist.map s.morfs (function - Phrase(NumP(case)) -> [Phrase(NumP(case))] - | LexArg(id,pos,lex) -> Xlist.map (transform_prep_pos pos) (fun pos -> LexArg(id,pos,lex)) - | morf -> failwith ("transform_prep_schema: " ^ ENIAMwalStringOf.morf morf)))}) - -let transform_compar_schema schema = - Xlist.map schema (fun s -> - {s with morfs=List.flatten (Xlist.map s.morfs (function - Phrase phrase -> Xlist.map (transform_compar_phrase phrase) (fun phrase -> Phrase phrase) - | LexArg(id,pos,lex) -> Xlist.map (transform_compar_pos pos) (fun pos -> LexArg(id,pos,lex)) - | morf -> failwith ("transform_compar_schema: " ^ ENIAMwalStringOf.morf morf)))}) - -let transform_comp_schema schema = (* kierunek argumentu został dodany w expand_lexicalizations_morfs *) - Xlist.map schema (fun s -> - {s with morfs=List.flatten (Xlist.map s.morfs (function - | LexArg(_,PERS _,_) as morf -> [morf] - | morf -> failwith ("transform_comp_schema: " ^ ENIAMwalStringOf.morf morf)))}) - -let transform_qub_schema schema = - Xlist.map schema (fun s -> - {s with morfs=List.flatten (Xlist.map s.morfs (function - | LexArg(_,PERS _,_) as morf -> [morf] - | morf -> failwith ("transform_qub_schema: " ^ ENIAMwalStringOf.morf morf)))}) - -let rec remove_adj_agr = function - [] -> [] - | {morfs=[Phrase Null;Phrase(AdjP(CaseAgr))]} :: l -> remove_adj_agr l - | 
{morfs=[Phrase Null;Phrase(AdjP(Part))]} :: l -> remove_adj_agr l - | s :: l -> (*print_endline (ENIAMwalStringOf.schema [s]);*) s :: (remove_adj_agr l) - -let rec get_role gf = function - [] -> raise Not_found - | s :: l -> if s.gf = gf then s.role,s.role_attr else get_role gf l - -let expand_negation = function - Negation -> [Negation] - | Aff -> [Aff] - | NegationUndef -> [Negation;Aff] - | NegationNA -> failwith "expand_negation" - -let expand_aspect = function - Aspect s -> [Aspect s] - | AspectUndef -> [Aspect "imperf";Aspect "perf"] - | AspectNA -> failwith "expand_aspect" - -let load_list filename = - Str.split (Str.regexp "\n") (File.load_file filename) - -let subst_uncountable_lexemes = StringSet.of_list (load_list subst_uncountable_lexemes_filename) -let subst_uncountable_lexemes2 = StringSet.of_list (load_list subst_uncountable_lexemes_filename2) -let subst_container_lexemes = StringSet.of_list (load_list subst_container_lexemes_filename) -let subst_numeral_lexemes = StringSet.of_list (load_list subst_numeral_lexemes_filename) -let subst_time_lexemes = StringSet.of_list (load_list subst_time_lexemes_filename) - -let subst_pronoun_lexemes = StringSet.of_list ["co"; "kto"; "cokolwiek"; "ktokolwiek"; "nic"; "nikt"; "coś"; "ktoś"; "to"] -let adj_pronoun_lexemes = StringSet.of_list ["czyj"; "jaki"; "który"; "jakiś"; "ten"; "taki"] - -(* let adj_quant_lexemes = StringSet.of_list ["każdy"; "wszelki"; "wszystek"; "żaden"; "jakiś"; "pewien"; "niektóry"; "jedyny"; "sam"] *) - -let empty_valence_lexemes = StringSet.union subst_pronoun_lexemes adj_pronoun_lexemes - - -let noun_type lemma pos = - let nsyn = - if pos = "ppron12" || pos = "ppron3" || pos = "siebie" then "pronoun" else - if pos = "psubst" || pos = "pdepr" || pos = "date" then "proper" else - if StringSet.mem subst_pronoun_lexemes lemma then "pronoun" else - "common" in - let nsem = - if pos = "ppron12" || pos = "ppron3" || pos = "siebie" then [Common "count"] else - if StringSet.mem subst_time_lexemes 
lemma then [Time] else - let l = ["count"] in - let l = if StringSet.mem subst_uncountable_lexemes lemma || StringSet.mem subst_uncountable_lexemes2 lemma then "mass" :: l else l in - let l = if StringSet.mem subst_container_lexemes lemma then "measure" :: l else l in - Xlist.map l (fun s -> Common s) in - nsyn,nsem - -let adj_type lemma = (* FIXME: typy przymiotników wymagają zbadania - przejrzenia listy przymiotników *) - let adjsyn = if StringSet.mem adj_pronoun_lexemes lemma then "pronoun" else "common" in (* FIXME: dodać heurystykę uwzględniającą wielkość liter aby wykrywać proper np. Oświęcimski*) - adjsyn - -let transform_frame lexeme pos = function (* FIXME: dodać tutaj typy rzeczowników *) - Frame(DefaultAtrs(meanings,refl,opinion,negation,pred,aspect),schema) as frame -> - if pos = "subst" || pos = "depr" || pos = "psubst" || pos = "pdepr" || pos = "ppron12" || pos = "ppron3" || pos = "siebie" then ( - if refl <> ReflEmpty || negation <> NegationNA || pred <> PredNA || aspect <> AspectNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); - let nsyn,nsem(*,typ*) = noun_type lexeme pos in - let schema = if nsyn = "pronoun" then [] else (remove_adj_agr schema) @ noun_adjuncts in (* FIXME: remove_adj_agr jest w słowniku tymczasowo *) -(* List.flatten (Xlist.map typ (fun typ -> *) - Xlist.map nsem (fun nsem -> Frame(NounAtrs(meanings,nsyn,nsem(*,typ*)),transform_np_schema schema)))(* ))*) else - if pos = "symbol" || pos = "date" || pos = "date-interval" || pos = "hour" || pos = "hour-minute" || pos = "hour-interval" || pos = "hour-minute-interval" || - pos = "year" || pos = "year-interval" || pos = "day" || pos = "day-interval" || pos = "day-month" || pos = "day-month-interval" || - pos = "match-result" || pos = "month-interval" || pos = "roman" || pos = "roman-interval" || pos = "url" || pos = "email" || pos = "obj-id" then - let nsyn,nsem = "proper",[Common "count"] in - Xlist.map nsem (fun nsem -> 
Frame(NounAtrs(meanings,nsyn,nsem),transform_np_schema schema)) else - if pos = "adj" || pos = "adjc" || pos = "adjp" || pos = "ordnum" then ( - if refl <> ReflEmpty || negation <> NegationNA || aspect <> AspectNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); - let adjsyn(*,adjsem,typ*) = adj_type lexeme in - let schema = if pos = "adjp" || pos = "ordnum" then schema else if adjsyn = "pronoun" then [] else schema @ adj_adjuncts in - let case = match pred with Pred -> Case "pred" | PredNA -> CaseUndef in -(* Xlist.map typ (fun typ -> *) - [Frame(AdjAtrs(meanings,case,adjsyn(*,adjsem,typ*)),transform_adj_schema schema)])(* )*) else - if pos = "adv" then ( - if refl <> ReflEmpty || negation <> NegationNA || pred <> PredNA || aspect <> AspectNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); (* FIXME: typy przysłówków *) - [Frame(EmptyAtrs meanings,transform_adv_schema (remove_adj_agr schema))]) else - if pos = "fin" then ( - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in - let schema = (add_padv schema) @ verb_adjuncts in - List.flatten (Xlist.map (expand_negation negation) (fun negation -> - Xlist.map (expand_aspect aspect) (function - Aspect "imperf" -> Frame(PersAtrs(meanings,s,negation,"indicative","pres",NoAux,Aspect "imperf"), transform_pers_schema negation "indicative" schema) - | Aspect "perf" -> Frame(PersAtrs(meanings,s,negation,"indicative","fut",NoAux,Aspect "perf"), transform_pers_schema negation "indicative" schema) - | _ -> failwith "transform_frame") @ - [Frame(PersAtrs(meanings,s,negation,"imperative","fut",ImpAux,aspect), transform_pers_schema negation "imperative" schema)]))) else - if pos = "bedzie" then ( - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); - let s,schema = if refl = ReflSie 
then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in - let schema = (add_padv schema) @ verb_adjuncts in - List.flatten (Xlist.map (expand_negation negation) (fun negation -> - Xlist.map (expand_aspect aspect) (function - Aspect "imperf" -> Frame(PersAtrs(meanings,s,negation,"indicative","fut",NoAux,Aspect "imperf"), transform_pers_schema negation "indicative" schema) - | Aspect "perf" -> Frame(PersAtrs(meanings,s,negation,"indicative","fut",NoAux,Aspect "perf"), transform_pers_schema negation "indicative" schema) (* FIXME: niepotrzebne *) - | _ -> failwith "transform_frame")))) else - if pos = "praet" then ( - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in - let schema = (add_padv schema) @ verb_adjuncts in - List.flatten (Xlist.map (expand_negation negation) (fun negation -> - List.flatten (Xlist.map (expand_aspect aspect) (function - Aspect "imperf" -> - [Frame(PersAtrs(meanings,s,negation,"indicative","past",NoAux,Aspect "imperf"), transform_pers_schema negation "indicative" schema); - Frame(PersAtrs(meanings,s,negation,"conditional","past",NoAux,Aspect "imperf"), transform_pers_schema negation "conditional" schema); - Frame(PersAtrs(meanings,s,negation,"indicative","fut",FutAux,Aspect "imperf"), transform_pers_schema negation "indicative" schema)] - | Aspect "perf" -> - [Frame(PersAtrs(meanings,s,negation,"indicative","past",NoAux,Aspect "perf"), transform_pers_schema negation "indicative" schema); - Frame(PersAtrs(meanings,s,negation,"conditional","past",NoAux,Aspect "perf"), transform_pers_schema negation "conditional" schema)] - | _ -> failwith "transform_frame"))))) else - if pos = "winien" then ( - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: 
schema else lexeme, schema in - let schema = (add_padv schema) @ verb_adjuncts in - List.flatten (Xlist.map (expand_negation negation) (fun negation -> - List.flatten (Xlist.map (expand_aspect aspect) (fun aspect -> - [Frame(PersAtrs(meanings,s,negation,"indicative","pres",NoAux,aspect), transform_pers_schema negation "indicative" schema); - Frame(PersAtrs(meanings,s,negation,"conditional","past",NoAux,aspect), transform_pers_schema negation "conditional" schema); - Frame(PersAtrs(meanings,s,negation,"indicative","past",PastAux,aspect), transform_pers_schema negation "indicative" schema)]))))) else - if pos = "impt" then ( - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in - let schema = (add_padv schema) @ verb_adjuncts in - Xlist.map (expand_negation negation) (fun negation -> - Frame(PersAtrs(meanings,s,negation,"imperative","fut",NoAux,aspect),transform_impt_schema negation "imperative" schema))) else - if pos = "imps" then ( - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in - let schema = (add_padv schema) @ verb_adjuncts in - Xlist.map (expand_negation negation) (fun negation -> - Frame(PersAtrs(meanings,s,negation,"indicative","past",NoAux,aspect),transform_imps_schema negation "indicative" schema))) else - if pos = "pred" then ( - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in - let schema = (add_padv schema) @ verb_adjuncts in - List.flatten (Xlist.map (expand_negation negation) (fun negation -> - [Frame(PersAtrs(meanings,s,negation,"indicative","pres",NoAux,aspect), transform_pers_schema 
negation "indicative" schema); - Frame(PersAtrs(meanings,s,negation,"indicative","fut",FutAux,aspect), transform_pers_schema negation "indicative" schema); - Frame(PersAtrs(meanings,s,negation,"indicative","past",PastAux,aspect), transform_pers_schema negation "indicative" schema)]))) else - if pos = "pcon" || pos = "pant" || pos = "inf" then ( - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); - let role,role_attr = try get_role SUBJ schema with Not_found -> "Initiator","" in - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in - let schema = schema @ verb_adjuncts in - Xlist.map (expand_negation negation) (fun negation -> - Frame(NonPersAtrs(meanings,s,role,role_attr,negation,aspect),transform_padv_schema negation "indicative" true schema))) else - if pos = "pact" then ( - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); - try - let role,role_attr = try get_role SUBJ schema with Not_found -> "Initiator","" in - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in - let schema = schema @ verb_adjuncts in - Xlist.map (expand_negation negation) (fun negation -> - Frame(NonPersAtrs(meanings,s,role,role_attr,negation,aspect),transform_pact_schema negation "indicative" schema)) - with Not_found -> []) else - if pos = "ppas" then ( - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); - try - let role,role_attr = try get_role OBJ schema with Not_found -> "Theme","" in - let s,schema = if refl = ReflSie then raise Not_found else lexeme, schema in - let schema = schema @ verb_adjuncts in - Xlist.map (expand_negation negation) (fun negation -> - Frame(NonPersAtrs(meanings,s,role,role_attr,negation,aspect),transform_ppas_schema negation "indicative" schema)) - with Not_found -> []) else - if pos = "ger" then ( - if pred <> PredNA 
then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in (* FIXME: czy ger może mieć niesemantyczne się? *) - let schema = schema @ verb_adjuncts in - Xlist.map (expand_negation negation) (fun negation -> - Frame(GerAtrs(meanings,s,negation,aspect),transform_ger_schema negation schema))) else - failwith ("transform_frame: " ^ pos) - | LexFrame(id,pos,NoRestr,schema) as frame -> - (match pos with - SUBST _ -> [LexFrame(id,pos,NoRestr,transform_np_schema schema)] - | PREP _ -> [LexFrame(id,pos,NoRestr,transform_prep_schema schema)] - | NUM(c,g,AcmUndef) -> - Xlist.map [Acm "congr";Acm "rec"] (fun acm -> - LexFrame(id,NUM(c,g,acm),NoRestr,transform_num_schema acm schema)) - | ADJ(n,c,g,gr) -> [LexFrame(id,pos,NoRestr,transform_adj_schema schema)] - | ADV(gr) -> [LexFrame(id,pos,NoRestr,transform_adv_schema schema)] - | GER(n,c,g,a,negation,ReflEmpty) -> - Xlist.map (expand_negation negation) (fun negation -> - LexFrame(id,GER(n,c,g,a,negation,ReflEmpty),NoRestr,transform_ger_schema negation schema)) - | PACT(n,c,g,a,negation,ReflEmpty) -> - Xlist.map (expand_negation negation) (fun negation -> - LexFrame(id,PACT(n,c,g,a,negation,ReflEmpty),NoRestr,transform_pact_schema negation "indicative" schema)) - | PPAS(n,c,g,a,negation) -> - Xlist.map (expand_negation negation) (fun negation -> - LexFrame(id,PPAS(n,c,g,a,negation),NoRestr,transform_ppas_schema negation "indicative" schema)) - | INF(a,negation,r) -> - Xlist.map (expand_negation negation) (fun negation -> - LexFrame(id,INF(a,negation,r),NoRestr,transform_padv_schema negation "indicative" false schema)) - | QUB -> [LexFrame(id,pos,NoRestr,transform_qub_schema schema)] - | COMPAR -> [LexFrame(id,pos,NoRestr,transform_compar_schema schema)] - | COMP _ -> [LexFrame(id,pos,NoRestr,transform_comp_schema schema)] - | PERS(negation,r) -> - Xlist.map (expand_negation negation) (fun negation -> 
- LexFrame(id,PERS(negation,r),NoRestr,transform_pers_schema negation "indicative" schema)) - | _ -> failwith ("transform_frame:" ^ ENIAMwalStringOf.frame lexeme frame)) - | ComprepFrame(s,pos,NoRestr,schema) as frame -> - (match pos with - PREP _ -> [ComprepFrame(s,pos,NoRestr,transform_prep_schema schema)] - | ADV _ -> [ComprepFrame(s,pos,NoRestr,transform_adv_schema schema)] - | _ -> failwith ("transform_frame:" ^ ENIAMwalStringOf.frame lexeme frame)) - | frame -> failwith ("transform_frame:" ^ ENIAMwalStringOf.frame lexeme frame) - -let reduce_frame_negation lexemes = function - Negation -> StringMap.mem lexemes "nie" - | _ -> true - -let reduce_frame_mood lexemes = function - "conditional" -> StringMap.mem lexemes "by" - | _ -> true - -let reduce_frame_aux lexemes = function - NoAux -> true - | PastAux -> (try let poss = StringMap.find lexemes "być" in StringSet.mem poss "praet" with Not_found -> false) - | FutAux -> (try let poss = StringMap.find lexemes "być" in StringSet.mem poss "bedzie" with Not_found -> false) - | ImpAux -> StringMap.mem lexemes "niech" || StringMap.mem lexemes "niechaj" || StringMap.mem lexemes "niechże" || StringMap.mem lexemes "niechajże" - -let reduce_frame_atrs pos lexemes = function - Frame(NounAtrs _,_) -> true - | Frame(AdjAtrs _,_) -> true - | Frame(EmptyAtrs _,_) -> true - | Frame(PersAtrs(_,_,negation,mood,_,aux,_),_) -> reduce_frame_negation lexemes negation && reduce_frame_mood lexemes mood && reduce_frame_aux lexemes aux - | Frame(NonPersAtrs(_,_,_,_,negation,_),_) -> if pos = "pact" || pos = "ppas" then true else reduce_frame_negation lexemes negation - | Frame(GerAtrs(_,_,negation,_),_) -> reduce_frame_negation lexemes negation - | Frame(_,_) as frame -> failwith ("reduce_frame_atrs: " ^ ENIAMwalStringOf.frame "" frame) - | LexFrame _ -> true - | ComprepFrame _ -> true - -let rec reduce_frame_atrs_list pos lexemes = function - [] -> [] - | frame :: l -> (if reduce_frame_atrs pos lexemes frame then [frame] else []) @ 
reduce_frame_atrs_list pos lexemes l - -let find_frames lexemes = -(* print_endline "find_frames 1"; *) - let valence = StringMap.fold lexemes StringMap.empty (fun valence lexeme poss -> -(* let poss = StringSet.fold poss StringSet.empty (fun poss pos -> StringSet.add poss (simplify_pos pos)) in *) -(* Printf.printf "find_frame: %s |%s|\n" s (String.concat " " (StringSet.to_list lexemes)); *) - StringSet.fold poss valence (fun valence pos -> - let valence = - let frames_sem = try StringMap.find (StringMap.find walenty (simplify_pos pos)) lexeme with Not_found -> [] in -(* if frames_sem <> [] then Printf.printf "%s %s in TEI\n%!" lexeme pos; *) - if frames_sem <> [] then - Xlist.fold frames_sem valence (fun valence frame -> - convert_frame_sem expands subtypes equivs lexemes valence lexeme pos frame) - else - let frames = match simplify_pos pos with - "verb" -> ((*try StringMap.find verb_frames lexeme with Not_found ->*) ["verb","","","","",""]) - | "noun" -> ((*try StringMap.find noun_frames lexeme with Not_found ->*) if StringSet.mem empty_valence_lexemes lexeme then ["empty","","","","",""] else ["noun","","","","",""]) - | "adj" -> ((*try StringMap.find adj_frames lexeme with Not_found ->*) if StringSet.mem empty_valence_lexemes lexeme then ["empty","","","","",""] else ["adj","","","","",""]) - | "adv" -> ((*try StringMap.find adv_frames lexeme with Not_found ->*) ["adv","","","","",""]) - | "pron" -> ["empty","","","","",""] - | "adjp" -> ["empty","","","","",""] - | "ordnum" -> ["empty","","","","",""] - | "symbol" -> ["empty","","","","",""] - | "date" -> ["date","","","","",""] - | "date-interval" -> ["empty","","","","",""] - | "hour" -> ["hour","","","","",""] - | "hour-minute" -> ["hour","","","","",""] - | "hour-interval" -> ["empty","","","","",""] - | "hour-minute-interval" -> ["empty","","","","",""] - | "year" -> ["empty","","","","",""] - | "year-interval" -> ["empty","","","","",""] - | "day" -> ["day","","","","",""] - | "day-interval" -> 
["day","","","","",""] - | "day-month" -> ["date2","","","","",""] - | "day-month-interval" -> ["empty","","","","",""] - | "match-result" -> ["empty","","","","",""] - | "month-interval" -> ["empty","","","","",""] - | "roman" -> ["empty","","","","",""] - | "roman-interval" -> ["empty","","","","",""] - | "url" -> ["empty","","","","",""] - | "email" -> ["empty","","","","",""] - | "obj-id" -> ["empty","","","","",""] - | _ -> [] in -(* if frames = [] then valence else - Printf.printf "find_frame: %s |l|=%d\n" s (Xlist.size l); *) - Xlist.fold frames valence (fun valence frame -> - convert_frame expands subtypes equivs lexemes valence lexeme pos frame) in - Xlist.fold ((*try StringMap.find compreps lexeme with Not_found ->*) []) valence (fun valence (cpos,frame) -> (* FIXME: na razie przyimki złożone są wyłączone *) - if cpos = pos then convert_comprep_frame expands subtypes equivs lexemes valence lexeme pos frame else valence))) in -(* print_endline "find_frames 2"; *) - let valence = StringMap.mapi valence (fun lexeme poss -> - StringMap.mapi poss (fun pos frames -> - List.flatten (Xlist.map frames (fun frame -> -(* print_endline ("find_frames: " ^ ENIAMwalStringOf.frame lexeme frame); *) - expand_restr valence lexeme pos frame)))) in -(* print_endline "find_frames 3"; *) - let valence = StringMap.mapi valence (fun lexeme poss -> - StringMap.mapi poss (fun pos frames -> - reduce_frame_atrs_list pos lexemes (List.flatten (Xlist.map frames (transform_frame lexeme pos))))) in -(* let valence = StringMap.mapi valence (fun lexeme poss -> - StringMap.mapi poss (fun pos frames -> - Xlist.map frames (assign_thematic_role pos))) in*) -(* StringMap.iter valence (fun lexeme poss -> - StringMap.iter poss (fun pos frames -> - Xlist.iter frames (fun frame -> print_endline (ENIAMwalStringOf.frame lexeme frame))));*) -(* print_endline "find_frames 4"; *) - valence (*let _ = let valence = Xlist.fold all_frames StringMap.empty (fun valence (pos,frame_map) -> diff --git 
a/lexSemantics/ENIAMwalParser.ml b/lexSemantics/ENIAMwalParser.ml index 0c51aff..4e812d7 100644 --- a/lexSemantics/ENIAMwalParser.ml +++ b/lexSemantics/ENIAMwalParser.ml @@ -1,7 +1,7 @@ (* - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty". - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences * * This library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -156,7 +156,7 @@ let parse_negation = function (* | [Text ""] -> NegationNA *) | l -> failwith ("parse_negation: " ^ string_of_token_list l) -let parse_refl = function +(* let parse_refl = function (* [] -> ReflEmpty | [Text "się"] -> ReflSie | [Text ""] -> ReflEmpty @@ -164,7 +164,7 @@ let parse_refl = function | [Text "true"] -> ReflSie *) | [Text "nosię"] -> ReflFalse | [Text "się"] -> ReflTrue - | l -> failwith ("parse_refl: " ^ string_of_token_list l) + | l -> failwith ("parse_refl: " ^ string_of_token_list l) *) let parse_ctype = function [Text "int"] -> Int @@ -172,11 +172,11 @@ let parse_ctype = function | [Text "_"] -> CompTypeUndef | l -> failwith ("parse_ctype: " ^ string_of_token_list l) -let parse_acm = function +(* let parse_acm = function (* [Text "int"] -> Int | [Text "rel"] -> Rel *) | [Text "_"] -> AcmUndef - | l -> failwith ("parse_acm: " ^ string_of_token_list l) + | l -> failwith ("parse_acm: " ^ string_of_token_list l) *) let parse_comp = function | [Text "co"] -> Comp "co" (* subst qub prep comp *) @@ -234,17 +234,17 @@ let parse_pos = function | "PPRON3",[number;case] -> PPRON3(parse_number number,parse_case case) 
| "SIEBIE",[case] -> SIEBIE(parse_case case) | "PREP",[case] -> PREP(parse_case case) - | "NUM",[case;gender;acm] -> NUM(parse_case case,parse_gender gender,parse_acm acm) + | "NUM",[case;gender] -> NUM(parse_case case,parse_gender gender) | "ADJ",[number;case;gender;grad] -> ADJ(parse_number number,parse_case case,parse_gender gender,parse_grad grad) | "ADV",[grad] -> ADV(parse_grad grad) - | "GER",[number;case;gender;aspect;negation;refl] -> GER(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation,parse_refl refl) + | "GER",[number;case;gender;aspect;negation] -> GER(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation) | "PPAS",[number;case;gender;aspect;negation] -> PPAS(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation) - | "PACT",[number;case;gender;aspect;negation;refl] -> PACT(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation,parse_refl refl) - | "INF",[aspect;negation;refl] -> INF(parse_aspect aspect,parse_negation negation,parse_refl refl) + | "PACT",[number;case;gender;aspect;negation] -> PACT(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation) + | "INF",[aspect;negation] -> INF(parse_aspect aspect,parse_negation negation) | "QUB",[] -> QUB - | "COMPAR",[] -> COMPAR + | "COMPAR",[] -> COMPAR Str | "COMP",[ctype] -> COMP(parse_ctype ctype) - | "PERS",[negation;refl] -> PERS(parse_negation negation,parse_refl refl) + | "PERS",[negation] -> PERS(parse_negation negation) | s,ll -> failwith ("parse_pos: " ^ s ^ "(" ^ String.concat "," (Xlist.map ll string_of_token_list) ^ ")") let rec parse_phrase = function @@ -253,7 +253,7 @@ let rec parse_phrase = function | "adjp",[case] -> AdjP(parse_case case) | "prepadjp",[[Text prep]; case] -> PrepAdjP(prep,parse_case case) | "comprepnp",[[Text prep]] -> ComprepNP prep - | 
"comparp",[[Text prep]] -> ComparP prep + | "comparp",[[Text prep]] -> ComparP(prep,Str) | "cp",[ctype;comp] -> CP(parse_ctype ctype,parse_comp comp) | "ncp",[case;ctype;comp] -> NCP(parse_case case,parse_ctype ctype,parse_comp comp) | "prepncp",[[Text prep];case;ctype;comp] -> PrepNCP(prep,parse_case case,parse_ctype ctype,parse_comp comp) diff --git a/lexSemantics/ENIAMwalReduce.ml b/lexSemantics/ENIAMwalReduce.ml index ada6e48..73b593a 100644 --- a/lexSemantics/ENIAMwalReduce.ml +++ b/lexSemantics/ENIAMwalReduce.ml @@ -1,7 +1,7 @@ (* - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty". - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences * * This library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -24,7 +24,7 @@ let create_phrase_reqs s (reqs,noreqs) = function | PrepNP(prep,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | PrepAdjP(prep,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | PrepNCP(prep,_,_,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs - | ComparP(prep) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs + | ComparP(prep,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | FixedP(prep) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | SimpleLexArg(lex,_) -> 
StringMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs | LexArg(_,lex,_) -> StringMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs @@ -35,7 +35,7 @@ let create_phrase_reqs2 s (reqs,noreqs) = function | PrepNP(prep,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | PrepAdjP(prep,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | PrepNCP(prep,_,_,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs - | ComparP(prep) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs + | ComparP(prep,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | FixedP(prep) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | SimpleLexArg(lex,_) -> IntMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs | LexArg(_,lex,_) -> IntMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs @@ -105,7 +105,7 @@ let reduce_phrase (test_comprep_reqs,test_comprep_reqs2,test_lexarg_reqs,test_le | PrepNP(prep,case) as phrase -> if test_lexemes prep then phrase else raise Not_found | PrepAdjP(prep,case) as phrase -> if test_lexemes prep then phrase else raise Not_found | ComprepNP(prep) as phrase -> if test_comprep_reqs prep && test_comprep_reqs2 prep then phrase else raise Not_found - | ComparP(prep) as phrase -> if test_lexemes prep then phrase else raise Not_found + | ComparP(prep,case) as phrase -> if test_lexemes prep then phrase else raise Not_found | CP(ctype,comp) -> CP(ctype,reduce_comp test_lexemes comp) | NCP(case,ctype,comp) -> if test_lexemes "to" then NCP(case,ctype,reduce_comp test_lexemes comp) else raise Not_found | PrepNCP(prep,case,ctype,comp) -> if test_lexemes prep && 
test_lexemes "to" then PrepNCP(prep,case,ctype,reduce_comp test_lexemes comp) else raise Not_found @@ -204,7 +204,10 @@ let entries,schemata,connected = - usunięcie adjunctów - uwzględnienie cech morfoskładniowych - scalenie schematów - - dodanie adjunctów + - dodanie adjunctów - pamiętać o padvp +*) +(* TODO + - leksykalizacje bez schema *) (* let _ = diff --git a/lexSemantics/ENIAMwalStringOf.ml b/lexSemantics/ENIAMwalStringOf.ml index 273fa9d..8167768 100644 --- a/lexSemantics/ENIAMwalStringOf.ml +++ b/lexSemantics/ENIAMwalStringOf.ml @@ -1,5 +1,5 @@ (* - * ENIAMwalenty, a converter for Polish Valence Dictionary "Walenty". + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences * @@ -55,11 +55,11 @@ let case = function | Part -> "part" | CaseAgr -> "agr" (* | CaseUAgr -> "uagr" - | NomAgr -> "nomagr" - | GenAgr -> "genagr" - | AllAgr -> "allagr" | AllUAgr -> "alluagr" *) | CaseUndef -> "_" + | AllAgr -> "allagr" + | NomAgr -> "nomagr" + | GenAgr -> "genagr" let rec comp = function Comp s -> s @@ -87,15 +87,15 @@ let grad = function Grad s -> s | GradUndef -> "_" -let refl = function +(* let refl = function (* ReflEmpty -> "" *) | ReflTrue -> "się" | ReflFalse -> "nosię" - | ReflUndef -> "_" + | ReflUndef -> "_" *) -let acm = function +(* let acm = function Acm s -> s - | AcmUndef -> "_" + | AcmUndef -> "_" *) let gf = function SUBJ -> "subj" @@ -108,17 +108,17 @@ let pos = function | PPRON3(n,c) -> "PPRON3(" ^ number n ^ "," ^ case c ^ ")" | SIEBIE(c) -> "SIEBIE(" ^ case c ^ ")" | PREP(c) -> "PREP(" ^ case c ^ ")" - | NUM(c,g,a) -> "NUM(" ^ case c ^ "," ^ gender g ^ "," ^ acm a ^ ")" + | NUM(c,g) -> "NUM(" ^ case c ^ "," ^ gender g ^ ")" | ADJ(n,c,g,gr) -> "ADJ(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ grad gr ^ ")" | ADV(gr) -> "ADV(" ^ grad gr ^ ")" 
- | GER(n,c,g,a,neg,r) -> "GER(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ "," ^ refl r ^ ")" - | PACT(n,c,g,a,neg,r) -> "PACT(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ "," ^ refl r ^ ")" + | GER(n,c,g,a,neg) -> "GER(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ ")" + | PACT(n,c,g,a,neg) -> "PACT(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ ")" | PPAS(n,c,g,a,neg) -> "PPAS(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ ")" - | INF(a,n,r) -> "INF(" ^ aspect a ^ "," ^ negation n ^ "," ^ refl r ^ ")" + | INF(a,n) -> "INF(" ^ aspect a ^ "," ^ negation n ^ ")" | QUB -> "QUB" - | COMPAR -> "COMPAR" + | COMPAR c -> "COMPAR(" ^ case c ^ ")" | COMP(c) -> "COMP(" ^ comp_type c ^ ")" - | PERS(n,r) -> "PERS(" ^ negation n ^ "," ^ refl r ^ ")" + | PERS(n) -> "PERS(" ^ negation n ^ ")" | FIXED -> "FIXED" let rec phrase = function @@ -129,7 +129,7 @@ let rec phrase = function (* | NumP(c) -> "nump(" ^ case c ^ ")" | PrepNumP(prep,c) -> "prepnump(" ^ prep ^ "," ^ case c ^ ")" *) | ComprepNP(prep) -> "comprepnp(" ^ prep ^ ")" - | ComparP(prep) -> "comparp(" ^ prep ^ ")" + | ComparP(prep,c) -> "comparp(" ^ prep ^ "," ^ case c ^ ")" | CP(ct,co) -> "cp(" ^ comp_type ct ^ "," ^ comp co ^ ")" | NCP(c,ct,co) -> "ncp(" ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")" | PrepNCP(prep,c,ct,co) -> "prepncp(" ^ prep ^ "," ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")" diff --git a/lexSemantics/ENIAMwalTypes.ml b/lexSemantics/ENIAMwalTypes.ml index b8ed94e..357addb 100644 --- a/lexSemantics/ENIAMwalTypes.ml +++ b/lexSemantics/ENIAMwalTypes.ml @@ -1,7 +1,7 @@ (* - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty". 
- * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences * * This library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -24,14 +24,14 @@ type opinion = Pewny | Potoczny | Watpliwy | Archaiczny | Zly | Wulgarny | Nieok type negation = Negation | Aff | NegationUndef (*| NegationNA*) type pred = PredTrue | PredFalse | PredUndef (*| PredNA*) type aspect = Aspect of string | AspectUndef (*| AspectNA*) -type case = Case of string | Str | Part | CaseAgr (*| NomAgr | GenAgr | AllAgr*) | CaseUndef (*| AllUAgr | CaseUAgr*) +type case = Case of string | Str | Part | CaseAgr | CaseUndef (*| AllUAgr | CaseUAgr*) | GenAgr | NomAgr | AllAgr type comp = Comp of string | Zeby | Gdy | CompUndef type comp_type = Int | Rel | CompTypeUndef (*| CompTypeAgr*) type number = Number of string | NumberUndef | NumberAgr type gender = Gender of string | GenderUndef | GenderAgr | Genders of string list type grad = Grad of string | GradUndef -type refl = (*ReflEmpty |*) ReflTrue | ReflFalse | ReflUndef -type acm = Acm of string | AcmUndef +(* type refl = (*ReflEmpty |*) ReflTrue | ReflFalse | ReflUndef *) +(* type acm = Acm of string | AcmUndef *) (*type mood = (*Mood of*) string (*| MoodUndef*) type tense = string @@ -47,17 +47,17 @@ type pos = | PPRON3 of number * case | SIEBIE of case | PREP of case - | NUM of case * gender * acm + | NUM of case * gender | ADJ of number * case * gender * grad | ADV of grad - | GER of number * case * gender * aspect * negation * refl - | PACT of number * case * gender * aspect * negation * refl + | GER of 
number * case * gender * aspect * negation + | PACT of number * case * gender * aspect * negation | PPAS of number * case * gender * aspect * negation - | INF of aspect * negation * refl + | INF of aspect * negation | QUB - | COMPAR + | COMPAR of case | COMP of comp_type - | PERS of (*number * gender * aspect * person * *)negation * refl + | PERS of (*number * gender * aspect * person * *)negation | FIXED type phrase = @@ -68,7 +68,7 @@ type phrase = (* | NumP of case | PrepNumP of string * case *) | ComprepNP of string - | ComparP of string (** case*) + | ComparP of string * case | CP of comp_type * comp | NCP of case * comp_type * comp | PrepNCP of string * case * comp_type * comp @@ -128,8 +128,8 @@ let empty_meaning = {mng_id = (-1); | GerAtrs of meaning list * string * negation * aspect | NonPersAtrs of meaning list * string * string * string * negation * aspect *) -type schema = {sch_id: int; opinion: opinion; reflexiveMark: refl; aspect: aspect; - negativity: negation; predicativity: pred; positions: position list; text_rep: string} +(* type schema = {sch_id: int; opinion: opinion; reflexiveMark: refl; aspect: aspect; + negativity: negation; predicativity: pred; positions: position list; text_rep: string} *) type lex_entry = (* Frame of frame_atrs * position list *) diff --git a/lexSemantics/ENIAMwalenty.ml b/lexSemantics/ENIAMwalenty.ml index 91fdf70..2cb437c 100644 --- a/lexSemantics/ENIAMwalenty.ml +++ b/lexSemantics/ENIAMwalenty.ml @@ -1,7 +1,7 @@ (* - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty". - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. 
+ * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences * * This library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by diff --git a/lexSemantics/README b/lexSemantics/README index 9678665..eaf4ecd 100644 --- a/lexSemantics/README +++ b/lexSemantics/README @@ -2,8 +2,7 @@ ENIAMsemValence Version 1.0 : ----------------------- ENIAMsemValence is a library that assigns tokens with lexicosemantic information. -It recognizes named entities and assigns thematic roles, -senses, valence and other semantic information to tokens. +It assigns thematic roles, word senses, valence and other semantic information to tokens. Install ------- diff --git a/lexSemantics/TODO b/lexSemantics/TODO index 675bdb8..9a7e01a 100644 --- a/lexSemantics/TODO +++ b/lexSemantics/TODO @@ -3,3 +3,56 @@ - sprawdzić czy walencja nazw własnych jest dobrze zrobiona. - trzeba zrobić słownik nazw własnych - trzeba poprawić selekcję preferencji selecyjnych: jeśli podrzędnikiem jest zaimek nie muszą jawnie występować wśród sensów. + +- błędy w realizacjach +xp(abl[prepadjp(z,gen)] +na korzyść - na niekorzyść +xp(mod[prepnp(jako,str)]) -> xp(mod[compar(jako)]) +xp(mod[prepadjp(jako,str)]) -> xp(mod[compar(jako)]) +na sposób - zgłoszone + +cyrk advp(misc) [54480] +banalnie - pred +subst woda lex(880,woda,subst) {lex(święcony,ADJ(agr,gen,agr,pos))} + +uciąć: pewny: _: : perf: subj{np(str)} + obj{lex(np(str),sg,'gałąź',ratr1({lex(cp(rel[który]),aff,'siedzieć',,ratr(subj{np(str)}+{lex(adjp(agr),agr,agr,pos,'sam',natr)}+{lex(prepadjp(na,loc),sg,f,pos,'który',natr)}))}))} + +np(str) -> adjp(agr) +przychodzić: pewny: _: : imperf: subj{lex(np(str),sg,'godzina',ratr1({lex(np(str),agr,'zły',natr)}))} + {prepnp(na,acc)} + +Czym jest podmiot podniesiony w poniższych ramach i jak się zachowuje w stronie biernej? 
+obladzać: pewny: _: : imperf: subj{E} + obj{np(str)} + {np(dat)} +oblodzić: pewny: _: : perf: subj{E} + obj{np(str)} + {np(dat)} + +Czy 'zły' nie powinien być tu przymiotnikiem? +przychodzić: pewny: _: : imperf: subj{lex(np(str),sg,'godzina',ratr1({lex(np(str),agr,'zły',natr)}))} + {prepnp(na,acc)} +przyjść: pewny: _: : perf: subj{lex(np(str),sg,'godzina',ratr1({lex(np(str),agr,'zły',natr)}))} + {prepnp(na,acc)} + +Czy zamiast adjp(str) -> adjp(agr) +kreślić: pewny: _: : imperf: subj{np(str)} + obj{np(str)} + {lex(np(inst),pl,XOR('barwa','kolor'),ratr({adjp(str)}))} +malować: pewny: _: : imperf: subj{np(str)} + obj{np(str)} + {lex(np(inst),pl,XOR('barwa','kolor'),ratr({adjp(str)}))} +namalować: pewny: _: : perf: subj{np(str)} + obj{np(str)} + {lex(np(inst),pl,XOR('barwa','kolor'),ratr({adjp(str)}))} +odmalować: pewny: _: : perf: subj{np(str)} + obj{np(str)} + {lex(np(inst),pl,XOR('barwa','kolor'),ratr({adjp(str)}))} +odmalowywać: pewny: _: : imperf: subj{np(str)} + obj{np(str)} + {lex(np(inst),pl,XOR('barwa','kolor'),ratr({adjp(str)}))} +zajrzeć: pewny: _: : perf: subj{lex(np(str),sg,XOR('bieda','głód','nędza','śmierć'),atr({adjp(str)}))} + {np(dat)} + {lex(prepnp(w,acc),pl,'oko',natr)} +chwycić: pewny: _: : perf: subj{np(str)} + {lex(np(inst),pl,'usta',atr({adjp(str)}))} + {lex(np(str),sg,'powietrze',atr({adjp(agr)}))} +chwytać: pewny: _: : imperf: subj{np(str)} + {lex(np(inst),pl,'usta',atr({adjp(str)}))} + {lex(np(str),sg,'powietrze',atr({adjp(agr)}))} +łapać: pewny: _: : imperf: subj{np(str)} + obj{lex(np(str),sg,'powietrze',atr({adjp(agr)}))} + {lex(np(inst),pl,'usta',atr({adjp(str)}))} +złapać: pewny: _: : perf: subj{np(str)} + obj{lex(np(str),sg,'powietrze',atr({adjp(agr)}))} + {lex(np(inst),pl,'usta',atr({adjp(str)}))} + + +Jak się zachowuje podmiot zdaniowy w stronie biernej? 
- o to już pytałem +podciąć: pewny: _: : perf: subj{cp(gdy)} + obj{lex(np(str),pl,'skrzydło',natr)} + {np(dat)} +podciąć: pewny: _: : perf: subj{cp(jak)} + obj{lex(np(str),pl,'skrzydło',natr)} + {np(dat)} +podciąć: pewny: _: : perf: subj{cp(kiedy)} + obj{lex(np(str),pl,'skrzydło',natr)} + {np(dat)} +podcinać: pewny: _: : imperf: subj{cp(gdy)} + obj{lex(np(str),pl,'skrzydło',natr)} + {np(dat)} +podcinać: pewny: _: : imperf: subj{cp(jak)} + obj{lex(np(str),pl,'skrzydło',natr)} + {np(dat)} +podcinać: pewny: _: : imperf: subj{cp(kiedy)} + obj{lex(np(str),pl,'skrzydło',natr)} + {np(dat)} +sprawiać: pewny: _: : imperf: subj{np(str);cp(że);ncp(str,że)} + obj{np(str)} + {np(dat)} +sprawić: pewny: _: : perf: subj{np(str);cp(że);ncp(str,że)} + obj{np(str)} + {np(dat)} +zaskakiwać: pewny: _: : imperf: subj{np(str);cp(int);cp(że);ncp(str,int);ncp(str,że)} + obj{np(str)} +zaskoczyć: pewny: _: : perf: subj{np(str);cp(int);cp(że);ncp(str,int);ncp(str,że)} + obj{np(str)} +zwracać: pewny: _: : imperf: subj{cp(int)} + obj{lex(np(str),sg,'uwaga',atr({possp}))} +zwracać: pewny: _: : imperf: subj{cp(że)} + obj{lex(np(str),sg,'uwaga',atr({possp}))} +zwrócić: pewny: _: : perf: subj{cp(int)} + obj{lex(np(str),sg,'uwaga',atr({possp}))} +zwrócić: pewny: _: : perf: subj{cp(że)} + obj{lex(np(str),sg,'uwaga',atr({possp}))} diff --git a/lexSemantics/entries.ml b/lexSemantics/entries.ml index 904e85a..5db5fdc 100644 --- a/lexSemantics/entries.ml +++ b/lexSemantics/entries.ml @@ -1,7 +1,7 @@ (* - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty". - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. 
+ * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences * * This library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by diff --git a/lexSemantics/makefile b/lexSemantics/makefile index f2c6d89..6070066 100644 --- a/lexSemantics/makefile +++ b/lexSemantics/makefile @@ -3,7 +3,7 @@ OCAMLOPT=ocamlopt OCAMLDEP=ocamldep INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam OCAMLFLAGS=$(INCLUDES) -g -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa #eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-plWordnet.cmxa eniam-lcg-parser.cmxa #eniam-lcg-grammar-pl.cmxa #eniam-lexSemantics.cmxa +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa #eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-plWordnet.cmxa #eniam-lexSemantics.cmxa INSTALLDIR=`ocamlc -where`/eniam SOURCES= ENIAMlexSemanticsTypes.ml ENIAMcategories.ml ENIAMlexSemanticsData.ml ENIAMlexSemantics.ml @@ -28,8 +28,8 @@ eniam-lexSemantics.cmxa: $(SOURCES) # test: test.ml # $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml -test: entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml test.ml - $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml test.ml +test: entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml test.ml + $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml test.ml .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx diff --git a/lexSemantics/test.ml b/lexSemantics/test.ml index 32289e7..5b541b9 100644 --- 
a/lexSemantics/test.ml +++ b/lexSemantics/test.ml @@ -1,7 +1,7 @@ (* * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences * * This library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by diff --git a/lexSemantics/test2.ml b/lexSemantics/test2.ml index e5a708b..a4e6f2f 100644 --- a/lexSemantics/test2.ml +++ b/lexSemantics/test2.ml @@ -1,7 +1,7 @@ (* - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty". - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences * * This library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by diff --git a/subsyntax/README b/subsyntax/README index b62a123..e64cb5f 100644 --- a/subsyntax/README +++ b/subsyntax/README @@ -4,6 +4,7 @@ ENIAMsubsyntax Version 1.1 : ENIAMsubsyntax is a library that - performs tokenization, lemmatization, part of speech tagging; - detects MWE and abbreviations; +- recognizes named entities; - splits text into sentences. 
Install diff --git a/walenty/ENIAMwalConnect.ml b/walenty/ENIAMwalConnect.ml index 2e37a5b..34d1989 100644 --- a/walenty/ENIAMwalConnect.ml +++ b/walenty/ENIAMwalConnect.ml @@ -72,7 +72,7 @@ let connect entry = let phrases = process_morfs position.morfs in let morfs = Xlist.fold phrase_ids [] (fun morfs phrase_id -> try IntMap.find phrases phrase_id :: morfs - with Not_found -> Printf.printf "%s\n%!" entry.form_orth;morfs) in + with Not_found -> if entry.form_orth <> "podobać" then Printf.printf "connect: %s\n%!" entry.form_orth;morfs) in {position with role=arg.role; role_attr=arg.role_attribute; sel_prefs=sel_prefs; morfs=List.rev morfs} :: conn_positions)) in (* let meanings = List.rev (Xlist.rev_map frame.meanings (fun id -> diff --git a/walenty/ENIAMwalGenerate.ml b/walenty/ENIAMwalGenerate.ml index e43d298..083fdcb 100644 --- a/walenty/ENIAMwalGenerate.ml +++ b/walenty/ENIAMwalGenerate.ml @@ -20,8 +20,19 @@ open ENIAMwalTypes open Xstd +let correct_walenty entry = + if entry.form_orth = "podobać" then + {entry with schemata=Xlist.map entry.schemata (fun s -> + {s with positions=Xlist.map s.positions (fun p -> + if p.gf=SUBJ then {p with morfs=List.flatten (Xlist.map p.morfs (function + MorfId 126 -> [] + | m -> [m]))} + else p)})} + else entry + let load_walenty walenty_filename expands_filename = let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in + let walenty = Xlist.rev_map walenty correct_walenty in let expands = ENIAMwalTEI.load_expands expands_filename in let meanings = Xlist.fold walenty IntMap.empty (fun meanings entry -> diff --git a/walenty/ENIAMwalLex.ml b/walenty/ENIAMwalLex.ml index c7c2578..33270ab 100644 --- a/walenty/ENIAMwalLex.ml +++ b/walenty/ENIAMwalLex.ml @@ -41,6 +41,39 @@ let rec split_elexeme = function genders,[ORcoord(List.rev l)] | Elexeme gender -> [gender],[] +let rec get_lexemes = function + Lexeme s -> [s] + | ORconcat l -> List.flatten (Xlist.map l get_lexemes) + | ORcoord l -> List.flatten (Xlist.map l 
get_lexemes) + | XOR l -> List.flatten (Xlist.map l get_lexemes) + | Elexeme gender -> failwith "get_lexemes" + +let rec remove_list set = function + [] -> [] + | s :: l -> if Xlist.mem set s then remove_list set l else s :: (remove_list set l) + +let rec check_lexemes_morfs l = function + LexPhrase(lexs,(_,schema)) -> + let l = Xlist.fold lexs l (fun l (_,lex) -> + remove_list (get_lexemes lex) l) in + check_lexemes_schema l schema + | _ -> l + +and check_lexemes_schema l schema = + Xlist.fold schema l (fun l s -> + Xlist.fold s.morfs l check_lexemes_morfs) + +let add_refl_restr (restr,schema) = + (match restr with + Natr -> Ratr + | Atr1 -> Atr + | Atr -> Atr + | Ratr1 -> Ratr + | Ratr -> Ratr + | Ratrs -> Ratrs + | NoRestr -> failwith "add_refl_restr"), + position [LexPhrase([QUB,Lexeme "się"],(Natr,[]))] :: schema + let rec expand_lexicalizations_schema schema = Xlist.map schema (fun s -> {s with morfs=expand_lexicalizations_morfs s.morfs}) @@ -51,15 +84,7 @@ and expand_lexicalizations_morfs morfs = (* uproszczenie polegające na zezwolen LexPhrase(pos_lex,(restr,schema)) -> LexPhrase(pos_lex,(restr,expand_lexicalizations_schema schema)) | morf -> morf in match morf with - (* LexPhrase([ADV _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf] *) - (* | LexPhrase([PREP _,_;SUBST _,_],(_,schema)) -> if remove_trivial_args schema <> [] then print_endline (ENIAMwalStringOf.morf morf); [morf] *) - (* | LexPhrase([PREP _,_;GER _,_],(_,schema)) -> if remove_trivial_args schema <> [] then print_endline (ENIAMwalStringOf.morf morf); [morf] *) - (* | LexPhrase([NUM _,_;_],(_,schema)) -> if remove_trivial_args schema <> [] then print_endline (ENIAMwalStringOf.morf morf); [morf] *) - (* | LexPhrase([PREP _,_;NUM _,_;_],(_,schema)) -> if remove_trivial_args schema <> [] then print_endline (ENIAMwalStringOf.morf morf); [morf] *) - (* | LexPhrase([PREP _,_;ADJ _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf] - | LexPhrase([PREP _,_;PPAS 
_,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf] - | LexPhrase([PREP _,_;PACT _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf] *) - (* | Phrase(PrepNumP(prep,case)) -> [LexPhrase([PREP case,Lexeme prep],(Ratrs,[position(*2*) [Phrase(NumP(case))]]))] *) +(* | Phrase(PrepNumP(prep,case)) -> [LexPhrase([PREP case,Lexeme prep],(Ratrs,[position(*2*) [Phrase(NumP(case))]]))] *) | Phrase(PrepNumP(prep,case)) -> [Phrase(PrepNP(prep,case))] (* FIXME: celowe uproszczenie *) | LexPhrase([PREP pcase,plex;SUBST(n,c),slex],(Atr1,[{morfs=[LexPhrase([QUB,_],_)]} as s])) -> (* print_endline (ENIAMwalStringOf.morf morf); *) @@ -74,62 +99,30 @@ and expand_lexicalizations_morfs morfs = (* uproszczenie polegające na zezwolen [LexPhrase([PREP pcase,plex],(Ratrs,[position [LexPhrase([SUBST(n,c),slex],(Natr,[]))];s(*{s with dir=Backward}*)]))] | LexPhrase([PREP pcase,plex;pos,lex],restr) -> [LexPhrase([PREP pcase,plex],(Ratrs,[position [LexPhrase([pos,lex],restr)]]))] - | LexPhrase([PREP pcase,plex;NUM(c,g,a),nlex;pos,lex],restr) -> + | LexPhrase([PREP pcase,plex;NUM(c,g),nlex;pos,lex],restr) -> let genders,lexs = split_elexeme lex in Xlist.map genders (fun gender -> - LexPhrase([PREP pcase,plex],(Ratrs,[position [LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[(*num*)position [Phrase Null(*Pro*)]]))]]))) @ (*FIXME*) + LexPhrase([PREP pcase,plex],(Ratrs,[position [LexPhrase([NUM(c,gender),nlex],(Ratrs,[(*num*)position [Phrase Null(*Pro*)]]))]]))) @ (*FIXME*) Xlist.map lexs (fun lex -> - LexPhrase([PREP pcase,plex],(Ratrs,[position [LexPhrase([NUM(c,g,a),nlex],(Ratrs,[(*num*)position [LexPhrase([pos,lex],restr)]]))]]))) - | LexPhrase([NUM(c,g,a),nlex;pos,lex],restr) -> + LexPhrase([PREP pcase,plex],(Ratrs,[position [LexPhrase([NUM(c,g),nlex],(Ratrs,[(*num*)position [LexPhrase([pos,lex],restr)]]))]]))) + | LexPhrase([NUM(c,g),nlex;pos,lex],restr) -> let genders,lexs = split_elexeme lex in Xlist.map genders (fun gender -> - 
LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[(*num*)position [Phrase Null(*Pro*)]]))) @ + LexPhrase([NUM(c,gender),nlex],(Ratrs,[(*num*)position [Phrase Null(*Pro*)]]))) @ Xlist.map lexs (fun lex -> - LexPhrase([NUM(c,g,a),nlex],(Ratrs,[(*num*)position [LexPhrase([pos,lex],restr)]]))) + LexPhrase([NUM(c,g),nlex],(Ratrs,[(*num*)position [LexPhrase([pos,lex],restr)]]))) + | LexPhrase([INF(a,n),lex;QUB,Lexeme "się"],restr) -> [LexPhrase([INF(a,n),lex],add_refl_restr restr)] + | LexPhrase([COMP ctype,clex;pos,lex;QUB,Lexeme "się"],restr) -> + if Xlist.size (check_lexemes_schema (get_lexemes clex) (snd restr)) = 0 then + [LexPhrase([pos,lex],add_refl_restr restr)] + else [LexPhrase([COMP ctype,clex],(Ratrs,[(*std*)position (*Forward*) [LexPhrase([pos,lex],add_refl_restr restr)]]))] | LexPhrase([COMP ctype,clex;pos,lex],restr) -> - [LexPhrase([COMP ctype,clex],(Ratrs,[(*std*)position (*Forward*) [LexPhrase([pos,lex],restr)]]))] - | LexPhrase([SUBST(n,c),slex;COMP ctype,clex;pos,lex],restr) -> - [LexPhrase([SUBST(n,c),slex],(Ratrs,[(*std*)position (*Forward*) [LexPhrase([COMP ctype,clex],(Ratrs,[(*std*)position (*Forward*) [LexPhrase([pos,lex],restr)]]))]]))] (* FIXME: poprawić po zrobieniu NCP *) + if Xlist.size (check_lexemes_schema (get_lexemes clex) (snd restr)) = 0 then + [LexPhrase([pos,lex],restr)] + else [LexPhrase([COMP ctype,clex],(Ratrs,[(*std*)position (*Forward*) [LexPhrase([pos,lex],restr)]]))] | LexPhrase(_::_::_,_) -> failwith ("expand_lexicalizations_morfs: " ^ ENIAMwalStringOf.morf morf) - (* | LexPhrase([PREP pcase,plex;SUBST(n,c),slex],(Atr1,[gf,cr,ce,[LexPhrase([QUB,lex],arestr)]])) -> - (* print_endline (ENIAMwalStringOf.morf morf); *) - [LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))]])); - LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))];gf,cr,ce,[LexPhrase([QUB,lex],arestr)]]))] - | 
LexPhrase([PREP(pcase),plex;SUBST(n,c),slex],(Atr1,[gf,cr,ce,[LexPhrase([ADV gr,lex],arestr)]])) -> - (* print_endline (ENIAMwalStringOf.morf morf); *) - [LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))]])); - LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))];gf,cr,ce,[LexPhrase([ADV gr,lex],arestr)]]))] - | LexPhrase([PREP pcase,plex;SUBST(n,c),slex],(Ratr1,[gf,cr,ce,[LexPhrase([ADV gr,lex],arestr)]])) -> - (* print_endline (ENIAMwalStringOf.morf morf); *) - [LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))];gf,cr,ce,[LexPhrase([ADV gr,lex],arestr)]]))] - | LexPhrase([PREP pcase,plex;pos,lex],restr) -> - [LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))] - | LexPhrase([PREP pcase,plex;NUM(c,g,a),nlex;pos,lex],restr) -> - let genders,lexs = split_elexeme lex in - Xlist.map genders (fun gender -> - LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[("OBJ","QUANT-ARG",["T"]),[],[],[Phrase Pro]]))]]))) @ - Xlist.map lexs (fun lex -> - LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([NUM(c,g,a),nlex],(Ratrs,[("OBJ","QUANT-ARG",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))]]))) - | LexPhrase([NUM(c,g,a),nlex;pos,lex],restr) -> - let genders,lexs = split_elexeme lex in - Xlist.map genders (fun gender -> - LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[("OBJ","QUANT-ARG",["T"]),[],[],[Phrase Pro]]))) @ - Xlist.map lexs (fun lex -> - LexPhrase([NUM(c,g,a),nlex],(Ratrs,[("OBJ","QUANT-ARG",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))) - | LexPhrase([COMP ctype,clex;pos,lex],restr) -> - [LexPhrase([COMP ctype,clex],(Ratrs,[("C","",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))] - | LexPhrase([SUBST(n,c),slex;COMP ctype,clex;pos,lex],restr) -> - 
[LexPhrase([SUBST(n,c),slex],(Ratrs,[("OBJ","",["T"]),[],[],[LexPhrase([COMP ctype,clex],(Ratrs,[("C","",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))]]))] - | LexPhrase(_::_::_,_) -> failwith ("expand_lexicalizations_morfs: " ^ ENIAMwalStringOf.morf morf)*) | morf -> [morf])) -let rec get_lexemes = function - Lexeme s -> [s] - | ORconcat l -> List.flatten (Xlist.map l get_lexemes) - | ORcoord l -> List.flatten (Xlist.map l get_lexemes) - | XOR l -> List.flatten (Xlist.map l get_lexemes) - | Elexeme gender -> failwith "get_lexemes" - let winien = StringSet.of_list ["winien"; "rad"; "powinien"; "nierad"; "niekontent"; "kontent"; "gotów"] let pred = StringSet.of_list ["żal"; "śmiech"; "znać"; "wstyd"; "wolno"; "widać"; "wiadomo"; "warto"; "trzeba"; "trza"; "słychać"; "szkoda"; "strach"; "stać"; "sposób"; "potrzeba"; "pora"; @@ -148,7 +141,11 @@ let get_pos lex = function | "się" -> ["qub"] | _ -> ["subst"]) | PREP _ -> ["prep"] - | NUM _ -> ["num"] + | NUM _ -> + (try + let _ = int_of_string lex in + ["intnum"] + with _ -> ["num"]) | ADV _ -> ["adv"] | ADJ _ -> ["adj"] | GER _ -> ["ger"] diff --git a/walenty/ENIAMwalStringOf.ml b/walenty/ENIAMwalStringOf.ml index 481de8b..51a16aa 100644 --- a/walenty/ENIAMwalStringOf.ml +++ b/walenty/ENIAMwalStringOf.ml @@ -93,9 +93,9 @@ let refl = function | ReflFalse -> "nosię" | ReflUndef -> "_" -let acm = function +(* let acm = function Acm s -> s - | AcmUndef -> "_" + | AcmUndef -> "_" *) let gf = function SUBJ -> "subj" @@ -108,17 +108,17 @@ let pos = function | PPRON3(n,c) -> "PPRON3(" ^ number n ^ "," ^ case c ^ ")" | SIEBIE(c) -> "SIEBIE(" ^ case c ^ ")" | PREP(c) -> "PREP(" ^ case c ^ ")" - | NUM(c,g,a) -> "NUM(" ^ case c ^ "," ^ gender g ^ "," ^ acm a ^ ")" + | NUM(c,g) -> "NUM(" ^ case c ^ "," ^ gender g ^ (*"," ^ acm a ^*) ")" | ADJ(n,c,g,gr) -> "ADJ(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ grad gr ^ ")" | ADV(gr) -> "ADV(" ^ grad gr ^ ")" - | GER(n,c,g,a,neg,r) -> "GER(" ^ number n ^ "," ^ case c ^ "," ^ 
gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ "," ^ refl r ^ ")" - | PACT(n,c,g,a,neg,r) -> "PACT(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ "," ^ refl r ^ ")" + | GER(n,c,g,a,neg) -> "GER(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg (*^ "," ^ refl r*) ^ ")" + | PACT(n,c,g,a,neg) -> "PACT(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg (*^ "," ^ refl r*) ^ ")" | PPAS(n,c,g,a,neg) -> "PPAS(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ ")" - | INF(a,n,r) -> "INF(" ^ aspect a ^ "," ^ negation n ^ "," ^ refl r ^ ")" + | INF(a,n) -> "INF(" ^ aspect a ^ "," ^ negation n (*^ "," ^ refl r*) ^ ")" | QUB -> "QUB" | COMPAR -> "COMPAR" | COMP(c) -> "COMP(" ^ comp_type c ^ ")" - | PERS(n,r) -> "PERS(" ^ negation n ^ "," ^ refl r ^ ")" + | PERS(n) -> "PERS(" ^ negation n ^ (*"," ^ refl r ^*) ")" | FIXED -> "FIXED" let phrase = function @@ -136,7 +136,7 @@ let phrase = function | InfP(a) -> "infp(" ^ aspect a (*^ req r*) ^ ")" | AdvP -> "advp" | FixedP s -> "fixed(" ^ s ^ ")" - | Num(c,a) -> "num(" ^ case c ^ "," ^ acm a ^ ")" + (* | Num(c,a) -> "num(" ^ case c ^ "," ^ acm a ^ ")" *) | Or -> "or" | Qub -> "qub" (* | Pro -> "pro" diff --git a/walenty/ENIAMwalTEI.ml b/walenty/ENIAMwalTEI.ml index 573974e..1950ef2 100644 --- a/walenty/ENIAMwalTEI.ml +++ b/walenty/ENIAMwalTEI.ml @@ -225,11 +225,14 @@ let process_lex_phrase lemma = function | PrepNP(prep,case),number,GenderUndef,GradUndef,NegationUndef,ReflUndef -> [PREP case,Lexeme prep;SUBST(number,case),lemma] | AdjP(case),number,gender,grad,NegationUndef,ReflUndef -> [ADJ(number,case,gender,grad),lemma] | PrepAdjP(prep,case),number,gender,grad,NegationUndef,ReflUndef -> [PREP case,Lexeme prep;ADJ(number,case,gender,grad),lemma] - | InfP(aspect),NumberUndef,GenderUndef,GradUndef,negation,refl -> [INF(aspect,negation,refl),lemma] + | 
InfP(aspect),NumberUndef,GenderUndef,GradUndef,negation,ReflTrue -> [INF(aspect,negation),lemma;QUB,Lexeme "się"] + | InfP(aspect),NumberUndef,GenderUndef,GradUndef,negation,refl -> [INF(aspect,negation),lemma] | PpasP(case),number,gender,GradUndef,negation,ReflUndef -> [PPAS(number,case,gender,AspectUndef,negation),lemma] | PrepPpasP(prep,case),number,gender,GradUndef,negation,ReflUndef -> [PREP case,Lexeme prep;PPAS(number,case,gender,AspectUndef,negation),lemma] - | PactP(case),number,gender,GradUndef,negation,refl -> [PACT(number,case,gender,AspectUndef,negation,refl),lemma] - | PrepGerP(prep,case),number,GenderUndef,GradUndef,negation,refl -> [PREP case,Lexeme prep;GER(number,case,GenderUndef,AspectUndef,negation,refl),lemma] + | PactP(case),number,gender,GradUndef,negation,ReflTrue -> [PACT(number,case,gender,AspectUndef,negation),lemma;QUB,Lexeme "się"] + | PactP(case),number,gender,GradUndef,negation,refl -> [PACT(number,case,gender,AspectUndef,negation),lemma] + | PrepGerP(prep,case),number,GenderUndef,GradUndef,negation,ReflTrue -> [PREP case,Lexeme prep;GER(number,case,GenderUndef,AspectUndef,negation),lemma;QUB,Lexeme "się"] + | PrepGerP(prep,case),number,GenderUndef,GradUndef,negation,refl -> [PREP case,Lexeme prep;GER(number,case,GenderUndef,AspectUndef,negation),lemma] | Qub,NumberUndef,GenderUndef,GradUndef,NegationUndef,ReflUndef -> [QUB,lemma] | AdvP,NumberUndef,GenderUndef,grad,NegationUndef,ReflUndef -> [ADV grad,lemma] | phrase,number,gender,grad,negation,reflex -> @@ -248,10 +251,14 @@ let rec process_lex lex = function | PhraseAbbr(Advp mode,[]),[],lemma,Lexeme "" -> let poss = process_lex_phrase lemma (AdvP,lex.lex_number,lex.lex_gender,lex.lex_degree,lex.lex_negation,lex.lex_reflex) in LexPhrase(poss,lex.lex_modification) - | Phrase (NumP(case)),[],lemma,num_lemma -> LexPhrase([NUM(case,GenderUndef,AcmUndef),num_lemma;SUBST(NumberUndef,CaseUndef),lemma],lex.lex_modification) - | Phrase (PrepNumP(prep,case)),[],lemma,num_lemma -> 
LexPhrase([PREP case,Lexeme prep;NUM(case,GenderUndef,AcmUndef),num_lemma;SUBST(NumberUndef,CaseUndef),lemma],lex.lex_modification) - | PhraseComp(Cp,(ctype,[Comp comp])),[],lemma,Lexeme "" -> LexPhrase([COMP ctype,Lexeme comp;PERS(lex.lex_negation,lex.lex_reflex),lemma],lex.lex_modification) - | PhraseComp(Cp,(ctype,[Comp comp1;Comp comp2])),[],lemma,Lexeme "" -> LexPhrase([COMP ctype,XOR[Lexeme comp1;Lexeme comp2];PERS(lex.lex_negation,lex.lex_reflex),lemma],lex.lex_modification) + | Phrase (NumP(case)),[],lemma,num_lemma -> LexPhrase([NUM(case,GenderUndef),num_lemma;SUBST(NumberUndef,CaseUndef),lemma],lex.lex_modification) + | Phrase (PrepNumP(prep,case)),[],lemma,num_lemma -> LexPhrase([PREP case,Lexeme prep;NUM(case,GenderUndef),num_lemma;SUBST(NumberUndef,CaseUndef),lemma],lex.lex_modification) + | PhraseComp(Cp,(ctype,[Comp comp])),[],lemma,Lexeme "" -> + if lex.lex_reflex = ReflTrue then LexPhrase([COMP ctype,Lexeme comp;PERS(lex.lex_negation),lemma;QUB,Lexeme "się"],lex.lex_modification) + else LexPhrase([COMP ctype,Lexeme comp;PERS(lex.lex_negation),lemma],lex.lex_modification) + | PhraseComp(Cp,(ctype,[Comp comp1;Comp comp2])),[],lemma,Lexeme "" -> + if lex.lex_reflex = ReflTrue then LexPhrase([COMP ctype,XOR[Lexeme comp1;Lexeme comp2];PERS(lex.lex_negation),lemma;QUB,Lexeme "się"],lex.lex_modification) + else LexPhrase([COMP ctype,XOR[Lexeme comp1;Lexeme comp2];PERS(lex.lex_negation),lemma],lex.lex_modification) | Phrase phrase,[],lemma,Lexeme "" -> let poss = process_lex_phrase lemma (phrase,lex.lex_number,lex.lex_gender,lex.lex_degree,lex.lex_negation,lex.lex_reflex) in LexPhrase(poss,lex.lex_modification) diff --git a/walenty/ENIAMwalTypes.ml b/walenty/ENIAMwalTypes.ml index 27c19e8..75758d8 100644 --- a/walenty/ENIAMwalTypes.ml +++ b/walenty/ENIAMwalTypes.ml @@ -31,7 +31,7 @@ type number = Number of string | NumberUndef | NumberAgr type gender = Gender of string | GenderUndef | GenderAgr | Genders of string list type grad = Grad of string | 
GradUndef type refl = ReflEmpty | ReflTrue | ReflFalse | ReflUndef -type acm = Acm of string | AcmUndef +(* type acm = Acm of string | AcmUndef *) type gf = SUBJ | OBJ | ARG @@ -41,17 +41,17 @@ type pos = | PPRON3 of number * case | SIEBIE of case | PREP of case - | NUM of case * gender * acm + | NUM of case * gender (* acm*) | ADJ of number * case * gender * grad | ADV of grad - | GER of number * case * gender * aspect * negation * refl - | PACT of number * case * gender * aspect * negation * refl + | GER of number * case * gender * aspect * negation (** refl*) + | PACT of number * case * gender * aspect * negation (* refl*) | PPAS of number * case * gender * aspect * negation - | INF of aspect * negation * refl + | INF of aspect * negation (* refl*) | QUB | COMPAR | COMP of comp_type - | PERS of (*number * gender * aspect * person * *)negation * refl + | PERS of (*number * gender * aspect * person * *)negation (* refl*) | FIXED type phrase = @@ -69,7 +69,7 @@ type phrase = | InfP of aspect | AdvP | FixedP of string - | Num of case * acm + (* | Num of case (* acm*) *) | Or | Qub (* | Pro