(* * ENIAM_LCGgrammarPL is a library that provides LCG lexicon form Polish * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences * * This library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. *) open Xstd open ENIAM_LCGgrammarPLtypes open ENIAM_LCGtypes type rule = Basic of string | Quant of (cat * string) list * string | Raised of (cat * string) list * string * cat list | Quot of (cat * string) list * string | Inclusion of string | Conj of (cat * string) list * string | Bracket of string let symbol_weight = 1. let measure_weight = 0.5 (* FIXME: "Można było" - brakuje uzgodnienia rodzaju przymiotnika w przypadku predykatywnym, i ogólnie kontroli składniowej *) let grammar = [ (* symbole występujące w tekście - daty itp. i słowa określające ich typy *) "lemma=dzień,pos=subst,number=sg,case=gen", Basic "day-lex/(date+day+day-month)",symbol_weight; "lemma=dzień,pos=subst,number=sg", Basic "np*number*case*gender*person/(date+day+day-month)",symbol_weight; "lemma=dzień,pos=subst,number=pl", Basic "np*number*case*gender*person/(date-interval+day-interval+day-month-interval)",symbol_weight; "pos=date", Basic "date{schema}",symbol_weight; "pos=date-interval", Basic "date-interval",symbol_weight; "pos=day", Basic "day/month-lex",symbol_weight; "pos=day-interval", Basic "day-interval/month-lex",symbol_weight; "pos=day-month", Basic "day-month{schema}",symbol_weight; "pos=day-month-interval", Basic "day-month-interval",symbol_weight; "lemma=styczeń|luty|marzec|kwiecień|maj|czerwiec|lipiec|sierpień|wrzesień|październik|litopad|grudzień,pos=subst,number=sg,case=gen", Basic "month-lex/(1+year+np*T*gen*T*T)",symbol_weight; "lemma=styczeń|luty|marzec|kwiecień|maj|czerwiec|lipiec|sierpień|wrzesień|październik|litopad|grudzień,pos=subst,number=sg", Basic "np*number*case*gender*person/year",symbol_weight; "pos=month-interval", Basic "month-interval",symbol_weight; "lemma=rok,pos=subst,number=sg",Basic "np*number*case*gender*person|year",symbol_weight; "lemma=rok,pos=subst,number=pl",Basic "np*number*case*gender*person/year-interval",symbol_weight; "pos=year", Basic "year",symbol_weight; "pos=year-interval", Basic "year-interval",symbol_weight; "lemma=wiek,pos=subst,number=sg",Basic "np*number*case*gender*person|roman",symbol_weight; "lemma=wiek,pos=subst,number=pl",Basic "np*number*case*gender*person/roman-interval",symbol_weight; "pos=roman", Basic "roman",symbol_weight; "pos=roman-interval", Basic "roman-interval",symbol_weight; "lemma=godzina,pos=subst,number=sg",Basic "np*number*case*gender*person/(hour+hour-minute)",symbol_weight; "lemma=godzina,pos=subst,number=pl",Basic "np*number*case*gender*person/(hour-interval+hour-minute-interval)",symbol_weight; "pos=hour-minute", Basic "hour-minute{schema}",symbol_weight; "pos=hour", Basic "hour{schema}",symbol_weight; "pos=hour-minute-interval", Basic "hour-minute-interval",symbol_weight; "pos=hour-interval", Basic "hour-interval",symbol_weight; "lemma=rysunek,pos=subst,number=sg",Basic "np*number*case*gender*person/obj-id",symbol_weight; (* objids *) "pos=obj-id", Basic "obj-id",symbol_weight; "pos=match-result", Basic "match-result",symbol_weight; "pos=url", Basic "url",symbol_weight; "pos=email", Basic "email",symbol_weight; "pos=symbol", Basic "np*number*case*gender*person{\\(1+qub),/(1+inclusion)}",0.; (* FIXME: uslalić kiedy schema jest pusta i wyciąć ją w takich przypadkach *) (* frazy rzeczownikowe *) "pos=subst|depr,nsyn!=pronoun,nsem!=measure", Basic "np*number*case*gender*person{\\(1+num*number*case*gender*person*congr)}{schema}{\\(1+qub),/(1+inclusion)}",0.; "pos=subst,case=gen,nsyn!=pronoun,nsem!=measure", Quant([Number,"numbers";Case,"all_cases";Gender,"genders";Person,"persons"], "np*sg*case*n2*person{\\num*number*case*gender*person*rec}{schema}{\\(1+qub),/(1+inclusion)}"), (* UWAGA: number "sg" i gender "n2", żeby uzgadniać z podmiotem czasownika *)0.; "pos=subst,case=gen,nsyn!=pronoun,nsem!=measure", Quant([Unumber,"all_numbers";Ucase,"all_cases";Ugender,"all_genders"; Uperson,"all_persons";Number,"numbers";Case,"all_cases";Gender,"genders";Person,"ter"],(* FIXME: "all_cases" *) "np*unumber*ucase*ugender*uperson{\\measure*unumber*ucase*ugender*uperson}{schema}{\\(1+qub),/(1+inclusion)}"),0.; "pos=subst|depr,nsyn=pronoun,nsem!=measure", Basic "np*number*case*gender*person{\\(1+num*number*case*gender*person*congr)}{\\(1+qub),/(1+inclusion)}",0.; "pos=subst,case=gen,nsyn=pronoun,nsem!=measure", Quant([Number,"numbers";Case,"all_cases";Gender,"genders";Person,"persons"], "np*sg*case*n2*person{\\num*number*case*gender*person*rec}{\\(1+qub),/(1+inclusion)}"), (* UWAGA: number "sg" i gender "n2", żeby uzgadniać z podmiotem czasownika *)0.; "pos=subst,case=gen,nsyn=pronoun,nsem!=measure", Quant([Unumber,"all_numbers";Ucase,"all_cases";Ugender,"all_genders"; Uperson,"all_persons";Number,"numbers";Case,"all_cases";Gender,"genders";Person,"ter"], "np*unumber*ucase*ugender*uperson{\\measure*unumber*ucase*ugender*uperson}{\\(1+qub),/(1+inclusion)}"),0.; "pos=ppron12", Basic "np*number*case*gender*person{\\(1+qub),/(1+inclusion)}",0.; "pos=ppron3,praep=npraep|praep-npraep", Basic "np*number*case*gender*person{\\(1+qub),/(1+inclusion)}",0.; "pos=siebie", Basic "np*number*case*gender*person{\\(1+qub),/(1+inclusion)}",0.; "lemma=jakiś|ten|taki,pos=apron", Quant([Number,"numbers";Case,"cases";Gender,"genders";Person,"ter"], "np*number*case*gender*person{\\(1+qub),/(1+inclusion)}"),0.; (* liczebniki *) (* FIXME: liczba po rzeczowniku *) (* FIXME: zbadać jak liczebniki współdziałąją z jako COMPAR *) "pos=num|intnum|realnum|intnum-interval|realnum-interval", Basic "num*number*case*gender*person*acm{\\(1+qub),/(1+inclusion)}", (* FIXME: jak usunięcie Phrase ProNG wpływa na pokrycie? *)0.; (* pojemniki *) "pos=subst,nsem=measure", Basic "measure*number*case*gender*person{\\(1+num*number*case*gender*person*congr)}{schema}{\\(1+qub),/(1+inclusion)}",measure_weight; "pos=subst,case=gen,nsem=measure", Quant([Number,"numbers";Case,"all_cases";Gender,"genders";Person,"ter"], "measure*sg*case*n2*person{\\num*number*case*gender*person*rec}{schema}{\\(1+qub),/(1+inclusion)}"),measure_weight;(* UWAGA: number "sg" i gender "n2", żeby uzgadniać z podmiotem czasownika *) (* frazy przyimkowe *) "pos=prep", Basic "prepnp*lemma*case{\\(1+advp),/np*T*case*T*T}{\\(1+qub),/(1+inclusion)}",0.; "pos=prep", Basic "prepadjp*lemma*case{\\(1+advp),/adjp*T*case*T}{\\(1+qub),/(1+inclusion)}",0.; "lemma=po,pos=prep", Quant([Case,"postp"],"prepadjp*lemma*case{\\(1+advp),/(adjp*sg*dat*m1+adjp*T*postp*T)}{\\(1+qub),/(1+inclusion)}"),0.;(* po polsku, po kreciemu *) "lemma=z,pos=prep", Quant([Case,"postp"],"prepadjp*lemma*case{\\(1+advp),/adjp*sg*nom*f}{\\(1+qub),/(1+inclusion)}"),0.;(* z bliska *) "lemma=na,pos=prep", Quant([Case,"postp"],"prepadjp*lemma*case{\\(1+advp),/advp}{\\(1+qub),/(1+inclusion)}"),0.;(* na lewo *) (* przimkowe określenia czasu *) "lemma=z,pos=prep,case=gen", Basic "prepnp*lemma*case{\\(1+advp),/(day-month+day+year+date+hour+hour-minute)}{\\(1+qub),/(1+inclusion)}",0.; "lemma=do,pos=prep,case=gen", Basic "prepnp*lemma*case{\\(1+advp),/(day-month+day+year+date+hour+hour-minute)}{\\(1+qub),/(1+inclusion)}",0.; "lemma=na,pos=prep,case=acc", Basic "prepnp*lemma*case{\\(1+advp),/(day-month+day+date+hour+hour-minute)}{\\(1+qub),/(1+inclusion)}",0.; "lemma=o,pos=prep,case=loc", Basic "prepnp*lemma*case{\\(1+advp),/(hour+hour-minute)}{\\(1+qub),/(1+inclusion)}",0.; "lemma=od,pos=prep,case=gen", Basic "prepnp*lemma*case{\\(1+advp),/(day-month+day+year+date+hour+hour-minute)}{\\(1+qub),/(1+inclusion)}",0.; "lemma=około,pos=prep,case=gen", Basic "prepnp*lemma*case{\\(1+advp),/(day-month+day+year+date+hour+hour-minute)}{\\(1+qub),/(1+inclusion)}",0.; "lemma=po,pos=prep,case=loc", Basic "prepnp*lemma*case{\\(1+advp),/(day-month+day+year+date+hour+hour-minute)}{\\(1+qub),/(1+inclusion)}",0.; "lemma=przed,pos=prep,case=inst", Basic "prepnp*lemma*case{\\(1+advp),/(day-month+day+year+date+hour+hour-minute)}{\\(1+qub),/(1+inclusion)}",0.; "lemma=w,pos=prep,case=loc", Basic "prepnp*lemma*case{\\(1+advp),/(day-month+day+year+date+hour+hour-minute)}{\\(1+qub),/(1+inclusion)}",0.; (* komparatywy *) (* FIXME: trzeba poprawić comparnp i comparpp w walencji *) "pos=compar", Quant([Case,"nom&gen&dat&acc&inst"],"compar*lemma*case{\\(1+advp),/np*T*case*T*T}{\\(1+qub),/(1+inclusion)}"),0.; "pos=compar", Quant([Case,"postp"],"compar*lemma*case{\\(1+advp),/(prepnp*T*T+prepadjp*T*T)}{\\(1+qub),/(1+inclusion)}"),0.; (* frazy przymiotnikowe *) (* FIXME: check_frame_case - pamiętać o sprawdzaniu zgodności kategorii przy szukaniu schema *) (* FIXME: let grad = match grads with [grad] -> grad | _ -> failwith "make_adjp: grad" in*) "pos=adj|adjc|adjp", Basic "adjp*number*case*gender{schema}{\\(1+qub),/(1+inclusion)}{\\(1+adja)}",0.; "lemma=jakiś|ten|taki,pos=apron", Basic "adjp*number*case*gender{\\(1+qub),/(1+inclusion)}",0.; "pos=ordnum|roman-ordnum", Basic "adjp*number*case*gender{\\(1+qub),/(1+inclusion)}{\\(1+adja)}",0.; "pos=adja|intnum|realnum|intnum-interval|realnum-interval|roman|roman-interval",Basic "adja/hyphen",0.; (* przysłówki *) (* FIXME let grad = match grads with [grad] -> grad | _ -> failwith "make_advp: grad" in*) "pos=adv",Basic "advp{schema}{\\(1+qub),/(1+inclusion)}{\\(1+adja)}",0.; (* relatory *) (* FIXME: dwa znaczenia jak: pytanie o cechę lub spójnik *) "lemma=jak|skąd|dokąd|gdzie|którędy|kiedy,pos=adv", Raised([Inumber,"";Igender,"";Iperson,"";Ctype,"int&rel"], "cp*ctype*lemma{\\(1+advp),/(ip*inumber*igender*iperson/advp)}",[Ctype]),0.; (*["CTYPE",SubstVar "ctype"]*) "lemma=odkąd|dlaczego|czemu,pos=adv", Raised([Inumber,"";Igender,"";Iperson,"";Ctype,"int"], "cp*ctype*lemma{\\(1+advp),/(ip*inumber*igender*iperson/advp)}",[Ctype]),0.; (*["CTYPE",SubstVar "ctype"]*) "lemma=gdy,pos=adv", Raised([Inumber,"";Igender,"";Iperson,"";Ctype,"sub"], "cp*ctype*lemma{\\(1+advp),/(ip*inumber*igender*iperson/advp)}",[Ctype]),0.; (*["CTYPE",SubstVar "ctype"]*) (* czasowniki *) "pos=ger", Basic "np*number*case*gender*person{schema}{\\(1+qub),/(1+inclusion)}",0.; "pos=pact", Basic "adjp*number*case*gender{schema}{\\(1+qub),/(1+inclusion)}",0.; "pos=ppas", Basic "adjp*number*case*gender{schema}{\\(1+qub),/(1+inclusion)}",0.; "pos=fin|bedzie,negation=aff,mood=indicative", Basic "ip*number*gender*person{/(1+int)}{schema}{\\(1+qub),/(1+inclusion)}",0.; "pos=fin|bedzie,negation=neg,mood=indicative", Basic "ip*number*gender*person{/(1+int)}{schema}{\\(1+qub),/(1+inclusion)}{\\nie}",0.; "pos=fin,negation=aff,mood=imperative", Basic "ip*number*gender*person{/(1+int)}{schema,|aux-imp}{\\(1+qub),/(1+inclusion)}",0.; "pos=fin,negation=neg,mood=imperative", Basic "ip*number*gender*person{/(1+int)}{schema,|aux-imp}{\\(1+qub),/(1+inclusion)}{\\nie}",0.; "pos=impt|imps,negation=aff", Basic "ip*number*gender*person{/(1+int)}{schema}{\\(1+qub),/(1+inclusion)}",0.; "pos=impt|imps,negation=neg", Basic "ip*number*gender*person{/(1+int)}{schema}{\\(1+qub),/(1+inclusion)}{\\nie}",0.; "pos=pred,negation=aff,tense=pres", Basic "ip*number*gender*person{/(1+int)}{schema}{\\(1+qub),/(1+inclusion)}",0.; "pos=pred,negation=neg,tense=pres", Basic "ip*number*gender*person{/(1+int)}{schema}{\\(1+qub),/(1+inclusion)}{\\nie}",0.; "pos=pred,negation=aff,tense=fut", Basic "ip*number*gender*person{/(1+int)}{schema,|aux-fut*number*gender*person}{\\(1+qub),/(1+inclusion)}",0.; "pos=pred,negation=neg,tense=fut", Basic "ip*number*gender*person{/(1+int)}{schema,|aux-fut*number*gender*person}{\\(1+qub),/(1+inclusion)}{\\nie}",0.; "pos=pred,negation=aff,tense=past", Basic "ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{\\(1+qub),/(1+inclusion)}",0.; (* FIXME: tense *) "pos=pred,negation=neg,tense=past", Basic "ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{\\(1+qub),/(1+inclusion)}{\\nie}",0.; (* FIXME: tense *) "pos=praet|winien,person=ter,negation=aff,mood=indicative", Basic "ip*number*gender*person{/(1+int)}{schema}{\\(1+qub),/(1+inclusion)}",0.; "pos=praet|winien,person=ter,negation=neg,mood=indicative", Basic "ip*number*gender*person{/(1+int)}{schema}{\\(1+qub),/(1+inclusion)}{\\nie}",0.; "pos=praet|winien,person!=ter,negation=aff,mood=indicative", Basic "ip*number*gender*person{/(1+int)}{schema,|aglt*number*person}{\\(1+qub),/(1+inclusion)}",0.; "pos=praet|winien,person!=ter,negation=neg,mood=indicative", Basic "ip*number*gender*person{/(1+int)}{schema,|aglt*number*person}{\\(1+qub),/(1+inclusion)}{\\nie}",0.; "pos=praet|winien,person=ter,negation=aff,mood=conditional", Basic "ip*number*gender*person{/(1+int)}{schema,|by}{\\(1+qub),/(1+inclusion)}",0.; "pos=praet|winien,person=ter,negation=neg,mood=conditional", Basic "ip*number*gender*person{/(1+int)}{schema,|by}{\\(1+qub),/(1+inclusion)}{\\nie}",0.; "pos=praet|winien,person!=ter,negation=aff,mood=conditional", Basic "ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|by}{\\(1+qub),/(1+inclusion)}",0.; "pos=praet|winien,person!=ter,negation=neg,mood=conditional", Basic "ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|by}{\\(1+qub),/(1+inclusion)}{\\nie}",0.; "pos=praet|winien,negation=aff,tense=fut", Basic "ip*number*gender*person{/(1+int)}{schema,|aux-fut*number*gender*person}{\\(1+qub),/(1+inclusion)}",0.; "pos=winien,person=ter,negation=aff,tense=past", Basic "ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{\\(1+qub),/(1+inclusion)}",0.; "pos=winien,person=ter,negation=neg,tense=past", Basic "ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{\\(1+qub),/(1+inclusion)}{\\nie}",0.; "pos=winien,person!=ter,negation=aff,tense=past", Basic "ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|aux-past*number*gender*person}{\\(1+qub),/(1+inclusion)}",0.; "pos=winien,person!=ter,negation=neg,tense=past", Basic "ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|aux-past*number*gender*person}{\\(1+qub),/(1+inclusion)}{\\nie}",0.; "pos=bedzie", Basic "aux-fut*number*gender*person",0.; "lemma=być,pos=praet", Basic "aux-past*number*gender*person",0.; "pos=aglt", Basic "aglt*number*person",0.; "pos=inf,negation=aff", Basic "infp{schema}{\\(1+qub),/(1+inclusion)}",0.; "pos=inf,negation=neg", Basic "infp{schema}{\\(1+qub),/(1+inclusion)}{\\nie}",0.; "pos=pcon,negation=aff", Basic "padvp{schema}{\\(1+qub),/(1+inclusion)}",0.; "pos=pcon,negation=neg", Basic "padvp{schema}{\\(1+qub),/(1+inclusion)}{\\nie}",0.; "pos=pant,negation=aff", Basic "padvp{schema}{\\(1+qub),/(1+inclusion)}",0.; "pos=pant,negation=neg", Basic "padvp{schema}{\\(1+qub),/(1+inclusion)}{\\nie}",0.; "pos=comp", Quant([Ctype,"sub"],"cp*ctype*lemma/ip*T*T*T"),0.; "pos=conj", Quant([Ctype,"coord"],"cp*ctype*lemma/ip*T*T*T"),0.; "lemma=i|lub|czy|bądź,pos=conj", Conj([Number,"all_numbers";Gender,"all_genders";Person,"all_persons"],"(ip*number*gender*person/ip*T*T*T)\\ip*T*T*T"),0.; "lemma=,|i|lub|czy|bądź,pos=conj", Conj([],"(advp/prepnp*T*T)\\prepnp*T*T"),0.; "lemma=,|i|lub|czy|bądź,pos=conj", Conj([],"(advp/advp)\\prepnp*T*T"),0.; "lemma=,|i|lub|czy|bądź,pos=conj", Conj([],"(advp/prepnp*T*T)\\advp"),0.; "lemma=,|i|lub|czy|bądź,pos=conj", Conj([],"(advp/advp)\\advp"),0.; "lemma=,|i|lub|czy|bądź,pos=conj", Conj([Plemma,"";Case,"all_cases"],"(prepnp*plemma*case/prepnp*plemma*case)\\prepnp*plemma*case"),0.; "lemma=,|i|lub|czy|bądź,pos=conj", Conj([Number,"all_numbers";Case,"all_cases";Gender,"all_genders";Person,"all_persons"],"(np*number*case*gender*person/np*T*case*T*T)\\np*T*case*T*T"),0.; "lemma=,|i|lub|czy|bądź,pos=conj", Conj([Number,"all_numbers";Case,"all_cases";Gender,"all_genders"],"(adjp*number*case*gender/adjp*number*case*gender)\\adjp*number*case*gender"),0.; "lemma=co|kto,pos=subst", Raised([Inumber,"";Igender,"";Iperson,"";Ctype,"int&rel";Number,"numbers";Case,"cases";Gender,"genders";Person,"ter"], "cp*ctype*lemma/(ip*inumber*igender*iperson/np*number*case*gender*person)",[Ctype]),0.; (*["CTYPE",SubstVar "ctype"]*) "lemma=co|kto,pos=subst", Raised([Inumber,"";Igender,"";Iperson,"";Plemma,"";Ctype,"int&rel";Number,"numbers";Case,"cases";Gender,"genders";Person,"ter"], "cp*ctype*lemma{/(ip*inumber*igender*iperson/prepnp*plemma*case),/(prepnp*plemma*case/np*number*case*gender*person)}",[Ctype]),0.; (*["CTYPE",SubstVar "ctype"]*) "lemma=to,pos=subst", Quant([Ctype,"";Plemma,"";Number,"numbers";Case,"cases";Gender,"genders";Person,"ter"], "ncp*number*case*gender*person*ctype*plemma{\\(1+qub),/(1+inclusion)}{/cp*ctype*plemma}"),0.; "pos=ppron3,praep=praep", Raised([Plemma,"";Number,"numbers";Case,"cases";Gender,"genders";Person,"persons"], "prepnp*plemma*case\\(prepnp*plemma*case/np*number*case*gender*person)",[]), (*inclusion*)0. (*[Number;Case;Gender;Person]*); "lemma=ile,pos=num", (* FIXME: iloma ma bezpośredni podrzędnik rzeczownikowy, a ile nie *) (* FIXME: mwe "o ile, na ile" *) Quant([Inumber,"";Igender,"";Iperson,"";Ctype,"int&rel";Number,"numbers";Case,"cases";Gender,"genders";Person,"ter"], "cp*ctype*lemma/ip*inumber*igender*iperson"), (* FIXME: zaślepka, bo podrzędnik ile nie musi z nim sąciadować *)0.; (*["CTYPE",SubstVar "ctype"]*) (* FIXME: trzeba dodać przypadki, bezpośredniego podrzędnika rzeczownikowego i przyimka nad "ile" *) "lemma=czyj|jaki|który,pos=apron", Raised([Inumber,"";Igender,"";Iperson,"";Nperson,"";Ctype,"int";Number,"numbers";Case,"cases";Gender,"genders"], "cp*ctype*lemma{/(ip*inumber*igender*iperson/np*number*case*gender*nperson)}{/(np*number*case*gender*nperson\\adjp*number*case*gender)}",[Ctype]),0.; (*["CTYPE",SubstVar "ctype"]*) "lemma=czyj|jaki|który,pos=apron", Raised([Inumber,"";Igender,"";Iperson,"";Nperson,"";Plemma,"";Ctype,"int";Number,"numbers";Case,"cases";Gender,"genders"], "cp*ctype*lemma{/(ip*inumber*igender*iperson/prepnp*plemma*case)}{/(prepnp*plemma*case/np*number*case*gender*nperson)}{/(np*number*case*gender*nperson\\adjp*number*case*gender)}",[Ctype]),0.; (*["CTYPE",SubstVar "ctype"]*) "lemma=czyj|jaki,pos=apron", Raised([Inumber,"";Igender,"";Iperson,"";Ctype,"rel";Number,"numbers";Case,"cases";Gender,"genders";Person,"ter"], "cp*ctype*lemma/(ip*inumber*igender*iperson/np*number*case*gender*person)",[Ctype]),0.; (*["CTYPE",SubstVar "ctype"]*) "lemma=jaki|który,pos=apron", Raised([Inumber,"";Igender,"";Iperson,"";Plemma,"";Ctype,"rel";Number,"numbers";Case,"cases";Gender,"genders";Person,"ter"], "cp*ctype*lemma{/(ip*inumber*igender*iperson/prepnp*plemma*case)}{/(prepnp*plemma*case/np*number*case*gender*person)}",[Ctype]),0.; (*["CTYPE",SubstVar "ctype"]*) "lemma=się,pos=qub", Basic "się",0.; (* FIXME: dodać make_np *) "lemma=nie,pos=qub", Basic "nie",0.; "lemma=by,pos=qub", Basic "by",0.; "lemma=niech,pos=qub", Basic "aux-imp",0.; "lemma=niechaj,pos=qub", Basic "aux-imp",0.; "lemma=niechże,pos=qub", Basic "aux-imp",0.; "lemma=niechajże,pos=qub", Basic "aux-imp",0.; "lemma=czy,pos=qub", Quant([Ctype,"int"],"cp*ctype*lemma/ip*T*T*T"),0.; "lemma=gdyby,pos=qub", Quant([Ctype,"rel"],"cp*ctype*lemma/ip*T*T*T"),0.; "pos=qub", Basic "qub",0.; "pos=interj", Basic "interj",0.; "lemma=-,pos=interp", Basic "hyphen",0.; "lemma=?,pos=interp", Basic "int",0.; "lemma=„,pos=interp", Quot([Number,"";Case,"";Gender,"";Person,""],"(np*number*case*gender*person/rquot)/np*number*case*gender*person"),0.; "lemma=«,pos=interp", Quot([Number,"";Case,"";Gender,"";Person,""],"(np*number*case*gender*person/rquot2)/np*number*case*gender*person"),0.; "lemma=»,pos=interp", Quot([Number,"";Case,"";Gender,"";Person,""],"(np*number*case*gender*person/rquot3)/np*number*case*gender*person"),0.; "lemma=”,pos=interp", Basic "rquot",0.; "lemma=»,pos=interp", Basic "rquot2",0.; "lemma=«,pos=interp", Basic "rquot3",0.; "lemma=(,pos=interp", Inclusion "(inclusion/rparen)/(np*T*T*T*T+ip*T*T*T+adjp*T*T*T+prepnp*T*T)",0.; "lemma=[,pos=interp", Inclusion "(inclusion/rparen2)/(np*T*T*T*T+ip*T*T*T+adjp*T*T*T+prepnp*T*T)",0.; "lemma=),pos=interp", Basic "rparen",0.; "lemma=],pos=interp", Basic "rparen2",0.; "pos=unk", Basic "np*number*case*gender*person",0.; (* | ".","interp",[] -> [LCGrenderer.make_frame_simple [] ["dot"] c (make_node "." "interp" c.weight 0 [])] (* FIXME: to jest potrzebne przy CONLL *) | "<conll_root>","interp",[] -> let batrs = (make_node "<conll_root>" "interp" c.weight 0 []) in let schema_list = [[schema_field CLAUSE "Clause" Forward [Phrase IP;Phrase (CP(Int,CompUndef));Phrase (NP(Case "voc"));Phrase (Lex "interj")]]] in [LCGrenderer.make_frame false tokens lex_sems [] schema_list ["<conll_root>"] d batrs] | lemma,c,l -> failwith ("process_interp: " ^ lemma ^ ":" ^ c ^ ":" ^ (String.concat ":" (Xlist.map l (String.concat ".")))) in*) "pos=sinterj", Bracket "interj",0.; "lemma=</sentence>,pos=interp", Bracket "s\\?(ip*T*T*T+cp*int*T+np*sg*voc*T*T+interj)",0.; "lemma=<sentence>,pos=interp", Bracket "<root>/s",0.; "lemma=:,pos=interp", Bracket "or",0.; "lemma=:s,pos=interp", Bracket "<colon>\\<speaker>",0.; "lemma=:s,pos=interp", Bracket "(<colon>\\<speaker>)/<squery>",0.; "lemma=<or-sentence>,pos=interp", Bracket "<root>/s",0.; "lemma=<or-sentence>,pos=interp", Bracket "((<root>/<speaker-end>)/(ip*T*T*T/or))/or2",0.; "lemma=</or-sentence>,pos=interp", Bracket "or2\\?(ip*T*T*T+cp*int*T+np*sg*voc*T*T+interj)",0.; "lemma=<sentence>,pos=interp", Bracket "(<speaker>/<speaker-end>)/np*T*nom*T*T",0.; "lemma=</sentence>,pos=interp", Bracket "<speaker-end>",0.; ] let rec split_comma found rev = function "lemma" :: "=" :: "," :: l -> split_comma found ("," :: "=" :: "lemma" :: rev) l | "," :: l -> split_comma (List.rev rev :: found) [] l | s :: l -> split_comma found (s :: rev) l | [] -> if rev = [] then found else List.rev rev :: found let match_selectors = function "lemma" :: l -> Lemma,l | "pos" :: l -> Pos,l | "pos2" :: l -> Pos2,l | "cat" :: l -> Cat,l | "number" :: l -> Number,l | "case" :: l -> Case,l | "gender" :: l -> Gender,l | "person" :: l -> Person,l | "grad" :: l -> Grad,l | "praep" :: l -> Praep,l | "acm" :: l -> Acm,l | "aspect" :: l -> Aspect,l | "negation" :: l -> Negation,l | "mood" :: l -> Mood,l | "tense" :: l -> Tense,l | "nsem" :: l -> Nsem,l | "nsyn" :: l -> Nsyn,l | s :: l -> failwith ("match_selectors: " ^ s) | [] -> failwith "match_selectors: empty" let match_relation = function (* cat,"=" :: "=" :: l -> cat,StrictEq,l *) | cat,"!" :: "=" :: l -> cat,Neq,l | cat,"=" :: l -> cat,Eq,l | cat,s :: l -> failwith ("match_relation: " ^ (String.concat " " (s :: l))) | cat,[] -> failwith "match_relation: empty" let rec split_mid rev = function [s] -> List.rev (s :: rev) | s :: "|" :: l -> split_mid (s :: rev) l | [] -> failwith "split_mid: empty" | l -> failwith ("split_mid: " ^ (String.concat " " l)) let match_value = function cat,rel,[s] -> cat,rel,[s] | cat,rel,[] -> failwith "match_value: empty" | cat,rel,l -> cat,rel, split_mid [] l let parse_selectors s = (* print_endline s; *) let l = Xlist.map (Str.full_split (Str.regexp "|\\|,\\|=\\|!") s) (function Str.Text s -> s | Str.Delim s -> s) in let ll = split_comma [] [] l in let l = Xlist.rev_map ll match_selectors in let l = Xlist.rev_map l match_relation in let l = Xlist.rev_map l match_value in l let rec find_seletor s = function (t,Eq,x :: _) :: l -> if t = s then x else find_seletor s l | (t,_,_) :: l -> if t = s then failwith "find_seletor 1" else find_seletor s l | [] -> failwith "find_seletor 2" type syntax = A of string | B of internal_grammar_symbol | C of grammar_symbol | D of direction * grammar_symbol | E of (direction * grammar_symbol) list let avars = StringSet.of_list [ "number"; "case"; "gender"; "person"; "ctype"; "lemma"; "cat"; "acm"; "plemma"; "nperson"; "inumber"; "igender"; "iperson"; "unumber"; "ucase"; "ugender"; "uperson"] let atoms = StringSet.of_list [ "gen"; "congr"; "sg"; "n2"; "rec"; "dat"; "voc"; "m1"; "postp"; "nom"; "f"; "infp"; "np"; "prepnp"; "adjp"; "ip"; "cp"; "ncp"; "advp"; "padvp"; "adja"; "prepadjp"; "compar"; "measure"; "num"; "aglt"; "aux-fut"; "aux-past"; "aux-imp"; "qub"; "interj"; "hyphen"; "int"; "rparen"; "rparen2"; "rquot"; "rquot2"; "rquot3"; "inclusion"; "day-interval"; "day-lex"; "day-month-interval"; "date-interval"; "month-lex"; "month-interval"; "year-interval"; "roman"; "roman-interval"; "hour-minute-interval"; "hour-interval"; "obj-id"; "match-result"; "url"; "email"; "day-month"; "day"; "year"; "date"; "hour"; "hour-minute"; "się"; "nie"; "by"; "s"; "<root>"; "or"; "or2"; "<colon>"; "<speaker>"; "<speaker-end>"; "<squery>"] let operators = StringSet.of_list [ "*"; "+"; "/"; "|"; "\\"; "("; ")"; ","; "{"; "}"; "?"] let find_internal_grammar_symbols = function | "T" -> B Top | "1" -> C One | "schema" -> D(Both,Tensor[AVar "schema"]) (* | "qub_inclusion" -> D(Both,Tensor[AVar "qub_inclusion"]) *) | s -> if StringSet.mem avars s then B (AVar s) else if StringSet.mem atoms s then B (Atom s) else if StringSet.mem operators s then A s else failwith ("find_internal_grammar_symbols: " ^ s) let rec find_tensor = function B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: A "*" :: B s6 :: A "*" :: B s7 :: A "*" :: B s8 :: l -> failwith "find_tensor 1" | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: A "*" :: B s6 :: A "*" :: B s7 :: l -> C (Tensor[s1;s2;s3;s4;s5;s6;s7]) :: find_tensor l | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: A "*" :: B s6 :: l -> C (Tensor[s1;s2;s3;s4;s5;s6]) :: find_tensor l | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: l -> C (Tensor[s1;s2;s3;s4;s5]) :: find_tensor l | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: l -> C (Tensor[s1;s2;s3;s4]) :: find_tensor l | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: l -> C (Tensor[s1;s2;s3]) :: find_tensor l | B s1 :: A "*" :: B s2 :: l -> C (Tensor[s1;s2]) :: find_tensor l | B s1 :: l -> C (Tensor[s1]) :: find_tensor l | A "*" :: _ -> failwith "find_tensor 2" | t :: l -> t :: find_tensor l | [] -> [] let rec find_plus = function C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: A "+" :: C s5 :: A "+" :: C s6 :: A "+" :: C s7 :: l -> failwith "find_plus 1" | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: A "+" :: C s5 :: A "+" :: C s6 :: l -> C (Plus[s1;s2;s3;s4;s5;s6]) :: find_plus l | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: A "+" :: C s5 :: l -> C (Plus[s1;s2;s3;s4;s5]) :: find_plus l | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: l -> C (Plus[s1;s2;s3;s4]) :: find_plus l | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: l -> C (Plus[s1;s2;s3]) :: find_plus l | C s1 :: A "+" :: C s2 :: l -> C (Plus[s1;s2]) :: find_plus l | A "+" :: _ -> failwith "find_plus 2" | t :: l -> t :: find_plus l | [] -> [] let rec find_paren = function A "(" :: C s :: A ")" :: l -> C s :: find_paren l | s :: l -> s :: find_paren l | [] -> [] let rec find_imp = function | C s1 :: A "/" :: C s2 :: l -> C (Imp(s1,Forward,s2)) :: find_imp l | C s1 :: A "|" :: C s2 :: l -> C (Imp(s1,Both,s2)) :: find_imp l | C s1 :: A "\\" :: C s2 :: l -> C (Imp(s1,Backward,s2)) :: find_imp l | s :: l -> s :: find_imp l | [] -> [] let rec find_maybe = function | A "?" :: C s2 :: l -> C (Maybe s2) :: find_maybe l | A "?" :: _ -> failwith "find_maybe 1" | s :: l -> s :: find_maybe l | [] -> [] let rec find_mult_imp = function | A "{" :: A "/" :: C s2 :: l -> A "{" :: D (Forward,s2) :: find_mult_imp l | A "{" :: A "|" :: C s2 :: l -> A "{" :: D (Both,s2) :: find_mult_imp l | A "{" :: A "\\" :: C s2 :: l -> A "{" :: D (Backward,s2) :: find_mult_imp l | A "," :: A "/" :: C s2 :: l -> A "," :: D (Forward,s2) :: find_mult_imp l | A "," :: A "|" :: C s2 :: l -> A "," :: D (Both,s2) :: find_mult_imp l | A "," :: A "\\" :: C s2 :: l -> A "," :: D (Backward,s2) :: find_mult_imp l | A "/" :: _ -> failwith "find_mult_imp 1" | A "|" :: _ -> failwith "find_mult_imp 2" | A "\\" :: _ -> failwith "find_mult_imp 3" | A "(" :: _ -> failwith "find_mult_imp 4" | A ")" :: _ -> failwith "find_mult_imp 5" | s :: l -> s :: find_mult_imp l | [] -> [] let rec find_mult = function A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: l -> failwith "find_mult 1" | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7] :: find_mult l | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6] :: find_mult l | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5] :: find_mult l | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4] :: find_mult l | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3] :: find_mult l | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "}" :: l -> E[s1,t1;s2,t2] :: find_mult l | A "{" :: D(s1,t1) :: A "}" :: l -> E[s1,t1] :: find_mult l | A "{" :: _ -> failwith "find_mult 2" | A "}" :: _ -> failwith "find_mult 3" | A "," :: _ -> failwith "find_mult 4" | t :: l -> t :: find_mult l | [] -> [] let rec apply_mult = function C s :: E t :: l -> apply_mult (C (ImpSet(s,t)) :: l) | [C s] -> C s | _ -> failwith "apply_mult" let parse_syntax s = (* print_endline s; *) let l = Xlist.map (Str.full_split (Str.regexp "?\\|}\\|{\\|,\\|*\\|/\\|+\\|)\\|(\\||\\|\\") s) (function Str.Text s -> s | Str.Delim s -> s) in let l = List.rev (Xlist.rev_map l find_internal_grammar_symbols) in let l = find_tensor l in let l = find_plus l in let l = find_paren l in let l = find_maybe l in let l = find_imp l in let l = find_paren l in let l = find_imp l in let l = find_paren l in let l = find_imp l in let l = find_paren l in let l = find_mult_imp l in let l = find_mult l in match apply_mult l with C s -> s | _ -> failwith "parse_syntax" let pos_categories = Xlist.fold [ "subst",[Lemma,"lemma";Cat,"cat";Number,"numbers";Case,"cases";Gender,"genders";Person,"persons";Nsyn,"nsyn";Nsem,"nsem";]; "depr",[Lemma,"lemma";Cat,"cat";Number,"numbers";Case,"cases";Gender,"genders";Person,"persons";Nsyn,"nsyn";Nsem,"nsem";]; "ppron12",[Lemma,"lemma";Number,"numbers";Case,"cases";Gender,"genders";Person,"persons";]; "ppron3",[Lemma,"lemma";Number,"numbers";Case,"cases";Gender,"genders";Person,"persons";Praep,"praeps";]; "siebie",[Lemma,"lemma";Number,"numbers";Case,"cases";Gender,"genders";Person,"persons";]; "prep",[Lemma,"lemma";Case,"cases";]; "compar",[Lemma,"lemma";Case,"cases";]; "num",[Lemma,"lemma";Number,"numbers";Case,"cases";Gender,"genders";Person,"persons";Acm,"acms";]; "intnum",[Lemma,"lemma";Number,"numbers";Case,"cases";Gender,"genders";Person,"persons";Acm,"acms";]; "realnum",[Lemma,"lemma";Number,"numbers";Case,"cases";Gender,"genders";Person,"persons";Acm,"acms";]; "intnum-interval",[Lemma,"lemma";Number,"numbers";Case,"cases";Gender,"genders";Person,"persons";Acm,"acms";]; "realnum-interval",[Lemma,"lemma";Number,"numbers";Case,"cases";Gender,"genders";Person,"persons";Acm,"acms";]; "symbol",[Lemma,"lemma";Number,"numbers";Case,"cases";Gender,"genders";Person,"persons";]; "ordnum",[Lemma,"lemma";Number,"numbers";Case,"cases";Gender,"genders";Grad,"grads";]; "date",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "date-interval",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "hour-minute",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "hour",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "hour-minute-interval",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "hour-interval",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "year",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "year-interval",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "day",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "day-interval",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "day-month",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "day-month-interval",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "month-interval",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "roman-ordnum",[Lemma,"lemma";Number,"numbers";Case,"cases";Gender,"genders";Grad,"grads";]; "roman",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "roman-interval",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "match-result",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "url",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "email",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "obj-id",[Lemma,"lemma";Nsyn,"nsyn";Nsem,"nsem";]; "adj",[Lemma,"lemma";Cat,"cat";Number,"numbers";Case,"cases";Gender,"genders";Grad,"grads";]; "adjc",[Lemma,"lemma";Cat,"cat";Number,"numbers";Case,"cases";Gender,"genders";Grad,"grads";]; "adjp",[Lemma,"lemma";Cat,"cat";Number,"numbers";Case,"cases";Gender,"genders";Grad,"grads";]; "apron",[Lemma,"lemma";Number,"numbers";Case,"cases";Gender,"genders";Grad,"grads";]; "adja",[Lemma,"lemma";Cat,"cat";]; "adv",[Lemma,"lemma";Cat,"cat";Grad,"grads";];(* ctype *) "ger",[Lemma,"lemma";(*NewLemma,"newlemma";*)Cat,"cat";Number,"numbers";Case,"cases";Gender,"genders";Person,"persons";Aspect,"aspects";Negation,"negations";]; "pact",[Lemma,"lemma";(*NewLemma,"newlemma";*)Cat,"cat";Number,"numbers";Case,"cases";Gender,"genders";Aspect,"aspects";Negation,"negations";]; "ppas",[Lemma,"lemma";(*NewLemma,"newlemma";*)Cat,"cat";Number,"numbers";Case,"cases";Gender,"genders";Aspect,"aspects";Negation,"negations";]; "fin",[Lemma,"lemma";(*NewLemma,"newlemma";*)Cat,"cat";Number,"numbers";Gender,"genders";Person,"persons";Aspect,"aspects";Negation,"negations";Mood,"moods";Tense,"tenses";]; "bedzie",[Lemma,"lemma";(*NewLemma,"newlemma";*)Cat,"cat";Number,"numbers";Gender,"genders";Person,"persons";Aspect,"aspects";Negation,"negations";Mood,"moods";Tense,"tenses";]; "praet",[Lemma,"lemma";(*NewLemma,"newlemma";*)Cat,"cat";Number,"numbers";Gender,"genders";Person,"persons";Aspect,"aspects";Negation,"negations";Mood,"moods";Tense,"tenses";]; "winien",[Lemma,"lemma";(*NewLemma,"newlemma";*)Cat,"cat";Number,"numbers";Gender,"genders";Person,"persons";Aspect,"aspects";Negation,"negations";Mood,"moods";Tense,"tenses";]; "impt",[Lemma,"lemma";(*NewLemma,"newlemma";*)Cat,"cat";Number,"numbers";Gender,"genders";Person,"persons";Aspect,"aspects";Negation,"negations";Mood,"moods";Tense,"tenses";]; "imps",[Lemma,"lemma";(*NewLemma,"newlemma";*)Cat,"cat";Number,"numbers";Gender,"genders";Person,"persons";Aspect,"aspects";Negation,"negations";Mood,"moods";Tense,"tenses";]; "pred",[Lemma,"lemma";(*NewLemma,"newlemma";*)Cat,"cat";Number,"numbers";Gender,"genders";Person,"persons";Aspect,"aspects";Negation,"negations";Mood,"moods";Tense,"tenses";]; "aglt",[Lemma,"lemma";Number,"numbers";Person,"persons";Aspect,"aspects";]; "inf",[Lemma,"lemma";(*NewLemma,"newlemma";*)Cat,"cat";Aspect,"aspects";]; "pcon",[Lemma,"lemma";(*NewLemma,"newlemma";*)Cat,"cat";Aspect,"aspects";]; "pant",[Lemma,"lemma";(*NewLemma,"newlemma";*)Cat,"cat";Aspect,"aspects";]; "qub",[Lemma,"lemma";]; "comp",[Lemma,"lemma";];(* ctype *) "conj",[Lemma,"lemma";];(* ctype *) "interj",[Lemma,"lemma";]; "sinterj",[Lemma,"lemma";]; "burk",[Lemma,"lemma";]; "interp",[Lemma,"lemma";]; "unk",[Lemma,"lemma";Number,"numbers";Case,"cases";Gender,"genders";Person,"persons";]; ] StringMap.empty (fun map (k,v) -> StringMap.add map k v) module OrderedCat = struct type t = cat let compare = compare end module CatMap=Xmap.Make(OrderedCat) let merge_quant pos_quants quants = let map = Xlist.fold quants CatMap.empty (fun map (k,v) -> CatMap.add map k v) in let l,map = Xlist.fold pos_quants ([],map) (fun (l,map) (cat,v) -> if CatMap.mem map cat then (cat,CatMap.find map cat) :: l, CatMap.remove map cat else (cat,v) :: l, map) in List.rev (CatMap.fold map l (fun l cat v -> (cat,v) :: l)) (* FIXME: kopia z ENIAMcategories *) let all_genders = ["m1";"m2";"m3";"f";"n1";"n2";"p1";"p2";"p3"] let all_cases = ["nom";"gen";"dat";"acc";"inst";"loc";"voc"] let all_persons = ["pri";"sec";"ter"] let all_numbers = ["sg";"pl"] (* FIXME: przenieść do ENIAM_LCGrender *) let make_symbol = function [] -> Zero | [s] -> Atom s | l -> With(Xlist.map l (fun s -> Atom s)) let parse_quant_range = function Lemma,"lemma" -> Top | Cat,"" -> Zero | Cat,"cat" -> Top (* | NewLemma,"newlemma" -> Top *) | Number,"" -> Zero | Number,"numbers" -> Top | Number,"all_numbers" -> make_symbol all_numbers | Case,"" -> Zero | Case,"cases" -> Top | Case,"all_cases" -> make_symbol all_cases | Case,"postp" -> Atom "postp" | Case,"nom&gen&dat&acc&inst" -> make_symbol (Xstring.split "&" "nom&gen&dat&acc&inst") | Gender,"" -> Zero | Gender,"genders" -> Top | Gender,"all_genders" -> make_symbol all_genders | Person,"" -> Zero | Person,"persons" -> Top | Person,"all_persons" -> make_symbol all_persons | Person,"ter" -> Atom "ter" | Grad,"grads" -> Top | Praep,"praeps" -> Top | Acm,"acms" -> Top | Aspect,"aspects" -> Top | Negation,"negations" -> Top | Mood,"moods" -> Top | Tense,"tenses" -> Top | Nsyn,"nsyn" -> Top | Nsem,"nsem" -> Top | Ctype,"" -> Zero | Ctype,"sub" -> Atom "sub" | Ctype,"coord" -> Atom "coord" | Ctype,"int&rel" -> make_symbol ["int";"rel"] | Ctype,"int" -> Atom "int" | Ctype,"rel" -> Atom "rel" | Inumber,"" -> Zero | Igender,"" -> Zero | Iperson,"" -> Zero | Nperson,"" -> Zero | Plemma,"" -> Zero | Unumber,"all_numbers" -> make_symbol all_numbers | Ucase,"all_cases" -> make_symbol all_cases | Ugender,"all_genders" -> make_symbol all_genders | Uperson,"all_persons" -> make_symbol all_persons | cat,v -> print_endline ("parse_quant_range: " ^ string_of_cat cat ^ "=" ^ v); Atom v let parse_quants_range quant = Xlist.map quant (fun (cats,v) -> cats, parse_quant_range (cats,v)) let parse_rule pos = function Basic syntax -> let quant = parse_quants_range (StringMap.find pos_categories pos) in false, quant, parse_syntax syntax, BasicSem(Xlist.map quant fst) | Quant(quant,syntax) -> let quant = parse_quants_range (merge_quant (StringMap.find pos_categories pos) quant) in false, quant, parse_syntax syntax, BasicSem(Xlist.map quant fst) | Raised(quant,syntax,semantics) -> let quant = parse_quants_range (merge_quant (StringMap.find pos_categories pos) quant) in false, quant, parse_syntax syntax, RaisedSem(Xlist.map quant fst,semantics) | Quot(quant,syntax) -> let quant = parse_quants_range (merge_quant (StringMap.find pos_categories pos) quant) in false, quant, parse_syntax syntax, QuotSem(Xlist.map quant fst) | Inclusion syntax -> let quant = parse_quants_range (StringMap.find pos_categories pos) in false, quant, parse_syntax syntax, InclusionSem(Xlist.map quant fst) | Conj(quant,syntax) -> let quant = parse_quants_range (merge_quant (StringMap.find pos_categories pos) quant) in false, quant, parse_syntax syntax, ConjSem(Xlist.map quant fst) | Bracket syntax -> let quant = parse_quants_range (StringMap.find pos_categories pos) in true, quant, parse_syntax syntax, BasicSem(Xlist.map quant fst) let parse_grammar grammar = List.rev (Xlist.fold grammar [] (fun grammar (selectors,rule,weight) -> let selectors = parse_selectors selectors in let pos = find_seletor Pos selectors in let rule = try parse_rule pos rule with Not_found -> failwith ("parse_grammar: " ^ pos) in (selectors,rule,weight) :: grammar)) let rec add_quantifiers t = function [] -> t | (cat,s) :: l -> add_quantifiers (WithVar(string_of_cat cat,s,"",t)) l let rec add_quantifiers_simple t = function [] -> t | (cat,s) :: l -> if ENIAM_LCGrenderer.count_avar (string_of_cat cat) t = 0 then add_quantifiers_simple t l else add_quantifiers_simple (WithVar(string_of_cat cat,s,"",t)) l (* FIXME: kopia z ENIAM_LCGlatexOf *) let direction = function Forward -> "/" | Backward -> "\\backslash" | Both -> "|" let atom = function "m1" -> "\\text{m}_1" | "m2" -> "\\text{m}_2" | "m3" -> "\\text{m}_3" | "n1" -> "\\text{n}_1" | "n2" -> "\\text{n}_2" | "f" -> "\\text{f}" | "p1" -> "\\text{p}_1" | "p2" -> "\\text{p}_2" | "p3" -> "\\text{p}_3" | s -> "\\text{" ^ Xlatex.escape_string s ^ "}" let rec latex_of_internal_grammar_symbol c = function Atom x -> atom x | AVar x -> " " ^ x | With l -> let s = String.concat "\\with" (Xlist.map l (latex_of_internal_grammar_symbol 2)) in if c > 1 then "(" ^ s ^ ")" else s | Zero -> "0" | Top -> "\\top" (* argument schema oznacza schemat walencyjny dodawany do reguły dla danego leksemu na podstawie schematu walenycjnego tego leksemu *) (* "..." jako restrykcja kwantyfikatora oznacza, że dozwolone wartości zmiennej są wyznaczone przez interpretację morfosyntaktyczną formy, przykładowo dla formy "zielonemu" będą miały postać ..... *) (* tensor wiąże silniej niż plus i imp *) let quant_newline = function WithVar _ -> "" | _ -> "\\\\ \\hspace{1cm}" let rec latex_of_grammar_symbol c = function Tensor l -> let s = String.concat "\\bullet" (Xlist.map l (latex_of_internal_grammar_symbol 2)) in (*if c > 1 then "(" ^ s ^ ")" else*) s | Plus l -> let s = String.concat "\\oplus" (Xlist.map l (latex_of_grammar_symbol 2)) in if c > 1 then "(" ^ s ^ ")" else s | Imp(s,d,t) -> "(" ^ (latex_of_grammar_symbol 2 s) ^ direction d ^ (latex_of_grammar_symbol 2 t) ^ ")" | One -> "1" | ImpSet(s,l) -> let s = (latex_of_grammar_symbol 1 s) ^ "\\{" ^ String.concat "\n," (Xlist.map l (fun (d,a) -> if a = Tensor[AVar "schema"] then "schema" else direction d ^ latex_of_grammar_symbol 1 a)) ^ "\\}" in if c > 0 then "(" ^ s ^ ")" else s | WithVar(v,Top,e,t) -> "\\bigwith_{" ^ v ^ ":=\\dots} " ^ (quant_newline t) ^ (latex_of_grammar_symbol 2 t) | WithVar(v,s,e,t) -> "\\bigwith_{" ^ v ^ ":=" ^ (latex_of_internal_grammar_symbol 2 s) ^ "} " ^ (quant_newline t) ^ (latex_of_grammar_symbol 2 t) | Star s -> latex_of_grammar_symbol 2 s ^ "^\\star" | Bracket(lf,rf,s) -> "\\langle " ^ (if lf then "\\langle " else "") ^ (latex_of_grammar_symbol 0 s) ^ "\\rangle" ^ (if rf then "\\rangle " else "") | BracketSet d -> "{\\bf BracketSet}(" ^ direction d ^ ")" | Maybe s -> "?" ^ latex_of_grammar_symbol 2 s let latex_of_selectors selectors = String.concat ", " (Xlist.map selectors (fun (cat,rel,l) -> let rel = if rel = Eq then "=" else "!=" in string_of_cat cat ^ rel ^ (String.concat "|" l))) let print_latex_grammar grammar = Printf.printf "grammar size: %d\n" (Xlist.size grammar); Xlatex.latex_file_out "results/" "grammar" "a0" false (fun file -> Xlist.iter grammar (fun (selectors,(bracket,quant,syntax,semantics),weight) -> let syntax = add_quantifiers_simple syntax (List.rev quant) in Printf.fprintf file "%s\\\\\n$\\begin{array}{l}%s\\end{array}$\\\\\\;\\\\\\;\\\\\n" (latex_of_selectors selectors) (latex_of_grammar_symbol 0 syntax))); Xlatex.latex_compile_and_clean "results/" "grammar" let grammar = parse_grammar grammar (* let _ = print_latex_grammar grammar *)