diff --git a/morphology/data/alternations.dic b/morphology/data/alternations.dic index 510ae19..5b89f3f 100644 --- a/morphology/data/alternations.dic +++ b/morphology/data/alternations.dic @@ -191,7 +191,7 @@ p py p r ry r s sy s - š šy š +# š šy š t ty t v vy v z zy z diff --git a/morphology/data/fonetics.dic b/morphology/data/fonetics.dic new file mode 100644 index 0000000..e9d42eb --- /dev/null +++ b/morphology/data/fonetics.dic @@ -0,0 +1,876 @@ +@symbols +ω a ą e ę o ó u +δ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y z ź ż - ε v x q +#γ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y ź ż - ε +#ξ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y - ε +#μ a ą b c ć d e ę f g k l ł m n ń o ó p r s ś t u w y ź ż - ε +λ b c ć d f g h k l ł m n ń p r s ś t w z ź ż - ε v x q +κ b c ć d f g h i j k l ł m n ń p r s ś t w z ź ż - ε v x q +#σ a ą b c ć d e ę f g j h k l ł m n ń o ó p r s ś t u w y z ź ż - ε +ρ aε achε amiε ąε eε ęε iε oε omε onε umε +α ε goε muε mε +ά ε goε muε +β ε chε miε mε +γ aε achε amiε ąε eε ęε iε oε omε onε umε ówε uε owiε omε emε yε owieε emuε egoε goε mε muε ε + +@rev_symbols +ω a ą e ę o ó u {eu} +δ a ą b c ć č d ʒ ǯ e ę f g h x i k l ł m n ń o ó p r ř s ś š t u v w y z ź ž - ε { +λ b c ć č d ʒ ǯ f g h x k l ł m n ń p r ř s ś š t v w z ź ž - ε { q +κ b c ć č d ʒ ǯ f g h x i j ʲ k l ł m n ń p r ř s ś š t v w z ź ž - ε { +ρ aε axε am′iε ąε eε ęε iε oε omε onε umε + +@rev_rules +core p′ pi ω +core p′ p i +core pj pi ρ +core b′ bi ω +core b′ b i +core bj bi ρ +core m′ mi ω +core m′ m i +core m′j mi ρ +core f′ fi ω +core f′ f i +core fj fi ρ +core v′ wi ω +core v′ w i +core v w δ +core vj wi ρ +#core v w j +core t′ ć λ +core t′ ci ω +core t′ c i +core d′ dź λ +core d′ dzi ω +core d′ dz i +core ʒ dz +core tʲ ti ω +core tʲ t i +core tj ti ρ +core dʲ di ω +core dʲ d i +core dj di ρ +core č cz δ +core čʲ czi ω +core čʲ cz i +core č cz j +core ǯ dż δ +core ǯʲ dżi ω +core ǯʲ dż i +core ǯj dżi ρ +core s′ ś λ +core s′ si ω +core s′ s i +core z′ ź λ +core z′ zi ω +core z′ z i +core š sz δ +core š sz j +core šʲ szi ω +core šʲ sz i +core ž ż δ +core žʲ żi ω +core žʲ ż i +core žj żi ρ +core lʲ li ω +core lj li ρ +core ř rz δ +core ř rz j +core rʲ ri ω +core rʲ r i +core rj ri ρ +core n′ ń λ +core n′ ni ω +core n′ n i +core n′j ni ρ +core k′ ki ω +core k′ k i +core k′j ki ρ +core g′ gi ω +core g′ g i +core g′j gi ρ +core x ch δ +core xʲ chi ω +core xʲ ch i +core xʲj chi ρ +core hʲ hi ω +core hʲ h i +core aʲ a i +core eʲ e i +core oʲ o i +core óʲ ó i +core uʲ u i +core iʲ i i +core yʲ y i + +aux {ch}š ch +aux {q}k q +aux {ng}n ng +aux {tch}č tch +aux {sh}š sh +aux {w}ł w +aux {leigh}l leigh +aux {au}ał au +aux {sch}š sch +aux {tsch}č tsch +aux {z}c z +core {lj}lj lj a +core {lj}lj lj e +core {pj}pj pj e +aux {zs}ž zs ε +aux {cs}č cs ε + +aux {ay}aj ay ε +aux {ey}ej ey ε +aux {oy}oj oy ε +aux {ay}aj ay ω +aux {ey}ej ey ω +aux {oy}oj oy ω +aux {ai}aj ai ε +aux {ai}aj ai ω + +aux {dh}dʲ dhi ω +aux {dh}dʲ dh iκ +aux {dh}d dh δ +aux {gh}g′ ghi ω +aux {gh}g′ gh iκ +aux {gh}g gh δ +aux {kh}k′ khi ω +aux {kh}k′ kh iκ +aux {kh}k kh δ +aux {nh}n′ nhi ω +aux {nh}n′ nh iκ +aux {nh}n nh δ +aux {th}tʲ thi ω +aux {th}tʲ th iκ +aux {th}t th δ +core {v}v′ vi ω +core {v}v′ v iκ +core {v}v v δ +core {v}vj vi ρ +core {x}ks′ xi ω +core {x}ks′ x iκ +core {x}ks x δ + +#lemma=e/y gender=m1 +aux {dieu}dʲe dieu ε +aux {dieu}dʲi dieu ε +aux {quieu}k′e quieu ε +aux {quieu}k′i quieu ε +aux {lieu}lʲe lieu ε +aux {lieu}lʲi lieu ε +aux {rie}rʲe rie ε +aux {rie}rʲi rie ε +aux {gie}ǯʲe gie ε +aux {gie}ǯʲi gie ε +aux {kie}k′e kie ε +aux {kie}k′i kie ε +aux {tie}tʲe tie ε +aux {tie}tʲi tie ε +aux {pie}p′e pie ε +aux {pie}p′i pie ε +aux {die}dʲe die ε +aux {die}dʲi die ε +aux {bee}b′e bee ε +aux {bee}b′i bee ε + +#lemma=e gender=m1 +aux {mée}me mée ε +aux {ge}g′e ge ε +aux {ke}k′e ke ε + +#lemma=y gender=m1 +aux {by}b′i by ε +aux {dy}dʲi dy ε +aux {dí}dʲi dí ε +aux {phy}f′i phy ε +aux {guy}g′i guy ε +aux {ky}k′i ky ε +aux {my}m′i my ε +aux {li}li li ε +aux {ly}li ly ε +aux {ry}rʲi ry ε +core {ři}rʲi ři ε +aux {sy}sʲi sy ε +aux {cy}sʲi cy ε +aux {şi}sʲi şi ε +aux {thy}tʲi thy ε + +#lemma=e/ndm gender=m1 +aux {chais}še chais ε +aux {lais}le lais ε +aux {nais}ne nais ε +aux {rès}re rès ε +aux {rés}re rés ε +aux {ré}re ré ε + +#lemma=ε +aux {de}d de ε +aux {fe}f fe ε +aux {phe}f phe ε +aux {ge}ǯ ge ε +aux {ges}ǯ ges ε +aux {gue}g gue ε +aux {gues}g gues ε +aux {ke}k ke ε +aux {que}k que ε +aux {ques}k ques ε +aux {le}l le ε +aux {les}l les ε +aux {me}m me ε +aux {ne}n ne ε +aux {gne}n′ gne ε +aux {re}r re ε +aux {rue}r rue ε +aux {se}s se ε +aux {ce}s ce ε +aux {che}š che ε +aux {te}t te ε +aux {the}t the ε +aux {ve}v ve ε +aux {we}ł we ε +aux {se}z se ε +aux {ge}ž ge ε +aux {oe}oł oe ε + + +aux g′el gel ε + +aux g′e ge ε +aux k′e ke ε +aux k′i ky ε +aux k′i kij ε +aux k′i koj ε +aux k′i kyj ε + s s k′i +aux sk′i szky ε + l l i +aux li ly ε + +aux ks x ε +aux ks kx ε + + r r a +aux r rh a + t t a +aux t th a + k k +aux k kh a +aux ks x a +aux k c a + n n a +aux n nh a + d d a +aux d dh a + g g a +aux g gh a + +aux k c o +aux kk cc o + + + b b ε +aux b bes ε + d d ε +aux d de ε +aux d dh ε + f f ε +aux f phe ε +aux f ph ε + g g ε +aux g gue ε +aux g gues ε + k k ε +aux k ke ε +aux k c ε +aux k que ε +aux k q ε +aux k cq ε +aux k ques ε +aux k cques ε +aux kt ct ε + m m ε +aux m me ε + n n ε +aux n ne ε +aux n nes ε +aux n ng ε +aux n nh ε + r r ε +aux r re ε +aux r res ε +aux r rs ε +aux r rh ε + s s ε +aux s se ε +aux s ce ε +aux s th ε + t t ε +aux t te ε +aux t tes ε +aux t thes ε +aux t th ε +aux t the ε +aux t tt ε + + e e j +aux ej ey ε + + a a ε +aux a ah ε + a a j +aux aja ayah ε + + o o j +aux oja oya ε + +acro {A}a A +acro {B}b B +acro {C}c C +acro {C}k C +acro {Ć}t′ Ć +acro {D}d D +acro {E}e E +acro {F}f F +acro {G}g G +acro {H}h H +acro {I}j I +acro {J}j J +acro {K}k K +acro {L}l L +acro {Ł}ł Ł +acro {M}m M +acro {N}n N +acro {O}o O +acro {P}p P +acro {R}r R +acro {S}s S +acro {Ś}s′ Ś +acro {T}t T +acro {U}u U +acro {V}v V +acro {W}v W +acro {X}ks X +acro {Y}y Y +acro {Z}z Z +acro {Ż}ž Ż +acro {J}jot J +acro {Z}zet Z +acro {Ż}žet Ż +acro {v}v V +acro {x}ks X +acro {j}jot J +acro {z}zet z +acro {ż}žet Ż + +acro a A ε + +@rules +core p′ pi ω +core p′ p iκ +#core p p δ +#core p p j +core pj pi ρ +core b′ bi ω +core b′ b iκ +#core b b δ +core bj bi ρ +#zbitka +#core b b j +core m′ mi ω +core m′ m iκ +#core m m δ +core m′j mi ρ +core f′ fi ω +core f′ f iκ +#core f f δ +core fj fi ρ +core v′ wi ω +core v′ w iκ +core v w δ +core vj wi ρ +#zbitka +#core v w j +core t′ ć λ +#zbitki +#core t′ ć u +#core t′ ć a +#core t′ ć i +core t′ ci ω +#core c c j +core t′ c iκ +#core c c μ +core d′ dź λ +core d′ dzi ω +core d′ dz iκ +core ʒ dz δ +#zbitka +#core ʒ dz j +#core t t δ +#core t t j +core tʲ ti ω +core tʲ t iκ +core tj ti ρ +#core d d ξ +core dʲ di ω +core dʲ d iκ +#core d d j +core dj di ρ +core č cz δ +core čʲ czi ω +core čʲ cz iκ +#core čʲ cz j +core ǯ dż δ +core ǯʲ dżi ω +core ǯʲ dż iκ +core ǯj dżi ρ +core s′ ś λ +core s′ si ω +#core s s j +core s′ s iκ +#core s s γ +core z′ ź λ +core z′ zi ω +#core z z j +core z′ z iκ +#core z z δ +core š sz δ +#core šʲ sz j +core šʲ szi ω +core šʲ sz iκ +core ž ż δ +core žʲ żi ω +core žʲ ż iκ +core žj żi ρ +#core l l δ +core lʲ li ω +core l l iκ +#core l l j +#core ł ł δ +#zbitka +#core ł ł i +#zbitka +#core ł ł j +core lj li ρ +core ř rz δ +#zbitka +#core ř rz j +core r r zi +#core r r γ +#core r r j +core rʲ ri ω +core rʲ r iκ +core rj ri ρ +core n′ ń λ +core n′ ni ω +core n′ n iκ +#core n n δ +#core n n j +core n′j ni ρ +core k′ ki ω +core k′ k iκ +#core k k δ +#core k k j +core k′j ki ρ +core g′ gi ω +core g′ g iκ +#core g g δ +core g′j gi ρ +core x ch δ +core xʲ chi ω +core xʲ ch iκ +core xʲj chi ρ +#core h h δ +core hʲ hi ω +core hʲ h iκ +core mar mar z +core m′er mier z +core n′e nie i +#core a a σ +core aʲ a i +#core a a i +#core e e σ +core eʲ e i +#core e e i +#core o o σ +core oʲ o i +#core o o i +#core ó ó σ +core óʲ ó i +#core ó ó i +#core u u σ +core uʲ u i +#core u u i +#core i i σ +core iʲ i i +#core i i i +#core y y σ +core yʲ y i +#core y y i +#core ą ą σ +#core ę ę σ +#core j j +#core A A +#core B B +#core C C +#core Ć Ć +#core D D +#core E E +#core F F +#core G G +#core H H +#core I I +#core J J +#core K K +#core L L +#core Ł Ł +#core M M +#core N N +#core O O +#core P P +#core R R +#core S S +#core Ś Ś +#core T T +#core U U +#core W W +#core Y Y +#core Z Z +#core Ż Ż +#core - - + +aux {ch}š ch γ +aux {q}k q γ +aux {ng}n ng γ +aux {tch}č tch γ +aux {sh}š sh γ +aux {w}ł w +aux {leigh}l leigh γ +aux {au}ał au γ +aux {sch}š sch γ +aux {tsch}č tsch γ +aux {z}c z γ +core {lj}lj lj γ +core {pj}pj pj γ +aux {zs}ž zs γ +aux {cs}č cs γ + +aux {ay}aj ay γ +aux {ey}ej ey γ +aux {oy}oj oy γ +aux {ai}aj ai γ + +aux {dh}dʲ dhi ω +aux {dh}dʲ dh iκ +aux {dh}d dh δ +aux {gh}g′ ghi ω +aux {gh}g′ gh iκ +aux {gh}g gh δ +aux {kh}k′ khi ω +aux {kh}k′ kh iκ +aux {kh}k kh δ +aux {nh}n′ nhi ω +aux {nh}n′ nh iκ +aux {nh}n nh δ +aux {th}tʲ thi ω +aux {th}tʲ th iκ +aux {th}t th δ +core {v}v′ vi ω +core {v}v′ v iκ +core {v}v v δ +core {v}vj vi ρ +core {x}ks′ xi ω +core {x}ks′ x iκ +core {x}ks x δ + +#lemma=e/y gender=m1 +aux {dieu}dʲe dieu α +aux {dieu}dʲi dieu β +aux {quieu}k′e quieu α +aux {quieu}k′i quieu β +aux {lieu}lʲe lieu α +aux {lieu}lʲi lieu β +aux {rie}rʲe rie α +aux {rie}rʲi rie β +aux {gie}ǯʲe gie α +aux {gie}ǯʲi gie β +aux {kie}k′e kie α +aux {kie}k′i kie β +aux {tie}tʲe tie α +aux {tie}tʲi tie β +aux {pie}p′e pie α +aux {pie}p′i pie β +aux {die}dʲe die α +aux {die}dʲi die β +aux {bee}b′e bee α +aux {bee}b′i bee β + +#lemma=e gender=m1 +aux {mée}me mée α +aux {ge}g′e ge α +aux {ke}k′e ke α + +#lemma=y gender=m1 +aux {by}b′ by ’eά +aux {by}b′i by β +aux {dy}dʲ dy ’eά +aux {dy}dʲ dy eά +aux {dy}dʲ dy owieε +aux {dy}dʲi dy β +aux {dí}dʲ dí eά +aux {dí}dʲ dí owieε +aux {dí}dʲi dí β +aux {phy}f′ phy ’eά +aux {phy}f′i phy β +aux {guy}g′ guy ’eά +aux {guy}g′i guy β +aux {ky}k′ ky ’eά +aux {ky}k′ ky eά +aux {ky}k ky owieε +aux {ky}k′i ky β +aux {my}m′ my ’eά +aux {my}m′i my β +aux {li}l li eά +aux {li}l l iβ +aux {ly}l ly ’eά +aux {ly}li ly β +aux {ry}rʲ ry ’eά +aux {ry}rʲi ry β +core {ři}rʲ ři eά +core {ři}rʲ ři owieε +core {ři}rʲi ři β +aux {sy}sʲ sy ’eά +aux {sy}sʲ sy eά +aux {sy}sʲ sy owieε +aux {sy}sʲi sy β +aux {şi}sʲ şi eά +aux {şi}sʲ şi owieε +aux {şi}sʲi şi β +aux {cy}sʲ cy ’eά +aux {cy}sʲi cy β +aux {thy}tʲ thy ’eά +aux {thy}tʲ thy eά +aux {thy}tʲ thy owieε +aux {thy}tʲi thy β + +#lemma=e/ndm gender=m1 +aux {chais}še chais ’ +aux {lais}le lais ’ +aux {nais}ne nais ’ +aux {rès}re rès ’ +aux {rés}re rés ’ +aux {ré}re ré ά + +#lemma=ε +aux {de}d de ’ +aux {de}d de ε +aux {fe}f fe ’ +aux {fe}f fe ε +aux {phe}f phe ’ +aux {phe}f phe ε +aux {ge}ǯ ge ’ +aux {ge}ǯ ge ε +aux {ges}ǯ ges ’ +aux {ges}ǯ ges ε +aux {gue}g gue ’ +aux {gue}g gue ε +aux {gues}g gues ’ +aux {gues}g gues ε +aux {ke}k ke ’ +aux {ke}k ke ε +aux {que}k que ’ +aux {que}k que ε +aux {ques}k ques ’ +aux {ques}k ques ε +aux {le}l le ’ +aux {le}l le ε +aux {les}l les ’ +aux {les}l les ε +aux {me}m me ’ +aux {me}m me ε +aux {ne}n ne ’ +aux {ne}n ne ε +aux {gne}n′ gne ’ +aux {gne}n′ gne ε +aux {re}r re ’ +aux {re}r re ε +aux {rue}r rue ’ +aux {rue}r rue ε +aux {se}s se ’ +aux {se}s se ε +aux {ce}s ce ’ +aux {ce}s ce ε +aux {che}š che ’ +aux {che}š che ε +aux {te}t te ’ +aux {te}t te ε +aux {the}t the ’ +aux {the}t the ε +aux {ve}v ve ’ +aux {ve}v ve ε +aux {we}ł we ’ +aux {we}ł we ε +aux {se}z se ’ +aux {se}z se ε +aux {ge}ž ge ’ +aux {ge}ž ge ε +#z wyjątkiem sg:loc.voc +aux {oe}oł oe ’ +aux {oe}oł oe ε + +aux ’ γ +# ’ ’ owi + + n n g + z z γ + b b e + b b y + d d y + m m y + s s y + c c y + r r y + k k y + l l y + l l a + n n a + r r è + d d e + f f e + p p h + g g e + g g u + k k e + q q u + l l e + m m e + n n e + g g n + r r e + s s e + c c e +# c c h + t t e + t t h + v v e + w w e + s s e + o o e + m m é + a a y + e e y + o o y + +acro a A ε + +acro {A}a A +acro {B}b B +acro {C}c C +acro {C}k C +acro {Ć}t′ Ć +acro {D}d D +acro {E}e E +acro {F}f F +acro {G}g G +acro {H}h H +acro {I}j I +acro {J}j J +acro {K}k K +acro {L}l L +acro {Ł}ł Ł +acro {M}m M +acro {N}n N +acro {O}o O +acro {P}p P +acro {R}r R +acro {S}s S +acro {Ś}s′ Ś +acro {T}t T +acro {U}u U +acro {V}v V +acro {W}v W +acro {X}ks X +acro {Y}y Y +acro {Z}z Z +acro {Ż}ž Ż +acro {J}jot J-ot δ +acro {Z}zet Z-et δ +acro {Ż}žet Ż-et δ +acro {z}zet z-et δ + +acro {B}b′e B-ie ε +acro {D}d′e D-zie ε +acro {F}f′e F-ie ε +acro {M}m′e M-ie ε +acro {N}n′e N-ie ε +acro {P}p′e P-ie ε +acro {R}ře R-ze ε +acro {S}s′e S-ie ε +acro {T}t′e -cie ε +acro {V}v′e V-ie ε +acro {W}v′e W-ie ε +acro {X}ks′e X-ie ε +acro {Z}z′e Z-ie ε +acro {J}jot′e J-ocie ε +acro {Z}zet′e Z-ecie ε +acro {Ż}žet′e Ż-ecie ε + +acro {B}b′e Bie ε +acro {D}d′e Dzie ε +acro {F}f′e Fie ε +acro {M}m′e Mie ε +acro {N}n′e Nie ε +acro {P}p′e Pie ε +acro {R}ře Rze ε +acro {S}s′e Sie ε +acro {T}t′e cie ε +acro {V}v′e Vie ε +acro {W}v′e Wie ε +acro {X}ks′e Xie ε +acro {Z}z′e Zie ε +acro {J}jot′e Jocie ε +acro {Z}zet′e Zecie ε +acro {Ż}žet′e Żecie ε + +acro b′e b-ie ε +acro d′e d-zie ε +acro f′e f-ie ε +acro m′e m-ie ε +acro n′e n-ie ε +acro p′e p-ie ε +acro ře r-ze ε +acro s′e s-ie ε +acro t′e -cie ε +core {v}v′e v-ie ε +acro v′e w-ie ε +core {x}ks′e x-ie ε +acro z′e z-ie ε +acro {j}jot′e j-ocie ε +acro {z}zet′e z-ecie ε +acro {ż}žet′e ż-ecie ε + +acro {T}t′e CIE ε + a a cieε + e e cieε + i i cieε + o o cieε + u u cieε +acro {A}a{T}t′e acie ε +acro {E}e{T}t′e ecie ε +acro {I}j{T}t′e icie ε +acro {O}o{T}t′e ocie ε +acro {U}u{T}t′e ucie ε + +acro {C}k′i C-i ε +acro {C}k′em C-iem ε +acro {E}eʲi E-i ε +acro {G}g′i G-i ε +acro {G}g′em G-iem ε +acro {J}ji J-i ε +acro {K}k′i K-i ε +acro {K}k′em K-iem ε +acro {L}li L-i ε +acro - γ + + diff --git a/morphology/data/fonetics_acro.dic b/morphology/data/fonetics_acro.dic deleted file mode 100644 index 2ff073b..0000000 --- a/morphology/data/fonetics_acro.dic +++ /dev/null @@ -1,170 +0,0 @@ -@symbols -δ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y z ź ż - ε v x q -γ aε achε amiε ąε eε ęε iε oε omε onε umε ówε uε owiε omε emε yε owieε emuε egoε goε mε muε ε - - -@rev_symbols - - -@rev_rules -acro {A}a A -acro {B}b B -acro {C}c C -acro {C}k C -acro {Ć}t′ Ć -acro {D}d D -acro {E}e E -acro {F}f F -acro {G}g G -acro {H}h H -acro {I}j I -acro {J}j J -acro {K}k K -acro {L}l L -acro {Ł}ł Ł -acro {M}m M -acro {N}n N -acro {O}o O -acro {P}p P -acro {R}r R -acro {S}s S -acro {Ś}s′ Ś -acro {T}t T -acro {U}u U -acro {V}v V -acro {W}v W -acro {X}ks X -acro {Y}y Y -acro {Z}z Z -acro {Ż}ž Ż -acro {J}jot J -acro {Z}zet Z -acro {Ż}žet Ż -acro {v}v V -acro {x}ks X -acro {j}jot J -acro {z}zet z -acro {ż}žet Ż - -acro a A ε - - -@rules -acro {A}a A δ -acro {B}b B δ -acro {C}c C δ -acro {C}k C δ -acro {Ć}t′ Ć δ -acro {D}d D δ -acro {E}e E δ -acro {F}f F δ -acro {G}g G δ -acro {H}h H δ -acro {I}j I δ -acro {J}j J δ -acro {K}k K δ -acro {L}l L δ -acro {Ł}ł Ł δ -acro {M}m M δ -acro {N}n N δ -acro {O}o O δ -acro {P}p P δ -acro {R}r R δ -acro {S}s S δ -acro {Ś}s′ Ś δ -acro {T}t T δ -acro {U}u U δ -acro {V}v V δ -acro {W}v W δ -acro {X}ks X δ -acro {Y}y Y δ -acro {Z}z Z δ -acro {Ż}ž Ż δ -acro {J}jot J-ot δ -acro {Z}zet Z-et δ -acro {Ż}žet Ż-et δ -acro {z}zet z-et δ - -acro {B}b′e B-ie ε -acro {D}d′e D-zie ε -acro {F}f′e F-ie ε -acro {M}m′e M-ie ε -acro {N}n′e N-ie ε -acro {P}p′e P-ie ε -acro {R}ře R-ze ε -acro {S}s′e S-ie ε -acro {T}t′e -cie ε -acro {V}v′e V-ie ε -acro {W}v W-ie ε -acro {X}ks′e X-ie ε -acro {Z}z Z-ie ε -acro {J}jot′e J-ocie ε -acro {Z}zet′e Z-ecie ε -acro {Ż}žet′e Ż-ecie ε - -acro {B}b′e Bie ε -acro {D}d′e Dzie ε -acro {F}f′e Fie ε -acro {M}m′e Mie ε -acro {N}n′e Nie ε -acro {P}p′e Pie ε -acro {R}ře Rze ε -acro {S}s′e Sie ε -acro {T}t′e cie ε -acro {V}v′e Vie ε -acro {W}v Wie ε -acro {X}ks′e Xie ε -acro {Z}z Zie ε -acro {J}jot′e Jocie ε -acro {Z}zet′e Zecie ε -acro {Ż}žet′e Żecie ε - -acro b′e b-ie ε -acro d′e d-zie ε -acro f′e f-ie ε -acro m′e m-ie ε -acro n′e n-ie ε -acro p′e p-ie ε -acro ře r-ze ε -acro s′e s-ie ε -acro t′e -cie ε -acro {v}v′e v-ie ε -acro v w-ie ε -acro {x}ks′e x-ie ε -acro z z-ie ε -acro {j}jot′e j-ocie ε -acro {z}zet′e z-ecie ε -acro {ż}žet′e ż-ecie ε - -acro {T}t′e CIE ε -acro {A}a{T}t′e acie ε -acro {E}e{T}t′e ecie ε -acro {I}j{T}t′e icie ε -acro {O}o{T}t′e ocie ε -acro {U}u{T}t′e ucie ε - -acro {C}k′i C-i ε -acro {C}k′em C-iem ε -acro {G}g′i G-i ε -acro {G}g′em G-iem ε -acro {J}ji J-i ε -acro {K}k′i K-i ε -acro {K}k′em K-iem ε -acro {L}li L-i ε -acro - γ - a a ε - ax ach ε - am′i ami ε - ą ą ε - e e ε - ę ę ε - em em ε - o o ε - om om ε - ov′i owi ε - ov′e owie ε - u u ε - óv ów ε - y y ε - - diff --git a/morphology/data/fonetics_acro_old.dic b/morphology/data/fonetics_acro_old.dic deleted file mode 100644 index ee2cd79..0000000 --- a/morphology/data/fonetics_acro_old.dic +++ /dev/null @@ -1,81 +0,0 @@ -@symbols - - -@rev_symbols - - -@rev_rules -!a A -!b B -!c C -!t′ Ć -!d D -!e E -!f F -!g G -!h H -!i I -#!j J -!k K -!l L -!ł Ł -!m M -!n N -!o O -!p P -!r R -!s S -!s′ Ś -!t T -!u U -!v W -!y Y -!z Z -!ż Ż -#x ch ε -ax -ach ε -ę -ę ε -ʲi -i ε -a A ε -!jot′e J-ocie ε -!jotax J-otach ε -!jot J ε -t′e -cie ε - - -@rules -!a A -!b B -!c C -!t′ Ć -!d D -!e E -!f F -!g G -!h H -!i I -#!jot J -!k K -!l L -!ł Ł -!m M -!n N -!o O -!p P -!r R -!s S -!s′ Ś -!t T -!u U -!v W -!y Y -!z Z -!ž Ż -ax -ach ε -ę -ę ε -ʲi -i ε -!jot′e J-ocie ε -!jotax J-otach ε -t′e -cie ε - - diff --git a/morphology/data/fonetics_de.dic b/morphology/data/fonetics_de.dic deleted file mode 100644 index 111cfcb..0000000 --- a/morphology/data/fonetics_de.dic +++ /dev/null @@ -1,88 +0,0 @@ -@symbols -ω a ą e ę o ó u -δ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y z ź ż - ε -γ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y ź ż - ε -ξ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y - ε -μ a ą b c ć d e ę f g k l ł m n ń o ó p r s ś t u w y ź ż - ε -λ b c ć d f g h k l ł m n ń p r s ś t w z ź ż - ε -κ b c ć d f g h i j k l ł m n ń p r s ś t w z ź ż - ε -σ a ą b c ć d e ę f g j h k l ł m n ń o ó p r s ś t u w y z ź ż - ε -ρ aε achε amiε ąε eε ęε iε oε omε onε umε ówε uε owiε omε emε yε owieε emuε egoε goε mε muε ε - -@rev_symbols -ω a ą e ę o ó u -δ a ą b c ć č d ʒ ǯ e ę f g h x i k l ł m n ń o ó p r ř s ś š t u v w y z ź ž - ε -λ b c ć č d ʒ ǯ f g h x k l ł m n ń p r ř s ś š t v w z ź ž - ε -κ b c ć č d ʒ ǯ f g h x i j ʲ k l ł m n ń p r ř s ś š t v w z ź ž - ε -ρ aε axε am′iε ąε eε ęε iε oε omε onε umε óvε uε ov′iε omε emε ε - -@rev_rules -de r rh -de r r -de ta tha -de ks x - -de g′e ge ε -de k′e ke ε - -de k′i ky ε - - -@rules -de š sch -de r rh -de c z -de ał au - -de ře rze ε -de ks′e ksie ε -de t′e cie ε -de v′e vie ε - -de óv ów ε -de ov′i owi ε -de ov′e owie ε -de am′i ami ε -de ax ach ε - -de g′e ge ch -de g′e ge go -de g′e ge m -de g′e ge mi -de g′e ge mu -de k′e ke ch -de k′e ke go -de k′e ke m -de k′e ke mi -de k′e ke mu -de k′i ky ch -de k′i ky mi -de k′i ky m -de x ch ε -de m′i mi ε - -de kj ky ’ -de ’ ρ - -de góv gów ε -de gov′i gowi ε -de gov′e gowie ε -de gam′i gami ε -de gax gach ε -de gom gom ε -de kóv ków ε -de kov′i kowi ε -de kov′e kowie ε -de kam′i kami ε -de kax kach ε -de kom kom ε - -de k′ix kich ε -de k′e kie ε -de k′im kim ε -de k′im′i kimi ε -de k′ego kiego ε -de k′emu kiemu ε - - - diff --git a/morphology/data/fonetics_en.dic b/morphology/data/fonetics_en.dic deleted file mode 100644 index acf207d..0000000 --- a/morphology/data/fonetics_en.dic +++ /dev/null @@ -1,212 +0,0 @@ -@symbols -ω a ą e ę o ó u -δ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y z ź ż - ε -γ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y ź ż - ε -ξ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y - ε -μ a ą b c ć d e ę f g k l ł m n ń o ó p r s ś t u w y ź ż - ε -λ b c ć d f g h k l ł m n ń p r s ś t w z ź ż - ε -κ b c ć d f g h i j k l ł m n ń p r s ś t w z ź ż - ε -σ a ą b c ć d e ę f g j h k l ł m n ń o ó p r s ś t u w y z ź ż - ε -ρ aε achε amiε ąε eε ęε iε oε omε onε umε ówε uε owiε omε emε yε owieε emuε egoε ε - -@rev_symbols -ω a ą e ę o ó u -δ a ą b c ć č d ʒ ǯ e ę f g h x i k l ł m n ń o ó p r ř s ś š t u v w y z ź ž - ε -λ b c ć č d ʒ ǯ f g h x k l ł m n ń p r ř s ś š t v w z ź ž - ε -κ b c ć č d ʒ ǯ f g h x i j ʲ k l ł m n ń p r ř s ś š t v w z ź ž - ε -ρ aε axε am′iε ąε eε ęε iε oε omε onε umε óvε uε ov′iε omε emε ε - -@rev_rules -en ks x -en č tch -en ł w -en aj ay ρ -en ej ey ρ -en oj oy ρ - -en d de ε -en d d ε -en f ph ε -en k ke ε -en k k ε -en k kes ε -en k c -en k ky ε -en t te ε -en t t ε -en t th -en t the ε -en r re ε -en r r ε -en r rh ε -en m me ε -en m m ε -en n ne ε -en n n ε -en s se ε -en s s ε -en s ce ε -en s th ε -en s the ε - -en a a ε -en a ah ε -en ka kha ε -en vja via ε - -en ʲi ie ε -en ʲi i ε -en ʲi y ε - -@rules -en ks x -en č tch -en ł w -en d dh -en g gh -en k kh -en l leigh -en aj ay ρ -en ej ey ρ -en oj oy ρ -en aʲi ai ε -en oʲi oi ε -en eʲi ei ε -en uʲi ui ε - -en vj vi ρ -en v′ij vij ε - -en ce ce ε -en f′e fie ε -en ře rze ε -en t′e cie ε -en kt′e kcie ε -en s′t′e ście ε -en m′e mie ε -en n′e nie ε -en d′e dzie ε -en p′e pie ε -en s′e sie ε -en ks′e ksie ε -en v′e vie ε -en z′e zie ε - -en k′i ki ε -en k′em kiem ε -en k k iem -en řy rzy ε - - -en ʲi ie ch -en ʲi ie mi -en ʲi ie m -en ʲi ee ch -en ʲi ee mi -en li ly ch -en li ly mi -en li ly m -en ʲego iego ε -en ʲemu iemu ε -en ʲem iem ε -en ʲim im ε -en ʲix ich ε -en ʲe ie ε -en ʲim′i imi ε -en ʲov′e iowie ε -en ′e ie m -en ′i i ε -en i i ε -en x ch ε -en m′i mi ε -en óv ów ε -en ov′i owi ε -en ov′e owie ε -en am′i ami ε -en ax ach ε - -en oł oe ’ -en b by ’ -en k′ ky ’ -en r ry ’ -en t thy ’ -en d dy ’ -en m my ’ -en s cy ’ -en l ly ’ -en d dy ’ -en s ce ’ -en d de ’ -en f fe ’ -en ǯ ge ’ -en k ke ’ -en l le ’ -en m me ’ -en n ne ’ -en r re ’ -en s se ’ -en t te ’ -en t the ’ -en v ve ’ -en ł we ’ -en ’ ρ - -en s ce ε -en d de ε -en f fe ε -en ǯ ge ε -en k ke ε -en l le ε -en le le ε -en m me ε -en me me ε -en n ne ε -en r re ε -en re re ε -en s se ε -en t te ε -en v ve ε -en ł we ε - - -#en os′t′ ość ε -#en os′t′ą ością ε -#en os′t′om ościom ε -#en os′t′am′i ościami ε -#en os′t′ax ościach ε -#en os′t′i ości ε -#en ovą ową ε -#en ovym′i owymi ε -#en ovym owym ε -#en ovyx owych ε -#en ovy owy ε -#en ovo owo ε -#en ov′i owi ε -#en ovemu owemu ε -#en ovej owej ε -#en ovego owego ε -#en ove owe ε -#en ova owa ε -#en oły owy ε -#en oła owa ε -#en k′ix kich ε -#en k′e kie ε -#en k′ej kiej ε -#en k′im kim ε -#en k′im′i kimi ε -#en ka ka ε -#en ką ką ε -#en ko ko ε -#en ku ku ε -#en če cze ε -#en n′ja nia ε -#en n′ją nią ε -#en n′je nie ε -#en n′ję nię ε -#en n′ji nii ε -#en n′ij nij ε -#en n′jom niom ε -#en n′jo nio ε -#en n′jax niach ε -#en n′jam′i niami ε - diff --git a/morphology/data/fonetics_fr.dic b/morphology/data/fonetics_fr.dic deleted file mode 100644 index 43c028d..0000000 --- a/morphology/data/fonetics_fr.dic +++ /dev/null @@ -1,121 +0,0 @@ -@symbols -ω a ą e ę o ó u -δ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y z ź ż - ε -γ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y ź ż - ε -ξ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y - ε -μ a ą b c ć d e ę f g k l ł m n ń o ó p r s ś t u w y ź ż - ε -λ b c ć d f g h k l ł m n ń p r s ś t w z ź ż - ε -κ b c ć d f g h i j k l ł m n ń p r s ś t w z ź ż - ε -σ a ą b c ć d e ę f g j h k l ł m n ń o ó p r s ś t u w y z ź ż - ε -ρ aε achε amiε ąε eε ęε iε oε omε onε umε ówε uε owiε omε emε yε owieε emuε egoε goε mε muε ε - -@rev_symbols -ω a ą e ę o ó u -δ a ą b c ć č d ʒ ǯ e ę f g h x i k l ł m n ń o ó p r ř s ś š t u v w y z ź ž - ε -λ b c ć č d ʒ ǯ f g h x k l ł m n ń p r ř s ś š t v w z ź ž - ε -κ b c ć č d ʒ ǯ f g h x i j ʲ k l ł m n ń p r ř s ś š t v w z ź ž - ε -ρ aε axε am′iε ąε eε ęε iε oε omε onε umε óvε uε ov′iε omε emε ε - -@rev_rules -fr ks x -fr b bes ε -fr d d ε -fr d de ε -fr f phe ε -fr g gue ε -fr g gues ε -fr k c ε -fr k que ε -fr k q ε -fr k cq ε -fr k ques ε -fr k cques ε -fr m me ε -fr n n ε -fr n ne ε -fr n nes ε -fr n′ ng ε -fr r r ε -fr r re ε -fr r res ε -fr r rs ε -fr s s ε -fr s se ε -fr s ce ε -fr t t ε -fr t tes ε -fr t thes ε -fr z se ε - -@rules -fr k q -fr n ng - -fr b′e bie ε -fr d′e dzie ε -fr f′e fie ε -fr m′e mie ε -fr n′e nie ε -fr s′n′e śnie ε -fr ře rze ε -fr s′e sie ε -fr ks′e ksie ε -fr t′e cie ε -fr s′t′e ście ε -fr v′e vie ε -fr z′e zie ε - -fr řy rzy ε -fr t′i ci ε -fr v′i vi ε - -fr g′em giem ε -fr g′i gi ε -fr k′em kiem ε -fr k′i ki ε - -fr ʲi ieu ch -fr ʲe ieu go -fr ʲi ieu m -fr ʲi ieu mi -fr ʲe ieu mu -fr llj lli ego -fr llj lli emu -fr llj lli e -fr e ée go -fr e ée m -fr e ée mu -fr x ch ε -fr m′i mi ε - -fr óv ów ε -fr ov′i owi ε -fr ov′e owie ε -fr am′i ami ε -fr ax ach ε - -fr e ès ’ -fr e ais ’ -fr k que ’ -fr f phe ’ -fr sj sy ’ -fr fj phy ’ -fr rj ry ’ -fr š che ’ -fr d de ’ -fr r re ’ -fr z se ’ -fr ž ge ’ -fr # ine ’ -fr t te ’ -fr l le ’ -fr m me ’ -fr n′ gne ’ -fr n ne ’ -fr v ve ’ -fr g gue ’ -fr s ce ’ -fr gj guy ’ -fr ’ ρ - - diff --git a/morphology/data/fonetics_pl.dic b/morphology/data/fonetics_pl.dic deleted file mode 100644 index c0b90ea..0000000 --- a/morphology/data/fonetics_pl.dic +++ /dev/null @@ -1,790 +0,0 @@ -@symbols -ω a ą e ę o ó u -δ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y z ź ż - ε v x q -#γ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y ź ż - ε -#ξ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y - ε -#μ a ą b c ć d e ę f g k l ł m n ń o ó p r s ś t u w y ź ż - ε -λ b c ć d f g h k l ł m n ń p r s ś t w z ź ż - ε v x q -κ b c ć d f g h i j k l ł m n ń p r s ś t w z ź ż - ε v x q -#σ a ą b c ć d e ę f g j h k l ł m n ń o ó p r s ś t u w y z ź ż - ε -ρ aε achε amiε ąε eε ęε iε oε omε onε umε -α ε goε muε mε -ά ε goε muε -β ε chε miε mε -γ aε achε amiε ąε eε ęε iε oε omε onε umε ówε uε owiε omε emε yε owieε emuε egoε goε mε muε ε - -@rev_symbols -ω a ą e ę o ó u {eu} -δ a ą b c ć č d ʒ ǯ e ę f g h x i k l ł m n ń o ó p r ř s ś š t u v w y z ź ž - ε { -λ b c ć č d ʒ ǯ f g h x k l ł m n ń p r ř s ś š t v w z ź ž - ε { q -κ b c ć č d ʒ ǯ f g h x i j ʲ k l ł m n ń p r ř s ś š t v w z ź ž - ε { -ρ aε axε am′iε ąε eε ęε iε oε omε onε umε - -@rev_rules -pl p′ pi ω -pl p′ p i -pl pj pi ρ -pl b′ bi ω -pl b′ b i -pl bj bi ρ -pl m′ mi ω -pl m′ m i -pl m′j mi ρ -pl f′ fi ω -pl f′ f i -pl fj fi ρ -pl v′ wi ω -pl v′ w i -pl v w δ -pl vj wi ρ -#pl v w j -pl t′ ć λ -pl t′ ci ω -pl t′ c i -pl d′ dź λ -pl d′ dzi ω -pl d′ dz i -pl ʒ dz -pl tʲ ti ω -pl tʲ t i -pl tj ti ρ -pl dʲ di ω -pl dʲ d i -pl dj di ρ -pl č cz δ -pl čʲ czi ω -pl čʲ cz i -pl č cz j -pl ǯ dż δ -pl ǯʲ dżi ω -pl ǯʲ dż i -pl ǯj dżi ρ -pl s′ ś λ -pl s′ si ω -pl s′ s i -pl z′ ź λ -pl z′ zi ω -pl z′ z i -pl š sz δ -pl š sz j -pl šʲ szi ω -pl šʲ sz i -pl ž ż δ -pl žʲ żi ω -pl žʲ ż i -pl žj żi ρ -pl lʲ li ω -pl lj li ρ -pl ř rz δ -pl ř rz j -pl rʲ ri ω -pl rʲ r i -pl rj ri ρ -pl n′ ń λ -pl n′ ni ω -pl n′ n i -pl n′j ni ρ -pl k′ ki ω -pl k′ k i -pl k′j ki ρ -pl g′ gi ω -pl g′ g i -pl g′j gi ρ -pl x ch δ -pl xʲ chi ω -pl xʲ ch i -pl xʲj chi ρ -pl hʲ hi ω -pl hʲ h i -pl aʲ a i -pl eʲ e i -pl oʲ o i -pl óʲ ó i -pl uʲ u i -pl iʲ i i -pl yʲ y i - -fr {ch}š ch -fr {q}k q -fr {ng}n ng -en {tch}č tch -en {w}ł w -en {leigh}l leigh -de {au}ał au -de {sch}š sch -de {tsch}č tsch -de {z}c z - -en {ay}aj ay ε -en {ey}ej ey ε -en {oy}oj oy ε -en {ay}aj ay ω -en {ey}ej ey ω -en {oy}oj oy ω - -en {dh}dʲ dhi ω -en {dh}dʲ dh iκ -en {dh}d dh δ -en {gh}g′ ghi ω -en {gh}g′ gh iκ -en {gh}g gh δ -en {kh}k′ khi ω -en {kh}k′ kh iκ -en {kh}k kh δ -en {th}tʲ thi ω -en {th}tʲ th iκ -en {th}t th δ -en {v}v′ vi ω -en {v}v′ v iκ -en {v}v v δ -en {v}vj vi ρ -de-en-fr {x}ks′ xi ω -de-en-fr {x}ks′ x iκ -de-en-fr {x}ks x δ - -#lemma=e/y gender=m1 -fr {dieu}dʲe dieu ε -fr {dieu}dʲi dieu ε -fr {quieu}k′e quieu ε -fr {quieu}k′i quieu ε -fr {lieu}lʲe lieu ε -fr {lieu}lʲi lieu ε -en {rie}rʲe rie ε -en {rie}rʲi rie ε -en {gie}ǯʲe gie ε -en {gie}ǯʲi gie ε -en {kie}k′e kie ε -en {kie}k′i kie ε -en {tie}tʲe tie ε -en {tie}tʲi tie ε -en {pie}p′e pie ε -en {pie}p′i pie ε -en {die}dʲe die ε -en {die}dʲi die ε -en {bee}b′e bee ε -en {bee}b′i bee ε - -#lemma=e gender=m1 -fr {mée}me mée ε -de {ge}g′e ge ε -de {ke}k′e ke ε - -#lemma=y gender=m1 -en {by}b′i by ε -en {dy}dʲi dy ε -es {dí}dʲi dí ε -fr {phy}f′i phy ε -fr {guy}g′i guy ε -de-en {ky}k′i ky ε -en {my}m′i my ε -fr {li}li li ε -en {ly}li ly ε -en-fr {ry}rʲi ry ε -fr {sy}sʲi sy ε -en {cy}sʲi cy ε -en {thy}tʲi thy ε - -#lemma=e/ndm gender=m1 -fr {chais}še chais ε -fr {lais}le lais ε -fr {nais}ne nais ε -fr {rès}re rès ε - -#lemma=ε -en-fr {de}d de ε -en {fe}f fe ε -fr {phe}f phe ε -en {ge}ǯ ge ε -fr {gue}g gue ε -en {ke}k ke ε -fr {que}k que ε -en-fr {le}l le ε -en-fr {me}m me ε -en-fr {ne}n ne ε -fr {gne}n′ gne ε -en-fr {re}r re ε -en {se}s se ε -en-fr {ce}s ce ε -fr {che}š che ε -en-fr {te}t te ε -en {the}t the ε -en-fr {ve}v ve ε -en {we}ł we ε -fr {se}z se ε -fr {ge}ž ge ε -en {oe}oł oe ε - - -de g′el gel ε - -de g′e ge ε -de k′e ke ε -de k′i ky ε -de-en-fr ks x ε - - r r a -de r rh a - t t a -de t th a - k k -en k kh a -de-en-fr ks x a -en k c a - -en k c o - - b b ε -fr b bes ε - d d ε -en-fr d de ε - f f ε -fr f phe ε -en f ph ε - g g ε -fr g gue ε -fr g gues ε - k k ε -fr k c ε -fr k que ε -fr k q ε -fr k cq ε -fr k ques ε -fr k cques ε -en kt ct ε - m m ε -en-fr m me ε - n n ε -en-fr n ne ε -fr n nes ε -fr n ng ε - r r ε -en-fr r re ε -fr r res ε -fr r rs ε -en r rh ε - s s ε -en-fr s se ε -en-fr s ce ε -en s th ε - t t ε -fr t te ε -fr t tes ε -fr t thes ε -en t th ε -en t the ε - - e e j -en ej ey ε - - a a ε -en a ah ε -en aja ayah ε - -acro {A}a A -acro {B}b B -acro {C}c C -acro {C}k C -acro {Ć}t′ Ć -acro {D}d D -acro {E}e E -acro {F}f F -acro {G}g G -acro {H}h H -acro {I}j I -acro {J}j J -acro {K}k K -acro {L}l L -acro {Ł}ł Ł -acro {M}m M -acro {N}n N -acro {O}o O -acro {P}p P -acro {R}r R -acro {S}s S -acro {Ś}s′ Ś -acro {T}t T -acro {U}u U -acro {V}v V -acro {W}v W -acro {X}ks X -acro {Y}y Y -acro {Z}z Z -acro {Ż}ž Ż -acro {J}jot J -acro {Z}zet Z -acro {Ż}žet Ż -acro {v}v V -acro {x}ks X -acro {j}jot J -acro {z}zet z -acro {ż}žet Ż - -acro a A ε - -@rules -pl p′ pi ω -pl p′ p iκ -#pl p p δ -#pl p p j -pl pj pi ρ -pl b′ bi ω -pl b′ b iκ -#pl b b δ -pl bj bi ρ -#zbitka -#pl b b j -pl m′ mi ω -pl m′ m iκ -#pl m m δ -pl m′j mi ρ -pl f′ fi ω -pl f′ f iκ -#pl f f δ -pl fj fi ρ -pl v′ wi ω -pl v′ w iκ -pl v w δ -pl vj wi ρ -#zbitka -#pl v w j -pl t′ ć λ -#zbitki -#pl t′ ć u -#pl t′ ć a -#pl t′ ć i -pl t′ ci ω -#pl c c j -pl t′ c iκ -#pl c c μ -pl d′ dź λ -pl d′ dzi ω -pl d′ dz iκ -pl ʒ dz δ -#zbitka -#pl ʒ dz j -#pl t t δ -#pl t t j -pl tʲ ti ω -pl tʲ t iκ -pl tj ti ρ -#pl d d ξ -pl dʲ di ω -pl dʲ d iκ -#pl d d j -pl dj di ρ -pl č cz δ -pl čʲ czi ω -pl čʲ cz iκ -#pl čʲ cz j -pl ǯ dż δ -pl ǯʲ dżi ω -pl ǯʲ dż iκ -pl ǯj dżi ρ -pl s′ ś λ -pl s′ si ω -#pl s s j -pl s′ s iκ -#pl s s γ -pl z′ ź λ -pl z′ zi ω -#pl z z j -pl z′ z iκ -#pl z z δ -pl š sz δ -#pl šʲ sz j -pl šʲ szi ω -pl šʲ sz iκ -pl ž ż δ -pl žʲ żi ω -pl žʲ ż iκ -pl žj żi ρ -#pl l l δ -pl lʲ li ω -pl l l iκ -#pl l l j -#pl ł ł δ -#zbitka -#pl ł ł i -#zbitka -#pl ł ł j -pl lj li ρ -pl ř rz δ -#zbitka -#pl ř rz j -pl r r zi -#pl r r γ -#pl r r j -pl rʲ ri ω -pl rʲ r iκ -pl rj ri ρ -pl n′ ń λ -pl n′ ni ω -pl n′ n iκ -#pl n n δ -#pl n n j -pl n′j ni ρ -pl k′ ki ω -pl k′ k iκ -#pl k k δ -#pl k k j -pl k′j ki ρ -pl g′ gi ω -pl g′ g iκ -#pl g g δ -pl g′j gi ρ -pl x ch δ -pl xʲ chi ω -pl xʲ ch iκ -pl xʲj chi ρ -#pl h h δ -pl hʲ hi ω -pl hʲ h iκ -pl mar mar z -pl m′er mier z -pl n′e nie i -#pl a a σ -pl aʲ a i -#pl a a i -#pl e e σ -pl eʲ e i -#pl e e i -#pl o o σ -pl oʲ o i -#pl o o i -#pl ó ó σ -pl óʲ ó i -#pl ó ó i -#pl u u σ -pl uʲ u i -#pl u u i -#pl i i σ -pl iʲ i i -#pl i i i -#pl y y σ -pl yʲ y i -#pl y y i -#pl ą ą σ -#pl ę ę σ -#pl j j -#pl A A -#pl B B -#pl C C -#pl Ć Ć -#pl D D -#pl E E -#pl F F -#pl G G -#pl H H -#pl I I -#pl J J -#pl K K -#pl L L -#pl Ł Ł -#pl M M -#pl N N -#pl O O -#pl P P -#pl R R -#pl S S -#pl Ś Ś -#pl T T -#pl U U -#pl W W -#pl Y Y -#pl Z Z -#pl Ż Ż -#pl - - -#pl χ́ chi -#pl h́ hi - -fr {ch}š ch γ -fr {q}k q γ -fr {ng}n ng γ -en {tch}č tch γ -en {w}ł w -en {leigh}l leigh γ -de {au}ał au γ -de {sch}š sch γ -de {tsch}č tsch γ -de {z}c z γ - -en {ay}aj ay γ -en {ey}ej ey γ -en {oy}oj oy γ - -en {dh}dʲ dhi ω -en {dh}dʲ dh iκ -en {dh}d dh δ -en {gh}g′ ghi ω -en {gh}g′ gh iκ -en {gh}g gh δ -en {kh}k′ khi ω -en {kh}k′ kh iκ -en {kh}k kh δ -en {th}tʲ thi ω -en {th}tʲ th iκ -en {th}t th δ -en {v}v′ vi ω -en {v}v′ v iκ -en {v}v v δ -en {v}vj vi ρ -de-en-fr {x}ks′ xi ω -de-en-fr {x}ks′ x iκ -de-en-fr {x}ks x δ - -#lemma=e/y gender=m1 -fr {dieu}dʲe dieu α -fr {dieu}dʲi dieu β -fr {quieu}k′e quieu α -fr {quieu}k′i quieu β -fr {lieu}lʲe lieu α -fr {lieu}lʲi lieu β -en {rie}rʲe rie α -en {rie}rʲi rie β -en {gie}ǯʲe gie α -en {gie}ǯʲi gie β -en {kie}k′e kie α -en {kie}k′i kie β -en {tie}tʲe tie α -en {tie}tʲi tie β -en {pie}p′e pie α -en {pie}p′i pie β -en {die}dʲe die α -en {die}dʲi die β -en {bee}b′e bee α -en {bee}b′i bee β - -#lemma=e gender=m1 -fr {mée}me mée α -de {ge}g′e ge α -de {ke}k′e ke α - -#lemma=y gender=m1 -en {by}b′ by ’eά -en {by}b′i by β -en {dy}dʲ dy ’eά -en {dy}dʲi dy β -es {dí}dʲ dí eά -es {dí}dʲi dí β -fr {phy}f′ phy ’eά -fr {phy}f′i phy β -fr {guy}g′ guy ’eά -fr {guy}g′i guy β -de-en {ky}k′ ky ’eά -de-en {ky}k′i ky β -en {my}m′ my ’eά -en {my}m′i my β -fr {li}l li eά -fr {li}l l iβ -en {ly}l ly ’eά -en {ly}li ly β -en-fr {ry}rʲ ry ’eά -en-fr {ry}rʲi ry β -fr {sy}sʲ sy ’eά -fr {sy}sʲi sy β -en {cy}sʲ cy ’eά -en {cy}sʲi cy β -en {thy}tʲ thy ’eά -en {thy}tʲi thy β - -#lemma=e/ndm gender=m1 -fr {chais}še chais ’ -fr {lais}le lais ’ -fr {nais}ne nais ’ -fr {rès}re rès ’ - -#lemma=ε -en-fr {de}d de ’ -en-fr {de}d de ε -en {fe}f fe ’ -en {fe}f fe ε -fr {phe}f phe ’ -fr {phe}f phe ε -en {ge}ǯ ge ’ -en {ge}ǯ ge ε -fr {gue}g gue ’ -fr {gue}g gue ε -en {ke}k ke ’ -en {ke}k ke ε -fr {que}k que ’ -fr {que}k que ε -en-fr {le}l le ’ -en-fr {le}l le ε -en-fr {me}m me ’ -en-fr {me}m me ε -en-fr {ne}n ne ’ -en-fr {ne}n ne ε -fr {gne}n′ gne ’ -fr {gne}n′ gne ε -en-fr {re}r re ’ -en-fr {re}r re ε -en {se}s se ’ -en {se}s se ε -en-fr {ce}s ce ’ -en-fr {ce}s ce ε -fr {che}š che ’ -fr {che}š che ε -en-fr {te}t te ’ -en-fr {te}t te ε -en {the}t the ’ -en {the}t the ε -en-fr {ve}v ve ’ -en-fr {ve}v ve ε -en {we}ł we ’ -en {we}ł we ε -fr {se}z se ’ -fr {se}z se ε -fr {ge}ž ge ’ -fr {ge}ž ge ε -#z wyjątkiem sg:loc.voc -en {oe}oł oe ’ -en {oe}oł oe ε - -de-en-fr ’ γ - - n n g - z z γ - b b e - b b y - d d y - m m y - s s y - c c y - r r y - k k y - l l y - l l a - n n a - r r è - d d e - f f e - p p h - g g e - g g u - k k e - q q u - l l e - m m e - n n e - g g n - r r e - s s e - c c e - c c h - t t e - t t h - v v e - w w e - s s e - o o e - m m é - a a y - e e y - o o y - -acro a A ε - -acro {A}a A -acro {B}b B -acro {C}c C -acro {C}k C -acro {Ć}t′ Ć -acro {D}d D -acro {E}e E -acro {F}f F -acro {G}g G -acro {H}h H -acro {I}j I -acro {J}j J -acro {K}k K -acro {L}l L -acro {Ł}ł Ł -acro {M}m M -acro {N}n N -acro {O}o O -acro {P}p P -acro {R}r R -acro {S}s S -acro {Ś}s′ Ś -acro {T}t T -acro {U}u U -acro {V}v V -acro {W}v W -acro {X}ks X -acro {Y}y Y -acro {Z}z Z -acro {Ż}ž Ż -acro {J}jot J-ot δ -acro {Z}zet Z-et δ -acro {Ż}žet Ż-et δ -acro {z}zet z-et δ - -acro {B}b′e B-ie ε -acro {D}d′e D-zie ε -acro {F}f′e F-ie ε -acro {M}m′e M-ie ε -acro {N}n′e N-ie ε -acro {P}p′e P-ie ε -acro {R}ře R-ze ε -acro {S}s′e S-ie ε -acro {T}t′e -cie ε -acro {V}v′e V-ie ε -acro {W}v W-ie ε -acro {X}ks′e X-ie ε -acro {Z}z Z-ie ε -acro {J}jot′e J-ocie ε -acro {Z}zet′e Z-ecie ε -acro {Ż}žet′e Ż-ecie ε - -acro {B}b′e Bie ε -acro {D}d′e Dzie ε -acro {F}f′e Fie ε -acro {M}m′e Mie ε -acro {N}n′e Nie ε -acro {P}p′e Pie ε -acro {R}ře Rze ε -acro {S}s′e Sie ε -acro {T}t′e cie ε -acro {V}v′e Vie ε -acro {W}v Wie ε -acro {X}ks′e Xie ε -acro {Z}z Zie ε -acro {J}jot′e Jocie ε -acro {Z}zet′e Zecie ε -acro {Ż}žet′e Żecie ε - -acro b′e b-ie ε -acro d′e d-zie ε -acro f′e f-ie ε -acro m′e m-ie ε -acro n′e n-ie ε -acro p′e p-ie ε -acro ře r-ze ε -acro s′e s-ie ε -acro t′e -cie ε -acro {v}v′e v-ie ε -acro v w-ie ε -acro {x}ks′e x-ie ε -acro z z-ie ε -acro {j}jot′e j-ocie ε -acro {z}zet′e z-ecie ε -acro {ż}žet′e ż-ecie ε - -acro {T}t′e CIE ε -acro {A}a{T}t′e acie ε -acro {E}e{T}t′e ecie ε -acro {I}j{T}t′e icie ε -acro {O}o{T}t′e ocie ε -acro {U}u{T}t′e ucie ε - -acro {C}k′i C-i ε -acro {C}k′em C-iem ε -acro {G}g′i G-i ε -acro {G}g′em G-iem ε -acro {J}ji J-i ε -acro {K}k′i K-i ε -acro {K}k′em K-iem ε -acro {L}li L-i ε -acro - γ - - diff --git a/morphology/data/fonetics_pl_old.dic b/morphology/data/fonetics_pl_old.dic deleted file mode 100644 index 0422ff2..0000000 --- a/morphology/data/fonetics_pl_old.dic +++ /dev/null @@ -1,127 +0,0 @@ -@symbols -ω a ą e ę o ó u -δ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y z ź ż - ε -γ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y ź ż - ε -ξ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y - ε -μ a ą b c ć d e ę f g k l ł m n ń o ó p r s ś t u w y ź ż - ε -λ b c ć d f g h k l ł m n ń p r s ś t w z ź ż - ε -κ b c ć d f g h j k l ł m n ń p r s ś t w z ź ż - ε -σ a ą b c ć d e ę f g i h k l ł m n ń o ó p r s ś t u w y z ź ż - ε - -@rules -ṕ pi ω -ṕ p iκ -p p δ -b́ bi ω -b′ bj ω -b́ b iκ -b b δ -f́ fi ω -f́ f iκ -f f δ -v́ wi ω -v́ w iκ -v w δ -ć ć λ -ć ci ω -c′ cj ω -ć c iκ -c c μ -ʒ́ dź λ -ʒ́ dzi ω -ʒ́ dz iκ -ʒ dz δ -t t δ -t′ ti ω -t′ t iκ -d d ξ -d′ di ω -d′ d iκ -č cz δ -ǯ dż δ -ś ś λ -ś si ω -s′ sj ω -ś s iκ -s s γ -ź ź λ -ź zi ω -z′ zj ω -ź z iκ -z z δ -š sz δ -ž ż δ -l l δ -l′ li ω -l′ l iκ -ł ł δ -ř rz δ -r r γ -r′ ri ω -r′ r iκ -ḿ mi ω -ḿ m iκ -m m δ -ń ń λ -ń ni ω -ń n iκ -n n δ -ḱ ki ω -ḱ k iκ -k k δ -ǵ gi ω -ǵ g iκ -g g δ -χ ch δ -x′ chi ω -x′ ch iκ -h h δ -h′ hi ω -h′ h iκ -a a σ -a′ aj -e e σ -e′ ej -o o σ -o′ oj -ó ó σ -ó′ ój -u u σ -u′ uj -i i σ -i′ ij -y y σ -y′ yj -ą ą σ -ę ę σ -j j -A A -B B -C C -D D -E E -F F -G G -H H -I I -J J -K K -L L -Ł Ł -M M -N N -O O -P P -R R -S S -Ś Ś -T T -U U -W W -Y Y -Z Z -Ż Ż -- - -#χ́ chi -#h́ hi - diff --git a/morphology/data/interp_rules.dic b/morphology/data/interp_rules.dic index c27631d..31b6bf7 100644 --- a/morphology/data/interp_rules.dic +++ b/morphology/data/interp_rules.dic @@ -334,8 +334,10 @@ * cat=ndm subst:sg.pl:nom.gen.dat.acc.inst.loc.voc:n:ncol # 836 Aachen Age Aires:s Angeles Argos:s1 Arkansas:s1 Arkansas:s3 Arnhem Auschwitz Austerlitz Baku Betlejem Birkenau Bizancjum Bonn Borneo Borodino Buffalo:s1 Burgas CB-radio * cat=ndm subst:sg.pl:nom.gen.dat.acc.inst.loc.voc:m3 # 147 Aachen Arkansas:s1 Arkansas:s2 Bator Kansas:s2 Kara-Kum Kent Kyzył-Kum Michigan Missisipi:s1 Mississippi:s2 Missouri:s1 Montgomery:s3 Szoa Tet Ułan Westminster Windows Wisconsin Woodstock * cat=ndm subst:sg.pl:nom.gen.dat.acc.inst.loc.voc:m2 # 36 aguti aksis argali bankiwa boa:s1 czau-czau dingo dodo emu ferrari galago grizli kagu karibu kiwi:s2 koati koendu kuandu maki mehari -#* cat=ndm subst:sg.pl:nom.gen.dat.acc.inst.loc.voc:m1 # 90 Allende Ante Bambo Bantu Bartoszcze Belmondo Brando Buffalo:s2 Capone Caruso Castro Crusoe Curie Defoe Diego Domingo Fo Gard Giuseppe Hugo:s2 +* cat=ndm subst:sg.pl:nom.gen.dat.acc.inst.loc.voc:m1 # 90 Allende Ante Bambo Bantu Bartoszcze Belmondo Brando Buffalo:s2 Capone Caruso Castro Crusoe Curie Defoe Diego Domingo Fo Gard Giuseppe Hugo:s2 +* cat=ndm subst:sg.pl:nom.gen.dat.acc.inst.loc.voc:m1|depr:pl:nom.acc.voc:m2 # * cat=ndm subst:sg.pl:nom.gen.dat.acc.inst.loc.voc:f # 150 Abisag Abiszag Akropolis Ananke Arachne Arkansas:s2 Ata:s Babel Beatrycze Berenike Carmen Colorado:s2 Dafne Demeter Dolores Doris Dudziak:s2 Edith Ferdydurke Gabi +* cat=ndm subst:pl:nom.gen.dat.acc.inst.loc.voc:n:pt # * cat=noun flex=ego lemma=a subst:sg:gen.acc:m1 # 9 burgrabia gograbia grabia hrabia margrabia murgrabia sędzia:s1 wicehrabia współsędzia * cat=noun flex=ego lemma=a subst:sg:gen:m1 # 9 burgrabia gograbia grabia hrabia margrabia murgrabia sędzia:s1 wicehrabia współsędzia @@ -424,8 +426,6 @@ * cat=adj flex=i lemma=ε adj:pl:nom:m1:pos # 4 kontent:a niekontent:a nierad:a rad:a * cat=adj flex=ych lemma=ε adj:pl:gen.loc:m1.m2.m3.f.n:pos|adj:pl:acc:m1:pos # 22 czyj jeden:a mój:a nasz niczyj niejeden:a1 niejeden:a2 niesamoswój nieswój niewart oboj obój:a pewien sam:a samojeden samoswój swój twój:a wart:a wasz * cat=adj flex=ymi lemma=ε adj:pl:inst:m1.m2.m3.f.n:pos # 23 czyj jeden:a mój:a nasz niczyj niejeden:a1 niejeden:a2 niesamoswój nieswój niewart oboj obój:a pewien sam:a samojeden samoswój swój twój:a wart:a wasz -* cat=adj flex=ε lemma=ε adj:sg.pl:nom.gen.dat.acc.inst.loc.voc:m1.m2.m3.f.n:pos|adja # 1 pop:a -* cat=adj flex=ε lemma=ε adj:sg.pl:nom.gen.dat.acc.inst.loc.voc:m1.m2.m3.f.n:pos # 26 a-z a-ż beż:a blond:a ciemnoblond:a ck cool:a dum-dum fest:a frysz ful:a git:a instant jasnoblond lilaróż:a maksi nonajron:a okej:a open perfekt:a cat=adj:grad pref=ε flex=ą lemma=y adj:sg:acc.inst:f:com # 1076 absurdalny adekwatny agresywny aktualny aktywny akuratny ambitny apetyczny arystokratyczny atrakcyjny autentyczny baczny bajeczny banalny barwny bezbożny:a bezbronny bezczelny bezecny bezpieczny cat=adj:grad pref=ε flex=ymi lemma=y adj:pl:inst:m1.m2.m3.f.n:com # 1076 absurdalny adekwatny agresywny aktualny aktywny akuratny ambitny apetyczny arystokratyczny atrakcyjny autentyczny baczny bajeczny banalny barwny bezbożny:a bezbronny bezczelny bezecny bezpieczny diff --git a/morphology/dict.ml b/morphology/dict.ml index d269b64..3d55257 100644 --- a/morphology/dict.ml +++ b/morphology/dict.ml @@ -172,6 +172,7 @@ let split_language lang_filename in_path filename out_path = let dict_map = Xlist.fold dict StringMap.empty (fun dict_map e -> try let lang = StringMap.find map e.lemma in + (* let lang = "all" in *) StringMap.add_inc dict_map lang [e] (fun l -> e :: l) with Not_found -> dict_map) in StringMap.iter dict_map (fun lang dict -> @@ -214,10 +215,43 @@ let rec get_aspect lemma = function | _ -> get_aspect lemma l) | [] -> failwith ("get_aspect: " ^ lemma) +let epsilon_lemmata = StringSet.of_list [ + "Berkeley"; "Blake"; "Bourdelle"; "Boyle"; "Boy"; "Braille"; "Braque"; "Brooke"; "Halley"; + "Constable"; "Corneille"; "Delavigne"; "Doyle"; "Drake"; "Dunaway"; "Faraday"; "Gable"; "Gay"; + "George"; "Heaney"; "Google"; "Desargues"; "Disney"; "Doumergue"; "Gaulle"; "Hemingway"; "Hubble"; + "Huxley"; "Jokai"; "Joule"; "Jókai"; "Lisle"; "Locke"; "Losey"; "Macaulay"; "May"; + "McCartney"; "Remarque"; "Searle"; "Shelley"; "Sisley"; "Stanley"; "Thackeray"; "Updike"; "Winfrey"; + "assemblage"; "boutique"; "branle"; "cartridge"; "chippendale"; "collage"; "collége"; "decoupage"; "dodge"; + "entourage"; "freestyle"; "grisaille"; "grunge"; "joule"; "quiche"; "remake"; "rocaille"; "scrabble"; + "siècle"; "playboy"; "oldboy"; "jockey"; "image"; "college"; "cockney"; "Montaigne"; ""; + "allemande"; "anglaise"; "beguine"; "breakdance"; "courante"; "ensemble"; "high-life"; "iPhone"; "iphone"; + "rolls-royce"; "Redgrave"; "cowboy"; "drive"; "Nightingale"; "Presley"; ""; ""; ""; + ""; ""; ""; ""; ""; ""; ""; ""; ""; + ""; ""; ""; ""; ""; ""; ""; ""; ""; + ""; ""; ""; ""; ""; ""; ""; ""; ""; + ] + +let e_lemmata = StringSet.of_list [ + "Barrès"; "Beaumarchais"; "Marchais"; "Montesquieu"; "Rabelais"; "Resnais"; "Richelieu"; ""; ""; + ""; ""; ""; ""; ""; ""; ""; ""; ""; + ""; ""; ""; ""; ""; ""; ""; ""; ""; + ] + +let y_lemmata = StringSet.of_list [ + "Bogorodckij"; "Gaudí"; "Szeptyćkyj"; "Toynbee"; "Trubieckoj"; "Wołżskij"; ""; ""; ""; + ""; ""; ""; ""; ""; ""; ""; ""; ""; + ""; ""; ""; ""; ""; ""; ""; ""; ""; + ""; ""; ""; ""; ""; ""; ""; ""; ""; + ] + let get_lemma_suf lemma = + let lemma = Stem.simplify_lemma lemma in + if StringSet.mem epsilon_lemmata lemma then "ε" else + if StringSet.mem e_lemmata lemma then "e" else + if StringSet.mem y_lemmata lemma then "y" else let lemma_suf = if lemma = "" then "" else - List.hd (List.rev (Xunicode.utf8_chars_of_utf8_string (Stem.simplify_lemma lemma))) in + List.hd (List.rev (Xunicode.utf8_chars_of_utf8_string lemma)) in match lemma_suf with "a" -> "a" | "e" -> "e" @@ -366,7 +400,7 @@ let merge_interps lemma forms = | _,["depr:pl:nom.acc.voc:m2";"subst:sg.pl:nom.gen.dat.acc.inst.loc.voc:m1"] -> {empty_form with orth=orth; interp="subst:sg.pl:nom.gen.dat.acc.inst.loc.voc:m1|depr:pl:nom.acc.voc:m2"} :: forms | _,[interp] -> {empty_form with orth=orth; interp=interp} :: forms | _,interps -> - (* print_endline ("merge_interps: " (*^ lemma_suf*) ^ " [\"" ^ String.concat "\";\"" interps ^ "\"]"); *) + (* print_endline ("merge_interps: " ^ lemma_suf ^ " " ^ orth ^ " [\"" ^ String.concat "\";\"" interps ^ "\"]"); *) Xlist.fold interps forms (fun forms interp -> {empty_form with orth=orth; interp=interp} :: forms)) @@ -460,7 +494,7 @@ let find_kolwiek_suffixes dict = let exceptional_lemmata = StringSet.of_list ([ (* wiele stemów *) - "Apollo"; "Aujeszky"; "Białystok"; "Gózd"; "Krasnystaw"; "Różanystok"; "Wielkanoc"; "białagłowa"; +(* "Apollo"; "Aujeszky"; "Białystok"; "Gózd"; "Krasnystaw"; "Różanystok"; "Wielkanoc"; "białagłowa"; "deszcz"; "imćpan"; "iściec"; "otrząs"; "rzeczpospolita"; "wilczełyko"; "woleoczko"; "prapraojciec"; "praojciec"; "ojciec"; "współbrat"; "spółbrat"; "półbrat"; "brat"; @@ -569,9 +603,12 @@ let exceptional_lemmata = StringSet.of_list ([ "nieunixowość"; "niesurvivalowy"; "niesurvivalowość"; "niesoftware’owy"; "niesoftware’owość"; "nierock’n’rollowy"; "nierock’n’rollowość"; "nierock’n’rollowo"; "niequmrańskość"; "niequmrański"; "niequizowy"; "niequizowość"; "niequeerowość"; "niequeerowo"; "niequadowy"; "nieoxfordzkość"; "nieoxfordzki"; "nienovellowskość"; "nienovellowski"; "nienewage'owy"; "nieliverpoolskość"; "nieliverpoolski"; "niekickboxingowy"; "niekickboxingowość"; "nieheavymetalowy"; "nieheavymetalowość"; "nieheavymetalowo"; "nieharvardzkość"; "nieharvardzki"; "niedeveloperskość"; - "niedeveloperski"; "niedaviscupowy"; "niedaviscupowość"; "niebrexitowość"; "nieampexowy"; "nieampexowość"; "newage'owy"; "newage'owość"; "newage'owo"; "liverpoolskość"; "liverpoolski"; + "niedeveloperski"; "niedaviscupowy"; "niedaviscupowość"; "niebrexitowość"; "nieampexowy"; "nieampexowość"; "newage'owy"; "newage'owość"; "newage'owo"; "liverpoolskość"; "liverpoolski";*) + "Akademgorodok"; "berceuse"; "colloquium"; "cornflakes"; "dacia"; "Dziubanii"; "epeisodion"; "facsimile"; "felicia"; "Garcia"; "Giedroyc"; "glediczia"; + "głasnost"; "hippie"; "Kodaly"; "KRRiT"; "lancia"; "Murii"; "Nagy"; "PAGART"; "paparazzo"; "Praha"; "pudźa"; "Selye"; + "welwiczia"; "yuppi"; "yuppie"; "Zápolya"; "Zrínyi"; ""; ""; ""; ""; ""; ""; ""; ] (*@ File.load_lines "../morphology/data/obce.tab" @ File.load_lines "../morphology/data/akronimy.tab" @*) - @ File.fold_tab "../morphology/data/obce_langs.tab" [] (fun l x -> List.hd x :: l) @ + (*@ File.fold_tab "../morphology/data/obce_langs.tab" [] (fun l x -> List.hd x :: l)*) @ (*File.load_lines "../morphology/data/nieregularne.tab" @ File.load_lines "results/interp_validated_verb.tab" @ *) (*File.load_lines "results/interp_validated_noun.tab" @ File.load_lines "results/interp_validated_adj.tab" @ File.load_lines "../morphology/data/validated_adj.tab" @ File.load_lines "../morphology/data/validated_noun.tab" @ @@ -619,21 +656,57 @@ let lowercase_lu dict = lemma2=Xunicode.lowercase_utf8_string entry.lemma2}) -let fonetic_translation dict = - Xlist.fold dict [] (fun dict e -> - try +let fonetic_translation_entry e = (* let lemma = Stem.simplify_lemma e.lemma in *) (* let phon_lemma = Fonetics.translate_and_check true Fonetics.rules Fonetics.rev_rules lemma in *) - let phon_stem = Fonetics.translate(*_and_check*) true Fonetics.rules (*Fonetics.rev_rules*) e.stem in + let rules = match e.cat with + "noun" -> Fonetics.rules + | "" -> failwith "fonetic_translation" + | _ -> Fonetics.core_rules in + let phon_stem = Fonetics.translate true rules e.stem in {e with (*phon_lemma = phon_lemma;*) phon_stem=Xlist.map phon_stem (fun s -> s.phon); forms = Xlist.map e.forms (fun f -> - let phon_orth = Fonetics.translate(*_and_check*) true Fonetics.rules (*Fonetics.rev_rules*) f.orth in - {f with phon_orth = phon_orth})} :: dict + let phon_orth = Fonetics.translate true rules f.orth in + {f with phon_orth = phon_orth})} + +(* let fonetic_translation dict = + Xlist.fold dict [] (fun dict e -> + try (fonetic_translation_entry e) :: dict with Fonetics.NotFound(x,s) -> printf "NF %s %s %s\n%!" e.lemma x s; dict | Fonetics.NotEqual(x,s,t) -> printf "NE %s %s %s %s\n%!" e.lemma x s t; dict | Fonetics.MulipleSolutions(x,s,l) -> printf "MS %s %s %s: %s\n%!" e.lemma x s (String.concat " " l); dict - | _ -> dict) + | _ -> dict) *) + +let select_rev_rules e = + match e.cat with + "noun" -> Fonetics.rev_rules + | "" -> failwith "select_rev_rules" + | _ -> Fonetics.core_rev_rules + +let create_candidates interp_flag rules e = + let e = try fonetic_translation_entry e with _ -> print_endline ("fonetic_translation_entry: " ^ e.lemma); {e with forms=[]} in + let simple_lemma = Stem.simplify_lemma e.lemma in + let phon_rev_rules = select_rev_rules e in + let forms = Xlist.rev_map e.forms (fun f -> + (* if f.orth = "poljom" then printf "phon_orths: \n %s\n%!" (String.concat "\n " (Xlist.map f.phon_orth Fonetics.string_of_phon)); *) + let candidates = Xlist.fold f.phon_orth [] (fun candidates s -> + Xlist.fold (Rules.CharTrees.find rules s.phon) candidates (fun candidates (stem,rule) -> + let candidate_lemmas = Fonetics.translate_simple true phon_rev_rules (stem ^ rule.set) in + let b = Xlist.fold candidate_lemmas false (fun b candidate_lemma -> + (* if f.orth = "poljom" then (if candidate_lemma = simple_lemma then printf "E" else printf " "); + if f.orth = "poljom" then printf " %s %s %s %s\n%!" s.phon stem (string_of_rule rule) candidate_lemma; *) + candidate_lemma = simple_lemma || b) in + if b && ((not interp_flag) || f.interp = rule.interp) then (stem,rule,s) :: candidates else candidates)) in + {f with candidates=candidates}) in + {e with forms=forms} + +let phon_validate rules dict = + Xlist.rev_map dict (fun entry -> + let entry = create_candidates false rules entry in + let forms = Xlist.rev_map entry.forms (fun form -> + if form.candidates = [] then {form with validated=false} else {form with validated=true}) in + {entry with forms=forms}) (*let validate rules dict = Xlist.rev_map dict (fun entry -> @@ -645,18 +718,6 @@ let fonetic_translation dict = if candidates = [] then {form with validated=false} else {form with validated=true}) in {entry with forms=forms})*) -let phon_validate rules dict = - Xlist.rev_map dict (fun entry -> - let simple_lemma = Stem.simplify_lemma entry.lemma in - let forms = Xlist.rev_map entry.forms (fun form -> - let candidates = Xlist.fold form.phon_orth [] (fun candidates s -> - Xlist.fold (Rules.CharTrees.find rules s.phon) candidates (fun candidates (stem,rule) -> - let candidate_lemmas = Fonetics.rev_translate2 true Fonetics.rev_rules (stem ^ rule.set) s.mapping in - let b = Xlist.fold candidate_lemmas false (fun b candidate_lemma -> candidate_lemma = simple_lemma || b) in - if b then (stem,rule) :: candidates else candidates)) in - if candidates = [] then {form with validated=false} else {form with validated=true}) in - {entry with forms=forms}) - let validate_lu rules dict = Xlist.rev_map dict (fun entry -> let candidates1 = Rules.CharTrees.find rules entry.lemma1 in @@ -674,20 +735,19 @@ let validate_interp rules dict = Xlist.rev_map dict (fun entry -> let simple_lemma = Stem.simplify_lemma entry.lemma in let forms = Xlist.rev_map entry.forms (fun form -> - let candidates = Xlist.fold form.phon_orth [] (fun candidates s -> - Xlist.fold (Rules.CharTrees.find rules s.phon) candidates (fun candidates (stem,rule) -> - (* if rule.star = Ndm && not entry.ndm then candidates else - if rule.star <> Ndm && entry.ndm then candidates else *) - (* let candidate_lemma = Fonetics.rev_translate true Fonetics.rev_rules (stem ^ rule.set) s.mapping in - if candidate_lemma = simple_lemma && form.interp = rule.interp then - (stem,rule) :: candidates else candidates)) in *) - let candidate_lemmas = Fonetics.rev_translate2 true Fonetics.rev_rules (stem ^ rule.set) s.mapping in - let b = Xlist.fold candidate_lemmas false (fun b candidate_lemma -> candidate_lemma = simple_lemma || b) in - if b && form.interp = rule.interp then - (stem,rule) :: candidates else candidates)) in + let candidates = Xlist.fold (Rules.CharTrees.find rules form.orth) [] (fun candidates (stem,rule) -> + if stem ^ rule.set = simple_lemma && form.interp = rule.interp then + (stem,rule) :: candidates else candidates) in if candidates = [] then ((*printf "validate_interp: %s\t%s\t%s\n" form.orth entry.lemma form.interp;*) {form with validated=false}) else {form with validated=true}) in {entry with forms=forms}) +let phon_validate_interp rules dict = + Xlist.rev_map dict (fun entry -> + let entry = create_candidates true rules entry in + let forms = Xlist.rev_map entry.forms (fun form -> + if form.candidates = [] then {form with validated=false} else {form with validated=true}) in + {entry with forms=forms}) + let remove_validated_forms dict = Xlist.fold dict [] (fun dict entry -> let forms = Xlist.fold entry.forms [] (fun forms form -> @@ -791,11 +851,11 @@ let generate_rules rules path filename rules_filename = let dict = remove_exceptional_lemmata dict in let dict = find_kolwiek_suffixes dict in (* FIXME: lematy z kolwiek_suffixes nie są walidowane *) let dict = generate_stem dict in - let dict = fonetic_translation dict in let dict = phon_validate rules dict in let dict = remove_validated_forms dict in let dict = remove_sup_neg_forms dict in (* FIXME *) let rules = Xlist.fold dict StringMap.empty (fun rules entry -> + let entry = fonetic_translation_entry entry in Xlist.fold (RuleGenerator.phon_generate_rules_entry entry) rules (fun rules (key,rule) -> let rules2 = try StringMap.find rules key with Not_found -> StringMap.empty in let rules2 = StringMap.add_inc rules2 rule (1,[entry.lemma]) (fun (q,l) -> q+1, if q < 20 then entry.lemma :: l else l) in @@ -837,13 +897,13 @@ let generate_interp_rules rules interp_rules selected_tags path filename rules_f let dict = remove_exceptional_lemmata dict in (* let dict = find_kolwiek_suffixes dict in *) (* let dict = generate_stem dict in *) - let dict = fonetic_translation dict in - let dict = validate_interp interp_rules dict in + let dict = phon_validate_interp interp_rules dict in let dict = remove_validated_forms dict in let interp_rules = Xlist.fold dict StringMap.empty (fun interp_rules entry -> - let simple_lemma = Stem.simplify_lemma entry.lemma in + (* let simple_lemma = Stem.simplify_lemma entry.lemma in *) + let entry = create_candidates false rules entry in Xlist.fold entry.forms interp_rules (fun interp_rules form -> - let candidates = RuleGenerator.phon_generate_interp_rules rules selected_tags simple_lemma form in + let candidates = RuleGenerator.phon_generate_interp_rules (*rules*) selected_tags (*simple_lemma*) form in Xlist.fold candidates interp_rules (fun interp_rules (v,cand) -> (* StringMap.add_inc interp_rules cand (1,[entry.lemma]) (fun (q,l) -> q+1, if q < 20 then entry.lemma :: l else l)))) in *) StringMap.add_inc interp_rules cand (v,StringSet.singleton entry.lemma) (fun (v,set) -> v,StringSet.add set entry.lemma)))) in @@ -862,6 +922,12 @@ let generate_interp_rules rules interp_rules selected_tags path filename rules_f fst (Rules.RuleQMap.fold freq_rules (Rules.RuleQMap.empty,1) (fun (freq_rules,i) rule freq -> Rules.RuleQMap.add_val freq_rules {rule with id = "N" ^ string_of_int i} freq, i+1)) *) +let manage_x_lemma stem suffix lemma = + match suffix with + "s" -> if Xstring.check_sufix "x" lemma && Xstring.check_sufix "k" stem then Xstring.cut_sufix "k" stem else stem + | "sa" -> if Xstring.check_sufix "xa" lemma && Xstring.check_sufix "k" stem then Xstring.cut_sufix "k" stem else stem + | _ -> stem + let generate_rule_frequencies rules path filename rules_filename = let dict = load_tab (path ^ filename) in let dict = merge_entries dict in @@ -871,16 +937,81 @@ let generate_rule_frequencies rules path filename rules_filename = let freq_rules = generate_ndm_rules (remove_not_ndm dict) in let dict = remove_ndm dict in *) let dict = remove_exceptional_lemmata dict in - let dict = generate_stem dict in - let dict = fonetic_translation dict in + (* let dict = generate_stem dict in *) let freq_rules = Xlist.fold dict Rules.RuleQMap.empty(*freq_rules*) (fun freq_rules entry -> + let entry = create_candidates true rules entry in let simple_lemma = Stem.simplify_lemma entry.lemma in + (* print_endline simple_lemma; *) Xlist.fold entry.forms freq_rules (fun freq_rules form -> - let candidates = Rules.CharTrees.find rules form.orth in + let candidates = Xlist.fold form.candidates [] (fun candidates (stem,rule,s) -> + let x_stem = manage_x_lemma stem rule.set simple_lemma in + let rule,pref_stem,short_stem = match rule.pref with + "naj" -> + let pref_stem = Fonetics.get_short_stem "" ("naj" ^ x_stem) s.mapping in + rule, pref_stem, Xstring.cut_prefix "naj" pref_stem + | "n′e" -> + let pref_stem = Fonetics.get_short_stem "" ("nie" ^ x_stem) s.mapping in + {rule with pref="nie"}, pref_stem, Xstring.cut_prefix "nie" pref_stem + | "" -> + let pref_stem = Fonetics.get_short_stem "" x_stem s.mapping in + rule, pref_stem, pref_stem + | _ -> failwith "generate_rule_frequencies" in + (* printf "%s %s %s\n%!" simple_lemma stem pref_stem; *) + let rule = {rule with + find = Xstring.cut_prefix pref_stem form.orth; + set = Xstring.cut_prefix short_stem simple_lemma} in + (stem,rule) :: candidates) in + (* printf "%s %s %d\n%!" simple_lemma form.orth (Xlist.size candidates); *) + (* let candidates = Rules.CharTrees.find rules form.orth in let candidates = Xlist.fold candidates [] (fun candidates (stem,rule) -> if stem ^ rule.set = simple_lemma && form.interp = rule.interp then - (stem,rule) :: candidates else candidates) in - if candidates = [] then freq_rules else Rules.RuleQMap.add freq_rules (snd (List.hd candidates)))) in + (stem,rule) :: candidates else candidates) in *) + let candidates2 = + Xlist.fold candidates [] (fun candidates2 (stem,rule) -> + if rule.star = Star then candidates2 else (stem,rule) :: candidates2) in + let candidates = if candidates2 = [] then candidates else candidates2 in + let _,candidates = Xlist.fold candidates (max_int,[]) (fun (min_n,min_l) (stem,rule) -> + let n = Xstring.size rule.find in + if n < min_n then n,[stem,rule] else + if n > min_n then min_n,min_l else + min_n, (stem,rule) :: min_l) in + let map = Xlist.fold candidates StringMap.empty (fun map (_,r) -> StringMap.add map (string_of_rule r) r) in + match StringMap.fold map [] (fun l s r -> (s,Rules.get_tag r.tags "con",Rules.get_tag r.tags "group",Rules.get_tag r.tags "lemma",r) :: l) with + (* match Rules.RuleSet.to_list (Xlist.fold candidates Rules.RuleSet.empty (fun set (_,r) -> Rules.RuleSet.add set r)) with *) + [] -> freq_rules + | [_,_,_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"ʲ",_,_,_;_,"j",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"ʲ",_,_,_;_,"r",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"ʲ",_,_,_;_,"c",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"ʲ",_,_,_;_,"d",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"ʲ",_,_,_;_,"s",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"ʲ",_,_,_;_,"a",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"m′",_,_,_;_,"m",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"b′",_,_,_;_,"b",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"f′",_,_,_;_,"f",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"v′",_,_,_;_,"j",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"z",_,_,_;_,"s",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"c",_,_,_;_,"z",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"p′",_,_,_;_,"p",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"ǯ",_,_,_;_,"ž",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"v′",_,_,_;_,"v",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"g′",_,_,_;_,"g",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"š",_,_,_;_,"x",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"j",_,_,r;_,"b′",_,_,_] -> Rules.RuleQMap.add freq_rules r + | [_,"j",_,_,r;_,"g′",_,_,_] -> Rules.RuleQMap.add freq_rules r + | [_,"j",_,_,r;_,"k",_,_,_] -> Rules.RuleQMap.add freq_rules r + | [_,"n′",_,_,_;_,"j",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"ł",_,_,_;_,"v",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"k′",_,_,_;_,"k",_,_,r] -> Rules.RuleQMap.add freq_rules r + | [_,"c",_,_,r;_,"k",_,_,_] -> Rules.RuleQMap.add freq_rules r + | [_,_,_,"ε",_;_,_,_,"e",r] -> Rules.RuleQMap.add freq_rules r + | [_,_,_,"y",_;_,_,_,"e",r] -> Rules.RuleQMap.add freq_rules r + (* | [_;"\t\t\tcat=ndm",r] -> Rules.RuleQMap.add freq_rules r + | [_;_;"\t\t\tcat=ndm",r] -> Rules.RuleQMap.add freq_rules r *) + | l -> + printf "%s %s\n %s\n" form.orth entry.lemma (String.concat "\n " (Xlist.map l (fun (s,_,_,_,_) -> s))); + let _,_,_,_,r = List.hd l in + Rules.RuleQMap.add freq_rules r)) in File.file_out rules_filename (fun file -> Rules.RuleQMap.iter freq_rules (fun rule freq -> fprintf file "%s\n" (Rules.string_of_freq_rule {rule with freq=freq}))) @@ -892,9 +1023,9 @@ let generate_stem_dict rules_filename path filename out_filename = let dict = merge_entries dict in let dict = process_interps dict in let dict = remove_cat "cond" dict in - let dict = fonetic_translation dict in (* let dict = mark_ndm dict in *) let stems = Xlist.fold dict StringMap.empty (fun stems entry -> + let entry = fonetic_translation_entry entry in let simple_lemma,lemma_suf = Stem.simplify_lemma_full entry.lemma in Xlist.fold entry.forms stems (fun stems form -> let candidates = Rules.CharTrees.find rules form.orth in diff --git a/morphology/fonetics.ml b/morphology/fonetics.ml index 53e73be..760a5e1 100644 --- a/morphology/fonetics.ml +++ b/morphology/fonetics.ml @@ -4,6 +4,9 @@ open Printf type status = Idle | Symbols | Rules | RevSymbols | RevRules type rule = {set: string; find: string; suf: string; lang: string} +let string_of_phon p = + Printf.sprintf "%s %s" p.Types.phon (String.concat " " (Xlist.map p.Types.mapping (fun (a,b) -> a ^ "->" ^ b))) + module CharTree = struct type t = M of t CharMap.t * rule list @@ -64,14 +67,18 @@ let prepare_rules symbol_defs rules = {r with suf=String.concat "" l}))) in CharTree.create rules -let rules, rev_rules = - (* let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics_acro.dic" in *) - let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics_pl.dic" in - (* let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics_en.dic" in *) - (* let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics_fr.dic" in *) - (* let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics_de.dic" in *) +let select_rules lang rules = + Xlist.fold rules [] (fun rules r -> + if r.lang = lang then r :: rules else rules) + +let rules, rev_rules, core_rules, core_rev_rules = + let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics.dic" in + let core_rules = select_rules "core" rules in + let core_rev_rules = select_rules "core" rev_rules in prepare_rules symbol_defs rules, - prepare_rules rev_symbol_defs rev_rules + prepare_rules rev_symbol_defs rev_rules, + prepare_rules symbol_defs core_rules, + prepare_rules rev_symbol_defs core_rev_rules let sufs = ["ω"; "iκ"; "ρ"; "δ"; "λ"; "i"(*; "zi"*)] @@ -132,22 +139,22 @@ let rec translate_rec closure found found_maping rules s = let l = if l = [] && closure then let c,s = Xunicode.first_utf8_char_of_utf8_string s in [s,{find=c; set=c; suf=""; lang=""}] else l in - (* let n = String.length s in - let c = String.sub s 0 1 in - [String.sub s 1 (n-1),{find=c; set=c; suf=""}] else l in *) List.flatten (Xlist.rev_map l (fun (t,r) -> translate_rec closure (r.set :: found) ((r.find,r.set) :: found_maping) rules (r.suf ^ t))) let translate closure rules s = (* printf "translate 1: %s\n%!" s; *) let ll = translate_rec closure [] [] rules (s ^ "ε") in - Xlist.rev_map ll (fun (phon,mapping) -> {Types.phon=String.concat "" phon; Types.mapping=[](*mapping*)}) -(* let s = String.concat "" l in - (* printf "translate 2: %s\n%!" s; *) - s*) + Xlist.rev_map ll (fun (phon,mapping) -> {Types.phon=String.concat "" phon; Types.mapping=mapping}) -let string_of_phon p = - Printf.sprintf "%s %s" p.Types.phon (String.concat " " (Xlist.map p.Types.mapping (fun (a,b) -> a ^ "->" ^ b))) +exception NotFound of string * string +exception NotEqual of string * string * string +exception MulipleSolutions of string * string * string list + +let translate_simple closure rules s = + let ll = translate_rec closure [] [] rules (s ^ "ε") in + if ll = [] then raise (NotFound(s,"")) else + Xlist.rev_map ll (fun (phon,mapping) -> String.concat "" phon) let print_phon p = print_endline (string_of_phon p) @@ -161,22 +168,18 @@ let _ = Xlist.iter (translate true rules "łódź") print_phon let _ = Xlist.iter (translate true rules "Łódź") print_phon *) (* let _ = translate true rules "izolował" *) -exception NotFound of string * string -exception NotEqual of string * string * string -exception MulipleSolutions of string * string * string list - -let translate_and_check closure rules rev_rules x = - let l = translate closure rules x in +let translate_and_check closure rules rev_rules orth = + let l = translate closure rules orth in Xlist.iter l (fun s -> let y = translate closure rev_rules s.Types.phon in let y = Xlist.map y (fun s -> s.Types.phon) in match StringSet.to_list (StringSet.of_list y) with - [] -> raise (NotFound(x,s.Types.phon)) - | [t] -> if t <> x then raise (NotEqual(x,s.Types.phon,t)) - | l -> raise (MulipleSolutions(x,s.Types.phon,l))); - l + [] -> raise (NotFound(orth,s.Types.phon)) + | [t] -> if t <> orth then raise (NotEqual(orth,s.Types.phon,t)) + | l -> raise (MulipleSolutions(orth,s.Types.phon,l))) + -let translate_single closure rules x = +(*let translate_single closure rules x = let y = translate closure rev_rules x in let y = Xlist.map y (fun s -> s.Types.phon) in match StringSet.to_list (StringSet.of_list y) with @@ -196,11 +199,17 @@ let rec rev_translate_rec x s = function let rev_translate closure rev_rules s m = let x,s,_ = rev_translate_rec "" s m in if s = "" then x else - x ^ (translate_single closure rev_rules s) + x ^ (translate_single closure rev_rules s)*) -let rev_translate2 closure rev_rules s m = +(*let rev_translate2 closure rev_rules s m = let x,s,_ = rev_translate_rec "" s m in if s = "" then [x] else let l = translate closure rev_rules s in if l = [] then raise (NotFound(s,"")) else - Xlist.rev_map l (fun y -> x ^ y.Types.phon) + Xlist.rev_map l (fun y -> x ^ y.Types.phon)*) + +let rec get_short_stem x s = function + [] -> if s = "" then x else failwith "get_short_stem" + | (a,b) :: m -> + if Xstring.check_prefix b s then get_short_stem (x^a) (Xstring.cut_prefix b s) m + else x diff --git a/morphology/generate.ml b/morphology/generate.ml index 8170cf9..ee91e37 100644 --- a/morphology/generate.ml +++ b/morphology/generate.ml @@ -156,6 +156,7 @@ let _ = (* test_process_interps results_path adj_sgjp_filename; *) (* test_process_interps results_path verb_sgjp_filename; *) (* test_process_interps results_path noun_sgjp_filename; *) + (* test_process_interps results_path "interp_not_validated_lang_all.tab"; *) () @@ -180,9 +181,15 @@ let _ = let test_fonetics path filename = let dict = Dict.load_tab_full (path ^ filename) in - let dict = Dict.remove_exceptional_lemmata_gen obce dict in - let _ = Dict.fonetic_translation dict in - () + (* let dict = Dict.remove_exceptional_lemmata_gen obce dict in *) + Xlist.iter dict (fun e -> + Xlist.iter e.forms (fun f -> + try + Fonetics.translate_and_check true Fonetics.core_rules Fonetics.core_rev_rules f.orth + with + Fonetics.NotFound(orth,phon) -> printf "NF lemma=%s orth=%s phon=%s\n%!" e.lemma orth phon + | Fonetics.NotEqual(orth,phon,t) -> printf "NE lemma=%s orth=%s phon=%s: %s\n%!" e.lemma orth phon t + | Fonetics.MulipleSolutions(orth,phon,l) -> printf "MS lemma=%s orth=%s phon=%s: %s\n%!" e.lemma orth phon (String.concat " " l))) (* Test translacji fonetycznej *) let _ = @@ -215,7 +222,6 @@ let check_stem_generation path filename = (* let dict = Dict.mark_ndm dict in *) let dict = Dict.remove_exceptional_lemmata dict in let dict = Dict.find_kolwiek_suffixes dict in - (* let dict = Dict.fonetic_translation dict in *) let _ = Dict.generate_stem dict in () @@ -239,7 +245,7 @@ let interp_compound_rule_trees = Rules.make_interp_compound_rule_trees compound_ let find_not_validated_forms rules path filename out_filename = let dict = Dict.load_tab (path ^ filename) in - let dict = Dict.fonetic_translation dict in + let dict = Dict.assign_entry_cat dict in let dict = Dict.phon_validate rules dict in let dict = Dict.remove_validated_forms dict in Dict.print out_filename dict @@ -250,8 +256,7 @@ let find_not_validated_entries rules path filename out_filename = let dict = Dict.process_interps dict in (* let dict = Dict.mark_ndm dict in let dict = Dict.remove_ndm dict in *) - let dict = Dict.remove_exceptional_lemmata dict in - let dict = Dict.fonetic_translation dict in + (* let dict = Dict.remove_exceptional_lemmata dict in *) let dict = Dict.phon_validate rules dict in let dict = Dict.remove_validated_entries dict in Dict.print out_filename dict @@ -281,6 +286,10 @@ let _ = (* find_not_validated_forms compound_rule_trees results_path "lang_acro_sgjp-20170730.tab" "results/lang_acro.tab"; *) (* find_not_validated_forms compound_rule_trees results_path "lang_la_sgjp-20170730.tab" "results/lang_la.tab"; *) (* find_not_validated_forms compound_rule_trees results_path "lang_es_sgjp-20170730.tab" "results/lang_es.tab"; *) + (* find_not_validated_forms compound_rule_trees results_path "lang_all_sgjp-20170730.tab" "results/lang_all.tab"; *) + (* find_not_validated_entries compound_rule_trees results_path "lang_all_sgjp-20170730.tab" "results/lang_all.tab"; *) + (* find_not_validated_entries compound_rule_trees results_path noun_polimorf_filename "results/not_validated_p_noun.tab"; *) + (* find_not_validated_forms compound_rule_trees results_path "not_validated_p_noun.tab" "results/not_validated_p_noun2.tab"; *) () let find_not_validated_lemmata rules path filename out_filename = @@ -288,7 +297,6 @@ let find_not_validated_lemmata rules path filename out_filename = let dict = Dict.merge_entries dict in let dict = Dict.process_interps dict in let dict = Dict.remove_exceptional_lemmata dict in - let dict = Dict.fonetic_translation dict in let dict = Dict.phon_validate rules dict in let dict = Dict.remove_validated_entries dict in Dict.print_lemmata out_filename dict @@ -304,7 +312,6 @@ let find_validated_lemmata rules path filename out_filename = let dict = Dict.load_tab (path ^ filename) in let dict = Dict.merge_entries dict in let dict = Dict.process_interps dict in - let dict = Dict.fonetic_translation dict in let dict = Dict.phon_validate rules dict in let dict = Dict.remove_not_validated_entries dict in Dict.print_lemmata out_filename dict @@ -319,11 +326,11 @@ let _ = let test_lemmatize lemma orth = printf "test_lemmatize: %s %s\n%!" lemma orth; let simple_lemma = Stem.simplify_lemma lemma in - let phon_orths = Fonetics.translate(*_and_check*) true Fonetics.rules (*Fonetics.rev_rules*) orth in + let phon_orths = Fonetics.translate true Fonetics.core_rules orth in printf "phon_orths: \n %s\n%!" (String.concat "\n " (Xlist.map phon_orths Fonetics.string_of_phon)); Xlist.iter phon_orths (fun phon_orth -> Xlist.iter (Rules.CharTrees.find compound_rule_trees phon_orth.phon) (fun (stem,rule) -> - let candidate_lemmas = Fonetics.rev_translate2 true Fonetics.rev_rules (stem ^ rule.set) (phon_orth.mapping) in + let candidate_lemmas = Fonetics.translate_simple true Fonetics.core_rev_rules (stem ^ rule.set) in Xlist.iter candidate_lemmas (fun candidate_lemma -> if candidate_lemma = simple_lemma then printf "E" else printf " "; printf " %s %s %s %s\n%!" phon_orth.phon stem (string_of_rule rule) candidate_lemma))) @@ -430,6 +437,20 @@ let _ = (* test_lemmatize "Lefebvre" "Lefebvre’a"; *) (* test_lemmatize "Kayah" "Kayom"; *) (* test_lemmatize "Dixa" "Diksie"; *) + (* test_lemmatize "White" "Whicie"; + test_lemmatize "Voit" "Voicie"; + test_lemmatize "Violeta" "Violecie"; + test_lemmatize "veto" "vecie"; *) + (* test_lemmatize "Andrássy" "Andrássyowie"; *) + (* test_lemmatize "Cezanne" "Cezanne’ami"; + test_lemmatize "Connery" "Connery’ego"; *) + (* test_lemmatize "Barrés" "Barrés’go"; *) + (* test_lemmatize "IKEA" "IKE-i"; *) + (* test_lemmatize "mix" "miksowi"; + test_lemmatize "Laxa" "Laksie"; *) + (* test_lemmatize "münsterski" "münstersku"; *) + (* test_lemmatize "würzburski" "würzburskiemu"; *) + (* test_lemmatize "polje" "poljom"; *) (* test_lemmatize "" ""; test_lemmatize "" ""; test_lemmatize "" ""; *) @@ -461,8 +482,7 @@ let find_interp_validated_lemmata interp_rules path filename out_filename = (* let dict = Dict.remove_exceptional_lemmata dict in *) (* let dict = find_kolwiek_suffixes dict in *) (* let dict = generate_stem dict in *) - let dict = Dict.fonetic_translation dict in - let dict = Dict.validate_interp interp_rules dict in + let dict = Dict.phon_validate_interp interp_rules dict in let dict = Dict.remove_not_validated_entries dict in Dict.print_lemmata out_filename dict @@ -474,8 +494,7 @@ let find_not_interp_validated_lemmata interp_rules path filename out_filename = (* let dict = Dict.remove_exceptional_lemmata dict in *) (* let dict = find_kolwiek_suffixes dict in *) (* let dict = generate_stem dict in *) - let dict = Dict.fonetic_translation dict in - let dict = Dict.validate_interp interp_rules dict in + let dict = Dict.phon_validate_interp interp_rules dict in let dict = Dict.remove_validated_entries dict in Dict.print_lemmata out_filename dict @@ -485,8 +504,7 @@ let find_not_interp_validated_entries interp_rules path filename out_filename = let dict = Dict.process_interps dict in (* let dict = Dict.mark_ndm dict in let dict = Dict.remove_ndm dict in *) - let dict = Dict.fonetic_translation dict in - let dict = Dict.validate_interp interp_rules dict in + let dict = Dict.phon_validate_interp interp_rules dict in let dict = Dict.remove_validated_entries dict in Dict.print out_filename dict @@ -494,8 +512,8 @@ let find_not_interp_validated_forms interp_rules path filename out_filename = let dict = Dict.load_tab (path ^ filename) in let dict = Dict.merge_entries dict in let dict = Dict.process_interps dict in - let dict = Dict.fonetic_translation dict in - let dict = Dict.validate_interp interp_rules dict in + let dict = Dict.remove_exceptional_lemmata dict in + let dict = Dict.phon_validate_interp interp_rules dict in let dict = Dict.remove_validated_forms dict in Dict.print out_filename dict @@ -506,8 +524,8 @@ let _ = find_interp_validated_lemmata interp_compound_rule_trees results_path adj_sgjp_filename "results/interp_validated_adj.tab"; find_not_interp_validated_lemmata interp_compound_rule_trees results_path adj_sgjp_filename "results/interp_not_validated_adj.tab"; find_interp_validated_lemmata interp_compound_rule_trees results_path adv_sgjp_filename "results/interp_validated_adv.tab"; - find_not_interp_validated_lemmata interp_compound_rule_trees results_path adv_sgjp_filename "results/interp_not_validated_adv.tab"; *) - (* find_interp_validated_lemmata interp_compound_rule_trees results_path verb_sgjp_filename "results/interp_validated_verb.tab"; + find_not_interp_validated_lemmata interp_compound_rule_trees results_path adv_sgjp_filename "results/interp_not_validated_adv.tab"; + find_interp_validated_lemmata interp_compound_rule_trees results_path verb_sgjp_filename "results/interp_validated_verb.tab"; find_not_interp_validated_lemmata interp_compound_rule_trees results_path verb_sgjp_filename "results/interp_not_validated_verb.tab"; find_interp_validated_lemmata interp_compound_rule_trees results_path "verb_sgjp_no_pref.tab" "results/interp_validated_no_pref_verb.tab"; find_not_interp_validated_lemmata interp_compound_rule_trees results_path "verb_sgjp_no_pref.tab" "results/interp_not_validated_no_pref_verb.tab"; *) @@ -519,19 +537,18 @@ let _ = (* find_not_interp_validated_forms interp_compound_rule_trees results_path "lang_fr_sgjp-20170730.tab" "results/lang_fr.tab"; *) (* find_not_interp_validated_forms interp_compound_rule_trees results_path "lang_de_sgjp-20170730.tab" "results/lang_de.tab"; *) (* find_not_interp_validated_forms interp_compound_rule_trees results_path "lang_acro_sgjp-20170730.tab" "results/lang_acro.tab"; *) + (* find_not_interp_validated_forms interp_compound_rule_trees results_path "lang_all_sgjp-20170730.tab" "results/lang_all.tab"; *) + (* find_not_interp_validated_entries interp_compound_rule_trees results_path "lang_all_sgjp-20170730.tab" "results/interp_not_validated_lang_all.tab"; *) () let test_interp_lemmatize lemma orth = printf "test_interp_lemmatize: %s %s\n%!" lemma orth; - let simple_lemma = Stem.simplify_lemma lemma in - let phon_orths = Fonetics.translate(*_and_check*) true Fonetics.rules (*Fonetics.rev_rules*) orth in - printf "phon_orths: \n %s\n%!" (String.concat "\n " (Xlist.map phon_orths Fonetics.string_of_phon)); - Xlist.iter phon_orths (fun phon_orth -> - Xlist.iter (Rules.CharTrees.find interp_compound_rule_trees phon_orth.phon) (fun (stem,rule) -> - let candidate_lemmas = Fonetics.rev_translate2 true Fonetics.rev_rules (stem ^ rule.set) (phon_orth.mapping) in - Xlist.iter candidate_lemmas (fun candidate_lemma -> - if candidate_lemma = simple_lemma then (*printf "E" else printf " ";*) - printf " %s %s %s %s %s\n%!" phon_orth.phon stem (string_of_rule rule) candidate_lemma rule.interp))) + let entry = {empty_entry with lemma=lemma; cat="noun"; forms=[{empty_form with orth=orth}]} in + let entry = Dict.create_candidates false interp_compound_rule_trees entry in + Xlist.iter entry.forms (fun f -> + printf "phon_orths: \n %s\n%!" (String.concat "\n " (Xlist.map f.phon_orth Fonetics.string_of_phon)); + Xlist.iter f.candidates (fun (stem,rule,s) -> + printf "%s %s %s %s\n%!" s.phon stem (string_of_rule rule) rule.interp)) let _ = (* test_interp_lemmatize "Benveniste" "Benveniście"; *) @@ -539,6 +556,19 @@ let _ = (* test_interp_lemmatize "Depardieu" "Depardieuch"; *) (* test_interp_lemmatize "Braille" "Braille’u"; *) (* test_interp_lemmatize "FAMA" "FAMA"; *) + (* test_interp_lemmatize "lichy" "lichego"; *) + (* test_interp_lemmatize "niekaraluszy" "niekaraluszych"; *) + (* test_interp_lemmatize "ninja" "ninjami"; *) + (* test_interp_lemmatize "ninja" "ninji"; *) + (* test_interp_lemmatize "mix" "miksowi"; *) + (* test_interp_lemmatize "münsterski" "münstersku"; *) + (* test_interp_lemmatize "Trubieckoj" "Trubieckich"; *) + (* test_interp_lemmatize "Balazs" "Balazsu"; *) + (* test_interp_lemmatize "Blake" "Blakiem"; + test_interp_lemmatize "Bogorodckij" "Bogorodckiego"; + test_interp_lemmatize "BUW" "BUW-ie"; + test_interp_lemmatize "Bush" "Bushe"; *) + (* test_interp_lemmatize "ensemble" "ensemblowi"; *) (*test_interp_lemmatize "" ""; test_interp_lemmatize "" ""; test_interp_lemmatize "" "";*) @@ -569,7 +599,11 @@ let _ = (* Generowanie złożonych reguł zaopatrzonych we frekwencje *) let _ = (* Dict.generate_rule_frequencies interp_compound_rule_trees results_path adj_sgjp_filename "results/freq_rules-adj.tab"; *) + (* Dict.generate_rule_frequencies interp_compound_rule_trees results_path "verb_sgjp_no_pref.tab" "results/freq_rules-verb.tab"; *) + (* Dict.generate_rule_frequencies interp_compound_rule_trees results_path verb_sgjp_filename "results/freq_rules-verb.tab"; *) + (* Dict.generate_rule_frequencies interp_compound_rule_trees results_path "lang_all_sgjp-20170730.tab" "results/freq_rules-lang.tab"; *) (* Dict.generate_rule_frequencies interp_compound_rule_trees sgjp_path sgjp_filename "results/freq_rules.tab"; *) + (* ignore (Sys.command "totem ~/Dokumenty/Inne/gong/gong_05m_00s.ogg"); *) () let generate_alt rules_filename path filename out_filename = @@ -588,6 +622,7 @@ let generate_alt rules_filename path filename out_filename = let _ = (* generate_alt "results/freq_rules-adj.tab" results_path adj_sgjp_filename "results/alt-adj.tab"; *) (* generate_alt "results/freq_rules.tab" sgjp_path sgjp_filename "results/alt.tab"; *) + (* generate_alt "results/freq_rules-lang.tab" results_path "lang_all_sgjp-20170730.tab" "results/alt-lang.tab"; *) () (* Generowanie stemów z regułami *) diff --git a/morphology/ruleGenerator.ml b/morphology/ruleGenerator.ml index a0c7416..6c31839 100644 --- a/morphology/ruleGenerator.ml +++ b/morphology/ruleGenerator.ml @@ -306,9 +306,9 @@ let calculate_person_value = function let calculate_rule_value tags interp = if interp = "" then failwith "calculate_rule_value: empty interp" else - let cat = try Xlist.assoc tags "cat" with Not_found -> "" in - let lemma = try Xlist.assoc tags "lemma" with Not_found -> "" in - let group = try Xlist.assoc tags "group" with Not_found -> "" in + let cat = Rules.get_tag tags "cat" in + let lemma = Rules.get_tag tags "lemma" in + let group = Rules.get_tag tags "group" in if cat = "noun" || cat = "adj" then let lemma_val = match lemma with "a" -> 20 @@ -405,7 +405,15 @@ let calculate_rule_value tags interp = 10000000 * lemma_val + 10000 * group_val + interp_val else 0 -let phon_generate_interp_rules rules selected_tags simple_lemma form = +let phon_generate_interp_rules selected_tags form = + Xlist.rev_map form.candidates (fun (_,rule,_) -> + let tags = Xlist.fold rule.tags [] (fun tags (k,v) -> + if StringSet.mem selected_tags k then (k,v) :: tags else tags) in + let tags = Xlist.sort tags Rules.compare_tag in + calculate_rule_value rule.tags form.interp, + String.concat " " (Xlist.map tags (fun (k,v) -> k ^ "=" ^ v)) ^ "\t" ^ form.interp) + +(* let phon_generate_interp_rules rules selected_tags simple_lemma form = Xlist.fold form.phon_orth [] (fun found orth -> let candidates = Rules.CharTrees.find rules orth.phon in (* printf "S %d\n" (Xlist.size forms); *) @@ -418,4 +426,4 @@ let phon_generate_interp_rules rules selected_tags simple_lemma form = if StringSet.mem selected_tags k then (k,v) :: tags else tags) in let tags = Xlist.sort tags Rules.compare_tag in calculate_rule_value rule.tags form.interp, - String.concat " " (Xlist.map tags (fun (k,v) -> k ^ "=" ^ v)) ^ "\t" ^ form.interp) @ found) + String.concat " " (Xlist.map tags (fun (k,v) -> k ^ "=" ^ v)) ^ "\t" ^ form.interp) @ found) *) diff --git a/morphology/rules.ml b/morphology/rules.ml index 76fca97..259f77a 100644 --- a/morphology/rules.ml +++ b/morphology/rules.ml @@ -139,6 +139,9 @@ let rec extract_tag s rev = function [] -> "", List.rev rev | (k,v) :: l -> if s = k then v, List.rev rev @ l else extract_tag s ((k,v) :: rev) l +let get_tag l tag = + try Xlist.assoc l tag with Not_found -> "" + let create_compound_rules schemata rule_map = let found = Xlist.fold schemata [] (fun found schema -> let compounds = Xlist.fold schema [{star=Productive;pref="";find="";set="";tags=[];interp=""; id=""; freq=0}] (fun compounds rule_set_name -> @@ -667,9 +670,9 @@ let latex_of_schemata () = print_endline "\\]\\end{scriptsize}\n") let make_rule_key r = - let cat = try Xlist.assoc r.tags "cat" with Not_found -> "" in - let lemma = try Xlist.assoc r.tags "lemma" with Not_found -> "" in - let group = try Xlist.assoc r.tags "group" with Not_found -> "" in + let cat = get_tag r.tags "cat" in + let lemma = get_tag r.tags "lemma" in + let group = get_tag r.tags "group" in let gender = match Xstring.split ":" (List.hd (Xstring.split "|" r.interp)) with "subst" :: n :: c :: g -> (String.concat ":" g) @@ -708,8 +711,8 @@ let rec get_gender = function | [] -> failwith "get_gender" let latex_of_noun_interp_rules l = - let cat = Xlist.assoc (List.hd l).tags "cat" in - let lemma = Xlist.assoc (List.hd l).tags "lemma" in + let cat = get_tag (List.hd l).tags "cat" in + let lemma = get_tag (List.hd l).tags "lemma" in let gender = get_gender (Xlist.map l (fun r -> r.interp)) in let l = Xlist.map l (fun r -> {r with tags = snd (extract_tag "cat" [] r.tags)}) in let l = Xlist.map l (fun r -> {r with tags = snd (extract_tag "lemma" [] r.tags)}) in @@ -750,12 +753,12 @@ let latex_of_noun_interp_rules l = let latex_of_interp_rules_table l tags = print_endline ("\\begin{longtable}{p{7cm}|" ^ String.concat "|" (Xlist.map tags (fun _ -> "l")) ^ "}"); print_endline ("interpretation & " ^ String.concat " & " tags ^ "\\\\\n\\hline"); - Xlist.iter (List.rev l) (fun r -> + Xlist.iter (List.rev l) (fun (r: rule) -> let interp = String.concat "" (Xstring.split ":imperf\\.perf" r.interp) in let interp = String.concat " " (Xstring.split "|" interp) in print_endline ((if r.star = Star then "$\\star$" else "") ^ interp ^ " & " ^ String.concat " & " (Xlist.map tags (fun tag -> - latex_escape_string (Xlist.assoc r.tags tag))) ^ "\\\\")); + latex_escape_string (get_tag r.tags tag))) ^ "\\\\")); print_endline "\\end{longtable}" diff --git a/morphology/types.ml b/morphology/types.ml index 72f9595..67bd83f 100644 --- a/morphology/types.ml +++ b/morphology/types.ml @@ -1,17 +1,17 @@ +type star = Productive | Star | Ndm | Dial + +type rule = {star: star; pref: string; find: string; set: string; tags: (string * string) list; + interp: string; id: string; freq: int} + type phon_orth = {phon: string; mapping: (string * string) list} -type form = {orth: string; phon_orth: phon_orth list; interp: string; freq: int; genre: string; validated: bool} +type form = {orth: string; phon_orth: phon_orth list; interp: string; freq: int; genre: string; validated: bool; candidates: (string * rule * phon_orth) list} type entry = {lemma: string; (*phon_lemma: string list;*) cat: string; forms: form list; proper_type: string; (*ndm: bool;*) stem: string; phon_stem: string list; aspect: string} -let empty_form = {orth=""; phon_orth=[]; interp=""; freq=1; genre=""; validated=false} +let empty_form = {orth=""; phon_orth=[]; interp=""; freq=1; genre=""; validated=false; candidates=[]} let empty_entry = {lemma=""; (*phon_lemma=[];*) cat=""; forms=[]; proper_type=""; (*ndm=false;*) stem=""; phon_stem=[]; aspect=""} -type star = Productive | Star | Ndm | Dial - -type rule = {star: star; pref: string; find: string; set: string; tags: (string * string) list; - interp: string; id: string; freq: int} - let empty_rule = {star=Productive; pref=""; find=""; set=""; tags=[]; interp=""; id=""; freq=0} let string_of_rule r =