Commit f7ed70921566b15c59f8fb980203a9815e636b11
1 parent
512dab52
Warstwa fonetyczna dla podstawowych języków
Showing
5 changed files
with
720 additions
and
246 deletions
morphology/data/alternations.dic
... | ... | @@ -25,6 +25,7 @@ |
25 | 25 | j ji j |
26 | 26 | a aʲi a |
27 | 27 | e eʲi e |
28 | +# } }i } | |
28 | 29 | |
29 | 30 | @funkcjonalnie_miekkie_iy2 |
30 | 31 | b′ b′i b′ |
... | ... | @@ -76,6 +77,7 @@ |
76 | 77 | k′ k′ k′ |
77 | 78 | a a a |
78 | 79 | e e e |
80 | +# } } } | |
79 | 81 | |
80 | 82 | @funkcjonalnie_miekkie_ae2 |
81 | 83 | b′ b′ b′ |
... | ... | @@ -218,6 +220,7 @@ |
218 | 220 | k k′e k |
219 | 221 | o oe o |
220 | 222 | u ue u |
223 | +# } }e } | |
221 | 224 | |
222 | 225 | @funkcjonalnie_twarde_a |
223 | 226 | b b b |
... | ... |
morphology/data/fonetics_acro.dic
1 | 1 | @symbols |
2 | +δ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y z ź ż - ε v x q | |
3 | +γ aε achε amiε ąε eε ęε iε oε omε onε umε ówε uε owiε omε emε yε owieε emuε egoε goε mε muε ε | |
2 | 4 | |
3 | 5 | |
4 | 6 | @rev_symbols |
5 | 7 | |
6 | 8 | |
7 | 9 | @rev_rules |
8 | -a A | |
9 | -b B | |
10 | -c C | |
11 | -#t′ Ć | |
12 | -d D | |
13 | -e E | |
14 | -f F | |
15 | -g G | |
16 | -h H | |
17 | -i I | |
18 | -j J | |
19 | -k K | |
20 | -l L | |
21 | -ł Ł | |
22 | -m M | |
23 | -n N | |
24 | -o O | |
25 | -p P | |
26 | -r R | |
27 | -s S | |
28 | -#s′ Ś | |
29 | -t T | |
30 | -u U | |
31 | -v W | |
32 | -y Y | |
33 | -z Z | |
34 | -ż Ż | |
35 | -#x ch ε | |
36 | -#ax -ach ε | |
37 | -#ę -ę ε | |
38 | -#ʲi -i ε | |
39 | -#a A ε | |
40 | -#jot′e J-ocie ε | |
41 | -#jotax J-otach ε | |
42 | -#jot J ε | |
43 | -#t′e -cie ε | |
10 | +acro {A}a A | |
11 | +acro {B}b B | |
12 | +acro {C}c C | |
13 | +acro {C}k C | |
14 | +acro {Ć}t′ Ć | |
15 | +acro {D}d D | |
16 | +acro {E}e E | |
17 | +acro {F}f F | |
18 | +acro {G}g G | |
19 | +acro {H}h H | |
20 | +acro {I}j I | |
21 | +acro {J}j J | |
22 | +acro {K}k K | |
23 | +acro {L}l L | |
24 | +acro {Ł}ł Ł | |
25 | +acro {M}m M | |
26 | +acro {N}n N | |
27 | +acro {O}o O | |
28 | +acro {P}p P | |
29 | +acro {R}r R | |
30 | +acro {S}s S | |
31 | +acro {Ś}s′ Ś | |
32 | +acro {T}t T | |
33 | +acro {U}u U | |
34 | +acro {V}v V | |
35 | +acro {W}v W | |
36 | +acro {X}ks X | |
37 | +acro {Y}y Y | |
38 | +acro {Z}z Z | |
39 | +acro {Ż}ž Ż | |
40 | +acro {J}jot J | |
41 | +acro {Z}zet Z | |
42 | +acro {Ż}žet Ż | |
43 | +acro {v}v V | |
44 | +acro {x}ks X | |
45 | +acro {j}jot J | |
46 | +acro {z}zet z | |
47 | +acro {ż}žet Ż | |
48 | + | |
49 | +acro a A ε | |
44 | 50 | |
45 | 51 | |
46 | 52 | @rules |
47 | -a A | |
48 | -b B | |
49 | -c C | |
50 | -t′ Ć | |
51 | -d D | |
52 | -e E | |
53 | -f F | |
54 | -g G | |
55 | -h H | |
56 | -i I | |
57 | -j J | |
58 | -k K | |
59 | -l L | |
60 | -ł Ł | |
61 | -m M | |
62 | -n N | |
63 | -o O | |
64 | -p P | |
65 | -r R | |
66 | -s S | |
67 | -s′ Ś | |
68 | -t T | |
69 | -u U | |
70 | -v W | |
71 | -y Y | |
72 | -z Z | |
73 | -ž Ż | |
74 | -ax -ach ε | |
75 | -ę -ę ε | |
76 | -ʲi -i ε | |
77 | -#jot′e J-ocie ε | |
78 | -#jotax J-otach ε | |
79 | -t′e -cie ε | |
80 | -u -u ε | |
81 | -′e -ie ε | |
82 | -t′e cie ε | |
83 | -t′e CIE ε | |
53 | +acro {A}a A δ | |
54 | +acro {B}b B δ | |
55 | +acro {C}c C δ | |
56 | +acro {C}k C δ | |
57 | +acro {Ć}t′ Ć δ | |
58 | +acro {D}d D δ | |
59 | +acro {E}e E δ | |
60 | +acro {F}f F δ | |
61 | +acro {G}g G δ | |
62 | +acro {H}h H δ | |
63 | +acro {I}j I δ | |
64 | +acro {J}j J δ | |
65 | +acro {K}k K δ | |
66 | +acro {L}l L δ | |
67 | +acro {Ł}ł Ł δ | |
68 | +acro {M}m M δ | |
69 | +acro {N}n N δ | |
70 | +acro {O}o O δ | |
71 | +acro {P}p P δ | |
72 | +acro {R}r R δ | |
73 | +acro {S}s S δ | |
74 | +acro {Ś}s′ Ś δ | |
75 | +acro {T}t T δ | |
76 | +acro {U}u U δ | |
77 | +acro {V}v V δ | |
78 | +acro {W}v W δ | |
79 | +acro {X}ks X δ | |
80 | +acro {Y}y Y δ | |
81 | +acro {Z}z Z δ | |
82 | +acro {Ż}ž Ż δ | |
83 | +acro {J}jot J-ot δ | |
84 | +acro {Z}zet Z-et δ | |
85 | +acro {Ż}žet Ż-et δ | |
86 | +acro {z}zet z-et δ | |
87 | + | |
88 | +acro {B}b′e B-ie ε | |
89 | +acro {D}d′e D-zie ε | |
90 | +acro {F}f′e F-ie ε | |
91 | +acro {M}m′e M-ie ε | |
92 | +acro {N}n′e N-ie ε | |
93 | +acro {P}p′e P-ie ε | |
94 | +acro {R}ře R-ze ε | |
95 | +acro {S}s′e S-ie ε | |
96 | +acro {T}t′e -cie ε | |
97 | +acro {V}v′e V-ie ε | |
98 | +acro {W}v W-ie ε | |
99 | +acro {X}ks′e X-ie ε | |
100 | +acro {Z}z Z-ie ε | |
101 | +acro {J}jot′e J-ocie ε | |
102 | +acro {Z}zet′e Z-ecie ε | |
103 | +acro {Ż}žet′e Ż-ecie ε | |
104 | + | |
105 | +acro {B}b′e Bie ε | |
106 | +acro {D}d′e Dzie ε | |
107 | +acro {F}f′e Fie ε | |
108 | +acro {M}m′e Mie ε | |
109 | +acro {N}n′e Nie ε | |
110 | +acro {P}p′e Pie ε | |
111 | +acro {R}ře Rze ε | |
112 | +acro {S}s′e Sie ε | |
113 | +acro {T}t′e cie ε | |
114 | +acro {V}v′e Vie ε | |
115 | +acro {W}v Wie ε | |
116 | +acro {X}ks′e Xie ε | |
117 | +acro {Z}z Zie ε | |
118 | +acro {J}jot′e Jocie ε | |
119 | +acro {Z}zet′e Zecie ε | |
120 | +acro {Ż}žet′e Żecie ε | |
121 | + | |
122 | +acro b′e b-ie ε | |
123 | +acro d′e d-zie ε | |
124 | +acro f′e f-ie ε | |
125 | +acro m′e m-ie ε | |
126 | +acro n′e n-ie ε | |
127 | +acro p′e p-ie ε | |
128 | +acro ře r-ze ε | |
129 | +acro s′e s-ie ε | |
130 | +acro t′e -cie ε | |
131 | +acro {v}v′e v-ie ε | |
132 | +acro v w-ie ε | |
133 | +acro {x}ks′e x-ie ε | |
134 | +acro z z-ie ε | |
135 | +acro {j}jot′e j-ocie ε | |
136 | +acro {z}zet′e z-ecie ε | |
137 | +acro {ż}žet′e ż-ecie ε | |
138 | + | |
139 | +acro {T}t′e CIE ε | |
140 | +acro {A}a{T}t′e acie ε | |
141 | +acro {E}e{T}t′e ecie ε | |
142 | +acro {I}j{T}t′e icie ε | |
143 | +acro {O}o{T}t′e ocie ε | |
144 | +acro {U}u{T}t′e ucie ε | |
145 | + | |
146 | +acro {C}k′i C-i ε | |
147 | +acro {C}k′em C-iem ε | |
148 | +acro {G}g′i G-i ε | |
149 | +acro {G}g′em G-iem ε | |
150 | +acro {J}ji J-i ε | |
151 | +acro {K}k′i K-i ε | |
152 | +acro {K}k′em K-iem ε | |
153 | +acro {L}li L-i ε | |
154 | +acro - γ | |
155 | + a a ε | |
156 | + ax ach ε | |
157 | + am′i ami ε | |
158 | + ą ą ε | |
159 | + e e ε | |
160 | + ę ę ε | |
161 | + em em ε | |
162 | + o o ε | |
163 | + om om ε | |
164 | + ov′i owi ε | |
165 | + ov′e owie ε | |
166 | + u u ε | |
167 | + óv ów ε | |
168 | + y y ε | |
84 | 169 | |
85 | 170 | |
... | ... |
morphology/data/fonetics_pl.dic
1 | 1 | @symbols |
2 | 2 | ω a ą e ę o ó u |
3 | -δ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y z ź ż - ε | |
4 | -γ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y ź ż - ε | |
5 | -ξ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y - ε | |
6 | -μ a ą b c ć d e ę f g k l ł m n ń o ó p r s ś t u w y ź ż - ε | |
7 | -λ b c ć d f g h k l ł m n ń p r s ś t w z ź ż - ε | |
8 | -κ b c ć d f g h i j k l ł m n ń p r s ś t w z ź ż - ε | |
9 | -σ a ą b c ć d e ę f g j h k l ł m n ń o ó p r s ś t u w y z ź ż - ε | |
10 | -ρ aε achε amiε ąε eε ęε iε oε omε onε umε ówε uε owiε omε emε yε owieε emuε egoε goε mε muε ε | |
3 | +δ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y z ź ż - ε v x q | |
4 | +#γ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y ź ż - ε | |
5 | +#ξ a ą b c ć d e ę f g h k l ł m n ń o ó p r s ś t u w y - ε | |
6 | +#μ a ą b c ć d e ę f g k l ł m n ń o ó p r s ś t u w y ź ż - ε | |
7 | +λ b c ć d f g h k l ł m n ń p r s ś t w z ź ż - ε v x q | |
8 | +κ b c ć d f g h i j k l ł m n ń p r s ś t w z ź ż - ε v x q | |
9 | +#σ a ą b c ć d e ę f g j h k l ł m n ń o ó p r s ś t u w y z ź ż - ε | |
10 | +ρ aε achε amiε ąε eε ęε iε oε omε onε umε | |
11 | +α ε goε muε mε | |
12 | +ά ε goε muε | |
13 | +β ε chε miε mε | |
14 | +γ aε achε amiε ąε eε ęε iε oε omε onε umε ówε uε owiε omε emε yε owieε emuε egoε goε mε muε ε | |
11 | 15 | |
12 | 16 | @rev_symbols |
13 | -ω a ą e ę o ó u | |
14 | -δ a ą b c ć č d ʒ ǯ e ę f g h x i k l ł m n ń o ó p r ř s ś š t u v w y z ź ž - ε | |
15 | -λ b c ć č d ʒ ǯ f g h x k l ł m n ń p r ř s ś š t v w z ź ž - ε | |
16 | -κ b c ć č d ʒ ǯ f g h x i j ʲ k l ł m n ń p r ř s ś š t v w z ź ž - ε | |
17 | +ω a ą e ę o ó u {eu} | |
18 | +δ a ą b c ć č d ʒ ǯ e ę f g h x i k l ł m n ń o ó p r ř s ś š t u v w y z ź ž - ε { | |
19 | +λ b c ć č d ʒ ǯ f g h x k l ł m n ń p r ř s ś š t v w z ź ž - ε { q | |
20 | +κ b c ć č d ʒ ǯ f g h x i j ʲ k l ł m n ń p r ř s ś š t v w z ź ž - ε { | |
17 | 21 | ρ aε axε am′iε ąε eε ęε iε oε omε onε umε |
18 | 22 | |
19 | 23 | @rev_rules |
20 | 24 | pl p′ pi ω |
21 | -pl p′ p iκ | |
25 | +pl p′ p i | |
22 | 26 | pl pj pi ρ |
23 | 27 | pl b′ bi ω |
24 | -pl b′ b iκ | |
28 | +pl b′ b i | |
25 | 29 | pl bj bi ρ |
26 | 30 | pl m′ mi ω |
27 | -pl m′ m iκ | |
31 | +pl m′ m i | |
28 | 32 | pl m′j mi ρ |
29 | 33 | pl f′ fi ω |
30 | -pl f′ f iκ | |
34 | +pl f′ f i | |
31 | 35 | pl fj fi ρ |
32 | 36 | pl v′ wi ω |
33 | -pl v′ w iκ | |
37 | +pl v′ w i | |
34 | 38 | pl v w δ |
35 | 39 | pl vj wi ρ |
36 | 40 | #pl v w j |
37 | 41 | pl t′ ć λ |
38 | 42 | pl t′ ci ω |
39 | -pl t′ c iκ | |
43 | +pl t′ c i | |
40 | 44 | pl d′ dź λ |
41 | 45 | pl d′ dzi ω |
42 | -pl d′ dz iκ | |
46 | +pl d′ dz i | |
43 | 47 | pl ʒ dz |
44 | 48 | pl tʲ ti ω |
45 | -pl tʲ t iκ | |
49 | +pl tʲ t i | |
46 | 50 | pl tj ti ρ |
47 | 51 | pl dʲ di ω |
48 | -pl dʲ d iκ | |
52 | +pl dʲ d i | |
49 | 53 | pl dj di ρ |
50 | 54 | pl č cz δ |
51 | 55 | pl čʲ czi ω |
52 | -pl čʲ cz iκ | |
56 | +pl čʲ cz i | |
53 | 57 | pl č cz j |
54 | 58 | pl ǯ dż δ |
55 | 59 | pl ǯʲ dżi ω |
56 | -pl ǯʲ dż iκ | |
60 | +pl ǯʲ dż i | |
57 | 61 | pl ǯj dżi ρ |
58 | 62 | pl s′ ś λ |
59 | 63 | pl s′ si ω |
60 | -pl s′ s iκ | |
64 | +pl s′ s i | |
61 | 65 | pl z′ ź λ |
62 | 66 | pl z′ zi ω |
63 | -pl z′ z iκ | |
67 | +pl z′ z i | |
64 | 68 | pl š sz δ |
65 | 69 | pl š sz j |
66 | 70 | pl šʲ szi ω |
67 | -pl šʲ sz iκ | |
71 | +pl šʲ sz i | |
68 | 72 | pl ž ż δ |
69 | 73 | pl žʲ żi ω |
70 | -pl žʲ ż iκ | |
74 | +pl žʲ ż i | |
71 | 75 | pl žj żi ρ |
72 | 76 | pl lʲ li ω |
73 | 77 | pl lj li ρ |
74 | 78 | pl ř rz δ |
75 | 79 | pl ř rz j |
76 | 80 | pl rʲ ri ω |
77 | -pl rʲ r iκ | |
81 | +pl rʲ r i | |
78 | 82 | pl rj ri ρ |
79 | 83 | pl n′ ń λ |
80 | 84 | pl n′ ni ω |
81 | -pl n′ n iκ | |
85 | +pl n′ n i | |
82 | 86 | pl n′j ni ρ |
83 | 87 | pl k′ ki ω |
84 | -pl k′ k iκ | |
88 | +pl k′ k i | |
85 | 89 | pl k′j ki ρ |
86 | 90 | pl g′ gi ω |
87 | -pl g′ g iκ | |
91 | +pl g′ g i | |
88 | 92 | pl g′j gi ρ |
89 | 93 | pl x ch δ |
90 | 94 | pl xʲ chi ω |
91 | -pl xʲ ch iκ | |
95 | +pl xʲ ch i | |
92 | 96 | pl xʲj chi ρ |
93 | 97 | pl hʲ hi ω |
94 | -pl hʲ h iκ | |
98 | +pl hʲ h i | |
95 | 99 | pl aʲ a i |
96 | 100 | pl eʲ e i |
97 | 101 | pl oʲ o i |
... | ... | @@ -100,82 +104,222 @@ pl uʲ u i |
100 | 104 | pl iʲ i i |
101 | 105 | pl yʲ y i |
102 | 106 | |
107 | +fr {ch}š ch | |
108 | +fr {q}k q | |
109 | +fr {ng}n ng | |
110 | +en {tch}č tch | |
111 | +en {w}ł w | |
112 | +en {leigh}l leigh | |
113 | +de {au}ał au | |
103 | 114 | de {sch}š sch |
104 | -de {rh}r rh | |
115 | +de {tsch}č tsch | |
105 | 116 | de {z}c z |
106 | -de {au}ał au | |
107 | -fr {q}k q | |
108 | -fr {nq}n ng | |
109 | -fr {qui}kʲ qui | |
110 | -de-fr {x}ks x | |
111 | -de r rh | |
112 | -de r r | |
113 | -de ta tha ε | |
114 | - t t | |
115 | -de-fr ks x | |
116 | -de {v}f v ε | |
117 | -fr {v}v v | |
118 | 117 | |
119 | -de g′e ge ε | |
120 | -de k′e ke ε | |
118 | +en {ay}aj ay ε | |
119 | +en {ey}ej ey ε | |
120 | +en {oy}oj oy ε | |
121 | +en {ay}aj ay ω | |
122 | +en {ey}ej ey ω | |
123 | +en {oy}oj oy ω | |
124 | + | |
125 | +en {dh}dʲ dhi ω | |
126 | +en {dh}dʲ dh iκ | |
127 | +en {dh}d dh δ | |
128 | +en {gh}g′ ghi ω | |
129 | +en {gh}g′ gh iκ | |
130 | +en {gh}g gh δ | |
131 | +en {kh}k′ khi ω | |
132 | +en {kh}k′ kh iκ | |
133 | +en {kh}k kh δ | |
134 | +en {th}tʲ thi ω | |
135 | +en {th}tʲ th iκ | |
136 | +en {th}t th δ | |
137 | +en {v}v′ vi ω | |
138 | +en {v}v′ v iκ | |
139 | +en {v}v v δ | |
140 | +en {v}vj vi ρ | |
141 | +de-en-fr {x}ks′ xi ω | |
142 | +de-en-fr {x}ks′ x iκ | |
143 | +de-en-fr {x}ks x δ | |
144 | + | |
145 | +#lemma=e/y gender=m1 | |
146 | +fr {dieu}dʲe dieu ε | |
147 | +fr {dieu}dʲi dieu ε | |
148 | +fr {quieu}k′e quieu ε | |
149 | +fr {quieu}k′i quieu ε | |
150 | +fr {lieu}lʲe lieu ε | |
151 | +fr {lieu}lʲi lieu ε | |
152 | +en {rie}rʲe rie ε | |
153 | +en {rie}rʲi rie ε | |
154 | +en {gie}ǯʲe gie ε | |
155 | +en {gie}ǯʲi gie ε | |
156 | +en {kie}k′e kie ε | |
157 | +en {kie}k′i kie ε | |
158 | +en {tie}tʲe tie ε | |
159 | +en {tie}tʲi tie ε | |
160 | +en {pie}p′e pie ε | |
161 | +en {pie}p′i pie ε | |
162 | +en {die}dʲe die ε | |
163 | +en {die}dʲi die ε | |
164 | +en {bee}b′e bee ε | |
165 | +en {bee}b′i bee ε | |
166 | + | |
167 | +#lemma=e gender=m1 | |
168 | +fr {mée}me mée ε | |
169 | +de {ge}g′e ge ε | |
170 | +de {ke}k′e ke ε | |
171 | + | |
172 | +#lemma=y gender=m1 | |
173 | +en {by}b′i by ε | |
174 | +en {dy}dʲi dy ε | |
175 | +es {dí}dʲi dí ε | |
176 | +fr {phy}f′i phy ε | |
177 | +fr {guy}g′i guy ε | |
178 | +de-en {ky}k′i ky ε | |
179 | +en {my}m′i my ε | |
180 | +fr {li}li li ε | |
181 | +en {ly}li ly ε | |
182 | +en-fr {ry}rʲi ry ε | |
183 | +fr {sy}sʲi sy ε | |
184 | +en {cy}sʲi cy ε | |
185 | +en {thy}tʲi thy ε | |
186 | + | |
187 | +#lemma=e/ndm gender=m1 | |
188 | +fr {chais}še chais ε | |
189 | +fr {lais}le lais ε | |
190 | +fr {nais}ne nais ε | |
191 | +fr {rès}re rès ε | |
192 | + | |
193 | +#lemma=ε | |
194 | +en-fr {de}d de ε | |
195 | +en {fe}f fe ε | |
196 | +fr {phe}f phe ε | |
197 | +en {ge}ǯ ge ε | |
198 | +fr {gue}g gue ε | |
199 | +en {ke}k ke ε | |
200 | +fr {que}k que ε | |
201 | +en-fr {le}l le ε | |
202 | +en-fr {me}m me ε | |
203 | +en-fr {ne}n ne ε | |
204 | +fr {gne}n′ gne ε | |
205 | +en-fr {re}r re ε | |
206 | +en {se}s se ε | |
207 | +en-fr {ce}s ce ε | |
208 | +fr {che}š che ε | |
209 | +en-fr {te}t te ε | |
210 | +en {the}t the ε | |
211 | +en-fr {ve}v ve ε | |
212 | +en {we}ł we ε | |
213 | +fr {se}z se ε | |
214 | +fr {ge}ž ge ε | |
215 | +en {oe}oł oe ε | |
216 | + | |
217 | + | |
121 | 218 | de g′el gel ε |
122 | 219 | |
220 | +de g′e ge ε | |
221 | +de k′e ke ε | |
123 | 222 | de k′i ky ε |
223 | +de-en-fr ks x ε | |
224 | + | |
225 | + r r a | |
226 | +de r rh a | |
227 | + t t a | |
228 | +de t th a | |
229 | + k k | |
230 | +en k kh a | |
231 | +de-en-fr ks x a | |
232 | +en k c a | |
233 | + | |
234 | +en k c o | |
124 | 235 | |
236 | + b b ε | |
125 | 237 | fr b bes ε |
126 | -fr d d ε | |
127 | -fr d de ε | |
238 | + d d ε | |
239 | +en-fr d de ε | |
240 | + f f ε | |
128 | 241 | fr f phe ε |
242 | +en f ph ε | |
243 | + g g ε | |
129 | 244 | fr g gue ε |
130 | 245 | fr g gues ε |
246 | + k k ε | |
131 | 247 | fr k c ε |
132 | 248 | fr k que ε |
133 | 249 | fr k q ε |
134 | 250 | fr k cq ε |
135 | 251 | fr k ques ε |
136 | 252 | fr k cques ε |
137 | -fr m me ε | |
138 | -fr n n ε | |
139 | -fr n ne ε | |
253 | +en kt ct ε | |
254 | + m m ε | |
255 | +en-fr m me ε | |
256 | + n n ε | |
257 | +en-fr n ne ε | |
140 | 258 | fr n nes ε |
141 | 259 | fr n ng ε |
142 | -fr r r ε | |
143 | -fr r re ε | |
260 | + r r ε | |
261 | +en-fr r re ε | |
144 | 262 | fr r res ε |
145 | 263 | fr r rs ε |
146 | -fr s s ε | |
147 | -fr s se ε | |
148 | -fr s ce ε | |
149 | -fr t t ε | |
264 | +en r rh ε | |
265 | + s s ε | |
266 | +en-fr s se ε | |
267 | +en-fr s ce ε | |
268 | +en s th ε | |
269 | + t t ε | |
270 | +fr t te ε | |
150 | 271 | fr t tes ε |
151 | 272 | fr t thes ε |
152 | -fr z se ε | |
273 | +en t th ε | |
274 | +en t the ε | |
153 | 275 | |
154 | -de {ky}kj ky ε | |
155 | -fr {ès}e ès ε | |
156 | -fr {ais}e ais ε | |
157 | -fr {que}k que ε | |
158 | -fr {phe}f phe ε | |
159 | -fr {sy}sj sy ε | |
160 | -fr {phy}fj phy ε | |
161 | -fr {ry}rj ry ε | |
162 | -fr {che}š che ε | |
163 | -fr {de}d de ε | |
164 | -fr {re}r re ε | |
165 | -fr {se}z se ε | |
166 | -fr {ge}ž ge ε | |
167 | -fr {te}t te ε | |
168 | -fr {le}l le ε | |
169 | -fr {me}m me ε | |
170 | -fr {gne}n′ gne ε | |
171 | -fr {ne}n ne ε | |
172 | -#fr ine ε | |
173 | -fr {ve}v ve ε | |
174 | -fr {gue}g gue ε | |
175 | -fr {ce}s ce ε | |
176 | -fr {guy}gj guy ε | |
276 | + e e j | |
277 | +en ej ey ε | |
278 | + | |
279 | + a a ε | |
280 | +en a ah ε | |
281 | +en aja ayah ε | |
282 | + | |
283 | +acro {A}a A | |
284 | +acro {B}b B | |
285 | +acro {C}c C | |
286 | +acro {C}k C | |
287 | +acro {Ć}t′ Ć | |
288 | +acro {D}d D | |
289 | +acro {E}e E | |
290 | +acro {F}f F | |
291 | +acro {G}g G | |
292 | +acro {H}h H | |
293 | +acro {I}j I | |
294 | +acro {J}j J | |
295 | +acro {K}k K | |
296 | +acro {L}l L | |
297 | +acro {Ł}ł Ł | |
298 | +acro {M}m M | |
299 | +acro {N}n N | |
300 | +acro {O}o O | |
301 | +acro {P}p P | |
302 | +acro {R}r R | |
303 | +acro {S}s S | |
304 | +acro {Ś}s′ Ś | |
305 | +acro {T}t T | |
306 | +acro {U}u U | |
307 | +acro {V}v V | |
308 | +acro {W}v W | |
309 | +acro {X}ks X | |
310 | +acro {Y}y Y | |
311 | +acro {Z}z Z | |
312 | +acro {Ż}ž Ż | |
313 | +acro {J}jot J | |
314 | +acro {Z}zet Z | |
315 | +acro {Ż}žet Ż | |
316 | +acro {v}v V | |
317 | +acro {x}ks X | |
318 | +acro {j}jot J | |
319 | +acro {z}zet z | |
320 | +acro {ż}žet Ż | |
177 | 321 | |
178 | -fr {li}lj li ε | |
322 | +acro a A ε | |
179 | 323 | |
180 | 324 | @rules |
181 | 325 | pl p′ pi ω |
... | ... | @@ -353,91 +497,294 @@ pl yʲ y i |
353 | 497 | #pl χ́ chi |
354 | 498 | #pl h́ hi |
355 | 499 | |
356 | -de {sch}š sch | |
357 | -de {rh}r rh | |
358 | -de {z}c z | |
359 | -de {au}ał au | |
360 | -fr {q}k q | |
361 | -fr {nq}n ng | |
362 | - n n g | |
363 | -fr {qui}kʲ qui ω | |
364 | -de-fr {x}ks x | |
365 | - | |
366 | -de ks′e ksie ε | |
367 | -de {v}f′e vie ε | |
368 | -fr {v}v′i vi ε | |
369 | -fr {v}v′e vie ε | |
370 | -fr {v}v v | |
371 | - | |
372 | -de {ge}g′e ge | |
373 | -de {ke}k′e ke | |
374 | -de {ky}k′i ky | |
375 | -fr {eu}i eu chε | |
376 | -fr {eu}e eu goε | |
377 | -fr {eu}i eu mε | |
378 | -fr {eu}i eu miε | |
379 | -fr {eu}e eu muε | |
380 | -fr {li}lj li egoε | |
381 | -fr {li}lj li emuε | |
382 | -fr {li}lj li eε | |
383 | -fr {ée}e ée goε | |
384 | -fr {ée}e ée mε | |
385 | -fr {ée}e ée muε | |
386 | - | |
387 | -de {ky}kj ky ’ | |
388 | -fr {ès}e ès ’ | |
389 | -fr {ais}e ais ’ | |
390 | -fr {que}k que ’ | |
500 | +fr {ch}š ch γ | |
501 | +fr {q}k q γ | |
502 | +fr {ng}n ng γ | |
503 | +en {tch}č tch γ | |
504 | +en {w}ł w | |
505 | +en {leigh}l leigh γ | |
506 | +de {au}ał au γ | |
507 | +de {sch}š sch γ | |
508 | +de {tsch}č tsch γ | |
509 | +de {z}c z γ | |
510 | + | |
511 | +en {ay}aj ay γ | |
512 | +en {ey}ej ey γ | |
513 | +en {oy}oj oy γ | |
514 | + | |
515 | +en {dh}dʲ dhi ω | |
516 | +en {dh}dʲ dh iκ | |
517 | +en {dh}d dh δ | |
518 | +en {gh}g′ ghi ω | |
519 | +en {gh}g′ gh iκ | |
520 | +en {gh}g gh δ | |
521 | +en {kh}k′ khi ω | |
522 | +en {kh}k′ kh iκ | |
523 | +en {kh}k kh δ | |
524 | +en {th}tʲ thi ω | |
525 | +en {th}tʲ th iκ | |
526 | +en {th}t th δ | |
527 | +en {v}v′ vi ω | |
528 | +en {v}v′ v iκ | |
529 | +en {v}v v δ | |
530 | +en {v}vj vi ρ | |
531 | +de-en-fr {x}ks′ xi ω | |
532 | +de-en-fr {x}ks′ x iκ | |
533 | +de-en-fr {x}ks x δ | |
534 | + | |
535 | +#lemma=e/y gender=m1 | |
536 | +fr {dieu}dʲe dieu α | |
537 | +fr {dieu}dʲi dieu β | |
538 | +fr {quieu}k′e quieu α | |
539 | +fr {quieu}k′i quieu β | |
540 | +fr {lieu}lʲe lieu α | |
541 | +fr {lieu}lʲi lieu β | |
542 | +en {rie}rʲe rie α | |
543 | +en {rie}rʲi rie β | |
544 | +en {gie}ǯʲe gie α | |
545 | +en {gie}ǯʲi gie β | |
546 | +en {kie}k′e kie α | |
547 | +en {kie}k′i kie β | |
548 | +en {tie}tʲe tie α | |
549 | +en {tie}tʲi tie β | |
550 | +en {pie}p′e pie α | |
551 | +en {pie}p′i pie β | |
552 | +en {die}dʲe die α | |
553 | +en {die}dʲi die β | |
554 | +en {bee}b′e bee α | |
555 | +en {bee}b′i bee β | |
556 | + | |
557 | +#lemma=e gender=m1 | |
558 | +fr {mée}me mée α | |
559 | +de {ge}g′e ge α | |
560 | +de {ke}k′e ke α | |
561 | + | |
562 | +#lemma=y gender=m1 | |
563 | +en {by}b′ by ’eά | |
564 | +en {by}b′i by β | |
565 | +en {dy}dʲ dy ’eά | |
566 | +en {dy}dʲi dy β | |
567 | +es {dí}dʲ dí eά | |
568 | +es {dí}dʲi dí β | |
569 | +fr {phy}f′ phy ’eά | |
570 | +fr {phy}f′i phy β | |
571 | +fr {guy}g′ guy ’eά | |
572 | +fr {guy}g′i guy β | |
573 | +de-en {ky}k′ ky ’eά | |
574 | +de-en {ky}k′i ky β | |
575 | +en {my}m′ my ’eά | |
576 | +en {my}m′i my β | |
577 | +fr {li}l li eά | |
578 | +fr {li}l l iβ | |
579 | +en {ly}l ly ’eά | |
580 | +en {ly}li ly β | |
581 | +en-fr {ry}rʲ ry ’eά | |
582 | +en-fr {ry}rʲi ry β | |
583 | +fr {sy}sʲ sy ’eά | |
584 | +fr {sy}sʲi sy β | |
585 | +en {cy}sʲ cy ’eά | |
586 | +en {cy}sʲi cy β | |
587 | +en {thy}tʲ thy ’eά | |
588 | +en {thy}tʲi thy β | |
589 | + | |
590 | +#lemma=e/ndm gender=m1 | |
591 | +fr {chais}še chais ’ | |
592 | +fr {lais}le lais ’ | |
593 | +fr {nais}ne nais ’ | |
594 | +fr {rès}re rès ’ | |
595 | + | |
596 | +#lemma=ε | |
597 | +en-fr {de}d de ’ | |
598 | +en-fr {de}d de ε | |
599 | +en {fe}f fe ’ | |
600 | +en {fe}f fe ε | |
391 | 601 | fr {phe}f phe ’ |
392 | -fr {sy}sj sy ’ | |
393 | -fr {phy}fj phy ’ | |
394 | -fr {ry}rj ry ’ | |
602 | +fr {phe}f phe ε | |
603 | +en {ge}ǯ ge ’ | |
604 | +en {ge}ǯ ge ε | |
605 | +fr {gue}g gue ’ | |
606 | +fr {gue}g gue ε | |
607 | +en {ke}k ke ’ | |
608 | +en {ke}k ke ε | |
609 | +fr {que}k que ’ | |
610 | +fr {que}k que ε | |
611 | +en-fr {le}l le ’ | |
612 | +en-fr {le}l le ε | |
613 | +en-fr {me}m me ’ | |
614 | +en-fr {me}m me ε | |
615 | +en-fr {ne}n ne ’ | |
616 | +en-fr {ne}n ne ε | |
617 | +fr {gne}n′ gne ’ | |
618 | +fr {gne}n′ gne ε | |
619 | +en-fr {re}r re ’ | |
620 | +en-fr {re}r re ε | |
621 | +en {se}s se ’ | |
622 | +en {se}s se ε | |
623 | +en-fr {ce}s ce ’ | |
624 | +en-fr {ce}s ce ε | |
395 | 625 | fr {che}š che ’ |
396 | -fr {de}d de ’ | |
397 | -fr {re}r re ’ | |
626 | +fr {che}š che ε | |
627 | +en-fr {te}t te ’ | |
628 | +en-fr {te}t te ε | |
629 | +en {the}t the ’ | |
630 | +en {the}t the ε | |
631 | +en-fr {ve}v ve ’ | |
632 | +en-fr {ve}v ve ε | |
633 | +en {we}ł we ’ | |
634 | +en {we}ł we ε | |
398 | 635 | fr {se}z se ’ |
636 | +fr {se}z se ε | |
399 | 637 | fr {ge}ž ge ’ |
400 | -fr {te}t te ’ | |
401 | -fr {le}l le ’ | |
402 | -fr {me}m me ’ | |
403 | -fr {gne}n′ gne ’ | |
404 | -fr {ne}n ne ’ | |
405 | -#fr ine ’ | |
406 | -fr {ve}v ve ’ | |
407 | -fr {gue}g gue ’ | |
408 | -fr {ce}s ce ’ | |
409 | -fr {guy}gj guy ’ | |
410 | -de-fr ’ ρ | |
411 | - | |
412 | -de góv gów ε | |
413 | -de gov′i gowi ε | |
414 | -de gov′e gowie ε | |
415 | -de gam′i gami ε | |
416 | -de gax gach ε | |
417 | -de gom gom ε | |
418 | -de kóv ków ε | |
419 | -de kov′i kowi ε | |
420 | -de kov′e kowie ε | |
421 | -de kam′i kami ε | |
422 | -de kax kach ε | |
423 | -de kom kom ε | |
424 | - | |
425 | -de k′ix kich ε | |
426 | -de k′e kie ε | |
427 | -de k′im kim ε | |
428 | -de k′im′i kimi ε | |
429 | -de k′ego kiego ε | |
430 | -de k′emu kiemu ε | |
431 | - | |
432 | -de ksax ksach ε | |
433 | -de ksa ksa ε | |
434 | -de ksam′i ksami ε | |
435 | -de ksem ksem ε | |
436 | -de ksom ksom ε | |
437 | -de ksov′e ksowie ε | |
438 | -de ksov′i ksowi ε | |
439 | -de ksóv ksów ε | |
440 | -de ksy ksy ε | |
441 | -fr ksu ksu ε | |
442 | - | |
443 | -de gli gli ε | |
638 | +fr {ge}ž ge ε | |
639 | +#z wyjątkiem sg:loc.voc | |
640 | +en {oe}oł oe ’ | |
641 | +en {oe}oł oe ε | |
642 | + | |
643 | +de-en-fr ’ γ | |
644 | + | |
645 | + n n g | |
646 | + z z γ | |
647 | + b b e | |
648 | + b b y | |
649 | + d d y | |
650 | + m m y | |
651 | + s s y | |
652 | + c c y | |
653 | + r r y | |
654 | + k k y | |
655 | + l l y | |
656 | + l l a | |
657 | + n n a | |
658 | + r r è | |
659 | + d d e | |
660 | + f f e | |
661 | + p p h | |
662 | + g g e | |
663 | + g g u | |
664 | + k k e | |
665 | + q q u | |
666 | + l l e | |
667 | + m m e | |
668 | + n n e | |
669 | + g g n | |
670 | + r r e | |
671 | + s s e | |
672 | + c c e | |
673 | + c c h | |
674 | + t t e | |
675 | + t t h | |
676 | + v v e | |
677 | + w w e | |
678 | + s s e | |
679 | + o o e | |
680 | + m m é | |
681 | + a a y | |
682 | + e e y | |
683 | + o o y | |
684 | + | |
685 | +acro a A ε | |
686 | + | |
687 | +acro {A}a A | |
688 | +acro {B}b B | |
689 | +acro {C}c C | |
690 | +acro {C}k C | |
691 | +acro {Ć}t′ Ć | |
692 | +acro {D}d D | |
693 | +acro {E}e E | |
694 | +acro {F}f F | |
695 | +acro {G}g G | |
696 | +acro {H}h H | |
697 | +acro {I}j I | |
698 | +acro {J}j J | |
699 | +acro {K}k K | |
700 | +acro {L}l L | |
701 | +acro {Ł}ł Ł | |
702 | +acro {M}m M | |
703 | +acro {N}n N | |
704 | +acro {O}o O | |
705 | +acro {P}p P | |
706 | +acro {R}r R | |
707 | +acro {S}s S | |
708 | +acro {Ś}s′ Ś | |
709 | +acro {T}t T | |
710 | +acro {U}u U | |
711 | +acro {V}v V | |
712 | +acro {W}v W | |
713 | +acro {X}ks X | |
714 | +acro {Y}y Y | |
715 | +acro {Z}z Z | |
716 | +acro {Ż}ž Ż | |
717 | +acro {J}jot J-ot δ | |
718 | +acro {Z}zet Z-et δ | |
719 | +acro {Ż}žet Ż-et δ | |
720 | +acro {z}zet z-et δ | |
721 | + | |
722 | +acro {B}b′e B-ie ε | |
723 | +acro {D}d′e D-zie ε | |
724 | +acro {F}f′e F-ie ε | |
725 | +acro {M}m′e M-ie ε | |
726 | +acro {N}n′e N-ie ε | |
727 | +acro {P}p′e P-ie ε | |
728 | +acro {R}ře R-ze ε | |
729 | +acro {S}s′e S-ie ε | |
730 | +acro {T}t′e -cie ε | |
731 | +acro {V}v′e V-ie ε | |
732 | +acro {W}v W-ie ε | |
733 | +acro {X}ks′e X-ie ε | |
734 | +acro {Z}z Z-ie ε | |
735 | +acro {J}jot′e J-ocie ε | |
736 | +acro {Z}zet′e Z-ecie ε | |
737 | +acro {Ż}žet′e Ż-ecie ε | |
738 | + | |
739 | +acro {B}b′e Bie ε | |
740 | +acro {D}d′e Dzie ε | |
741 | +acro {F}f′e Fie ε | |
742 | +acro {M}m′e Mie ε | |
743 | +acro {N}n′e Nie ε | |
744 | +acro {P}p′e Pie ε | |
745 | +acro {R}ře Rze ε | |
746 | +acro {S}s′e Sie ε | |
747 | +acro {T}t′e cie ε | |
748 | +acro {V}v′e Vie ε | |
749 | +acro {W}v Wie ε | |
750 | +acro {X}ks′e Xie ε | |
751 | +acro {Z}z Zie ε | |
752 | +acro {J}jot′e Jocie ε | |
753 | +acro {Z}zet′e Zecie ε | |
754 | +acro {Ż}žet′e Żecie ε | |
755 | + | |
756 | +acro b′e b-ie ε | |
757 | +acro d′e d-zie ε | |
758 | +acro f′e f-ie ε | |
759 | +acro m′e m-ie ε | |
760 | +acro n′e n-ie ε | |
761 | +acro p′e p-ie ε | |
762 | +acro ře r-ze ε | |
763 | +acro s′e s-ie ε | |
764 | +acro t′e -cie ε | |
765 | +acro {v}v′e v-ie ε | |
766 | +acro v w-ie ε | |
767 | +acro {x}ks′e x-ie ε | |
768 | +acro z z-ie ε | |
769 | +acro {j}jot′e j-ocie ε | |
770 | +acro {z}zet′e z-ecie ε | |
771 | +acro {ż}žet′e ż-ecie ε | |
772 | + | |
773 | +acro {T}t′e CIE ε | |
774 | +acro {A}a{T}t′e acie ε | |
775 | +acro {E}e{T}t′e ecie ε | |
776 | +acro {I}j{T}t′e icie ε | |
777 | +acro {O}o{T}t′e ocie ε | |
778 | +acro {U}u{T}t′e ucie ε | |
779 | + | |
780 | +acro {C}k′i C-i ε | |
781 | +acro {C}k′em C-iem ε | |
782 | +acro {G}g′i G-i ε | |
783 | +acro {G}g′em G-iem ε | |
784 | +acro {J}ji J-i ε | |
785 | +acro {K}k′i K-i ε | |
786 | +acro {K}k′em K-iem ε | |
787 | +acro {L}li L-i ε | |
788 | +acro - γ | |
789 | + | |
790 | + | |
... | ... |
morphology/fonetics.ml
... | ... | @@ -45,13 +45,13 @@ let load_rules filename = |
45 | 45 | (match status with |
46 | 46 | Symbols -> status, StringMap.add symbol_defs key (Xstring.split " " vals), rev_symbol_defs, rules, rev_rules |
47 | 47 | | RevSymbols -> status, symbol_defs, StringMap.add rev_symbol_defs key (Xstring.split " " vals), rules, rev_rules |
48 | - | _ -> failwith "Fonetics.load_rules: status 1") | |
48 | + | _ -> failwith ("Fonetics.load_rules status 1: " ^ key ^ "\t" ^ vals)) | |
49 | 49 | | [lang;v;r;s] -> |
50 | 50 | (match status with |
51 | 51 | Rules -> status, symbol_defs, rev_symbol_defs, {set=v; find=r; suf=s; lang=lang} :: rules, rev_rules |
52 | 52 | | RevRules -> status, symbol_defs, rev_symbol_defs, rules, {set=r; find=v; suf=s; lang=lang} :: rev_rules |
53 | 53 | | _ -> failwith "Fonetics.load_rules: status 2") |
54 | - | line -> failwith ("load_rules: " ^ (String.concat "\t" line))) in | |
54 | + | line -> failwith ("load_rules: " ^ (String.concat "\t" line))) in | |
55 | 55 | if status <> Rules && status <> RevRules then failwith "Fonetics.load_rules: status 3" else |
56 | 56 | symbol_defs, rev_symbol_defs, rules, rev_rules |
57 | 57 | |
... | ... |
morphology/generate.ml
... | ... | @@ -276,8 +276,11 @@ let _ = |
276 | 276 | find_not_validated_forms compound_rule_trees results_path verb_polimorf_filename "results/not_validated_p_verb2.tab"; *) |
277 | 277 | (* find_not_validated_forms compound_rule_trees results_path "sgjp_selected.tab" "results/not_validated_verb.tab"; *) |
278 | 278 | (* find_not_validated_forms compound_rule_trees results_path "lang_en_sgjp-20170730.tab" "results/lang_en.tab"; *) |
279 | - find_not_validated_forms compound_rule_trees results_path "lang_fr_sgjp-20170730.tab" "results/lang_fr.tab"; | |
279 | + (* find_not_validated_forms compound_rule_trees results_path "lang_fr_sgjp-20170730.tab" "results/lang_fr.tab"; *) | |
280 | 280 | (* find_not_validated_forms compound_rule_trees results_path "lang_de_sgjp-20170730.tab" "results/lang_de.tab"; *) |
281 | + (* find_not_validated_forms compound_rule_trees results_path "lang_acro_sgjp-20170730.tab" "results/lang_acro.tab"; *) | |
282 | + (* find_not_validated_forms compound_rule_trees results_path "lang_la_sgjp-20170730.tab" "results/lang_la.tab"; *) | |
283 | + (* find_not_validated_forms compound_rule_trees results_path "lang_es_sgjp-20170730.tab" "results/lang_es.tab"; *) | |
281 | 284 | () |
282 | 285 | |
283 | 286 | let find_not_validated_lemmata rules path filename out_filename = |
... | ... | @@ -409,14 +412,24 @@ let _ = |
409 | 412 | (* test_lemmatize "Radetzky" "Radetzky’ego"; *) |
410 | 413 | (* test_lemmatize "Max" "Maksa"; *) |
411 | 414 | (* test_lemmatize "Montesquieu" "Montesquieugo"; *) |
415 | + (* test_lemmatize "Depardieu" "Depardieugo"; *) | |
412 | 416 | (* test_lemmatize "Java" "Javie"; *) |
413 | 417 | (* test_lemmatize "anglaise" "anglaise’a"; *) |
414 | - test_lemmatize "Aristide" "Aristide’a"; | |
415 | - test_lemmatize "Beaumarchais" "Beaumarchais’go"; | |
418 | + (* test_lemmatize "Aristide" "Aristide’a"; *) | |
419 | + (* test_lemmatize "Barrès" "Barrès’go"; *) | |
420 | + (* test_lemmatize "Beaumarchais" "Beaumarchais’go"; *) | |
416 | 421 | (* test_lemmatize "Beauvoir" "Beauvoira"; *) |
417 | 422 | (* test_lemmatize "Bernoulli" "Bernoulliego"; *) |
418 | 423 | (* test_lemmatize "Astaire" "Astaire’a"; *) |
419 | 424 | (* test_lemmatize "Avignon" "Avignonami"; *) |
425 | + (* test_lemmatize "Benveniste" "Benveniste’a"; *) | |
426 | + (* test_lemmatize "Kayah" "Kai"; *) | |
427 | + (* test_lemmatize "jockey" "jockei"; *) | |
428 | + (* test_lemmatize "Radetzky" "Radetzky’ego"; *) | |
429 | + (* test_lemmatize "bonvivant" "bonvivantach"; *) | |
430 | + (* test_lemmatize "Lefebvre" "Lefebvre’a"; *) | |
431 | + (* test_lemmatize "Kayah" "Kayom"; *) | |
432 | + (* test_lemmatize "Dixa" "Diksie"; *) | |
420 | 433 | (* test_lemmatize "" ""; |
421 | 434 | test_lemmatize "" ""; |
422 | 435 | test_lemmatize "" ""; *) |
... | ... | @@ -503,6 +516,32 @@ let _ = |
503 | 516 | (* find_not_interp_validated_entries interp_compound_rule_trees results_path verb_sgjp_filename "results/selected_verb.tab"; *) |
504 | 517 | (* find_not_interp_validated_entries interp_compound_rule_trees results_path "verb_sgjp_no_pref.tab" "results/selected_verb.tab"; *) |
505 | 518 | (* find_not_interp_validated_forms interp_compound_rule_trees results_path "lang_en_sgjp-20170730.tab" "results/lang_en.tab"; *) |
519 | + (* find_not_interp_validated_forms interp_compound_rule_trees results_path "lang_fr_sgjp-20170730.tab" "results/lang_fr.tab"; *) | |
520 | + (* find_not_interp_validated_forms interp_compound_rule_trees results_path "lang_de_sgjp-20170730.tab" "results/lang_de.tab"; *) | |
521 | + (* find_not_interp_validated_forms interp_compound_rule_trees results_path "lang_acro_sgjp-20170730.tab" "results/lang_acro.tab"; *) | |
522 | + () | |
523 | + | |
524 | +let test_interp_lemmatize lemma orth = | |
525 | + printf "test_interp_lemmatize: %s %s\n%!" lemma orth; | |
526 | + let simple_lemma = Stem.simplify_lemma lemma in | |
527 | + let phon_orths = Fonetics.translate(*_and_check*) true Fonetics.rules (*Fonetics.rev_rules*) orth in | |
528 | + printf "phon_orths: \n %s\n%!" (String.concat "\n " (Xlist.map phon_orths Fonetics.string_of_phon)); | |
529 | + Xlist.iter phon_orths (fun phon_orth -> | |
530 | + Xlist.iter (Rules.CharTrees.find interp_compound_rule_trees phon_orth.phon) (fun (stem,rule) -> | |
531 | + let candidate_lemmas = Fonetics.rev_translate2 true Fonetics.rev_rules (stem ^ rule.set) (phon_orth.mapping) in | |
532 | + Xlist.iter candidate_lemmas (fun candidate_lemma -> | |
533 | + if candidate_lemma = simple_lemma then (*printf "E" else printf " ";*) | |
534 | + printf " %s %s %s %s %s\n%!" phon_orth.phon stem (string_of_rule rule) candidate_lemma rule.interp))) | |
535 | + | |
536 | +let _ = | |
537 | + (* test_interp_lemmatize "Benveniste" "Benveniście"; *) | |
538 | + (* test_interp_lemmatize "allemande" "allemandzie"; *) | |
539 | + (* test_interp_lemmatize "Depardieu" "Depardieuch"; *) | |
540 | + (* test_interp_lemmatize "Braille" "Braille’u"; *) | |
541 | + (* test_interp_lemmatize "FAMA" "FAMA"; *) | |
542 | + (*test_interp_lemmatize "" ""; | |
543 | + test_interp_lemmatize "" ""; | |
544 | + test_interp_lemmatize "" "";*) | |
506 | 545 | () |
507 | 546 | |
508 | 547 | (* Generowanie reguł dla interpretacji *) |
... | ... |