Commit 27ff726e3f89173b19f0b1a410c79fa25f606ed2
1 parent
af8828aa
przetwarzenie realizacji z Walentego
Showing
24 changed files
with
849 additions
and
901 deletions
lexSemantics/.gitignore
0 → 100644
1 | +test | |
... | ... |
lexSemantics/ENIAMlexSemantics.ml
1 | 1 | (* |
2 | 2 | * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
3 | + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | 5 | * |
6 | 6 | * This library is free software: you can redistribute it and/or modify |
7 | 7 | * it under the terms of the GNU Lesser General Public License as published by |
... | ... |
lexSemantics/ENIAMlexSemanticsData.ml
1 | 1 | (* |
2 | 2 | * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
3 | + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | 5 | * |
6 | 6 | * This library is free software: you can redistribute it and/or modify |
7 | 7 | * it under the terms of the GNU Lesser General Public License as published by |
... | ... |
lexSemantics/ENIAMlexSemanticsTypes.ml
1 | 1 | (* |
2 | 2 | * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
3 | + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | 5 | * |
6 | 6 | * This library is free software: you can redistribute it and/or modify |
7 | 7 | * it under the terms of the GNU Lesser General Public License as published by |
... | ... |
lexSemantics/ENIAMvalence.ml
0 → 100644
1 | +(* | |
2 | + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. | |
3 | + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | + * | |
6 | + * This library is free software: you can redistribute it and/or modify | |
7 | + * it under the terms of the GNU Lesser General Public License as published by | |
8 | + * the Free Software Foundation, either version 3 of the License, or | |
9 | + * (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + * GNU Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public License | |
17 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | + *) | |
19 | + | |
20 | +open ENIAMwalTypes | |
21 | +open Xstd | |
22 | + | |
23 | +let simplify_pos = function (* Collapses a fine-grained part-of-speech tag into one of "noun", "adj", "verb" or "pron"; any tag not listed here is returned unchanged. *) | |
24 | + "subst" -> "noun" | |
25 | + | "depr" -> "noun" | |
26 | + | "psubst" -> "noun" | |
27 | + | "pdepr" -> "noun" | |
28 | + | "adj" -> "adj" | |
29 | + | "adjc" -> "adj" | |
30 | + | "adjp" -> "adj" | |
31 | + | "ger" -> "verb" | |
32 | + | "pact" -> "verb" | |
33 | + | "ppas" -> "verb" | |
34 | + | "fin" -> "verb" | |
35 | + | "bedzie" -> "verb" | |
36 | + | "praet" -> "verb" | |
37 | + | "winien" -> "verb" | |
38 | + | "impt" -> "verb" | |
39 | + | "imps" -> "verb" | |
40 | + | "inf" -> "verb" | |
41 | + | "pcon" -> "verb" | |
42 | + | "pant" -> "verb" | |
43 | + | "pred" -> "verb" | |
44 | + | "ppron12" -> "pron" | |
45 | + | "ppron3" -> "pron" | |
46 | + | "siebie" -> "pron" | |
47 | + | s -> s | |
48 | + | |
49 | +let transform_zeby = function (* Expands the Zeby complementizer by negation: only "że" under Aff, both "że" and "żeby" under Negation or NegationUndef. *) | |
50 | + Aff -> [Comp "że"] | |
51 | + | Negation -> [Comp "że";Comp "żeby"] | |
52 | + | NegationUndef -> [Comp "że";Comp "żeby"] | |
53 | + | |
54 | +let transform_gdy = function (* Expands the Gdy complementizer by the mood string; the empty mood yields both variants and an unrecognised mood raises Failure. *) | |
55 | + "indicative" -> [Comp "gdy"] | |
56 | + | "imperative" -> [Comp "gdy"] | |
57 | + | "conditional" -> [Comp "gdyby"] | |
58 | + | "gerundial" -> [Comp "gdy"] | |
59 | + | "" -> [Comp "gdy";Comp "gdyby"] | |
60 | + | s -> failwith ("transform_gdy: " ^ s) | |
61 | + | |
62 | +let transform_comp negation mood = function (* Expands one complementizer into a list: Zeby and Gdy are resolved through transform_zeby and transform_gdy, Comp and CompUndef are kept as singletons. *) | |
63 | + Comp comp -> [Comp comp] | |
64 | + | Zeby -> transform_zeby negation | |
65 | + | Gdy -> transform_gdy mood | |
66 | + | CompUndef -> [CompUndef] | |
67 | + | |
68 | +let transform_str mood negation = (* Resolves structural case into concrete cases: genitive for the "gerundial" mood, otherwise acc, gen or both depending on negation. *) | |
69 | + if mood = "gerundial" then [Case "gen"] else | |
70 | + match negation with | |
71 | + Aff -> [Case "acc"] | |
72 | + | Negation -> [Case "gen"] | |
73 | + | NegationUndef -> [Case "acc";Case "gen"] | |
74 | + | |
75 | +let transform_np_phrase lemma = function (* Rewrites a phrase from the schema of a "subst" entry (see transform_schema); an unhandled phrase is printed to stdout together with the lemma and returned unchanged. *) | |
76 | + NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)] | |
77 | + | NP(CaseAgr) -> [NP(CaseAgr)(*;NumP(CaseAgr)*)] | |
78 | + | NCP(Case c,ctype,comp) -> [NCP(Case c,ctype,comp)] | |
79 | + | AdjP(Case _) as morf -> [morf] (* only 'zagłada adjp(postp)' *) | |
80 | + | AdjP(CaseAgr) -> [AdjP(AllAgr)] | |
81 | + | AdjP(Str) -> [AdjP(AllAgr)] (* probably an error in Walenty; occurs only with 'barwa', 'bieda', 'głód', 'kolor', 'nędza', 'śmierć', 'usta' *) | |
82 | + | CP(ctype,comp) as morf -> [morf] | |
83 | + | PrepNP _ as morf -> [morf] | |
84 | + | PrepAdjP _ as morf -> [morf] (* this looks like a systematic error in Walenty: xp(abl[prepadjp(z,gen)]) *) | |
85 | + | ComprepNP _ as morf -> [morf] | |
86 | + | ComparP _ as morf -> [morf] | |
87 | + | PrepNCP _ as morf -> [morf] | |
88 | + | AdvP as morf -> [morf] (* occurs only for the lemmas: cyrk, trwałość x2, zagłada *) | |
89 | + | FixedP _ as morf -> [morf] | |
90 | + | Or as morf -> [morf] | |
91 | + (* | Pro as morf -> [morf] *) | |
92 | + | Null as morf -> [morf] | |
93 | + | phrase -> print_endline ("transform_np_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase] | |
94 | + | |
95 | +let transform_np_pos lemma = function (* Rewrites the pos of a lexicalised argument in the schema of a "subst" entry; CaseAgr and Str are mostly turned into AllAgr, and an unhandled pos is printed to stdout and returned unchanged. *) | |
96 | + | SUBST(_,Case _) as morf -> [morf] | |
97 | + | PPRON3(_,Case _) as morf -> [morf] | |
98 | + | SUBST(_,CaseAgr) as morf -> [morf] | |
99 | + | SUBST(n,Str) -> [ADJ(n,AllAgr,GenderUndef,Grad "pos")] (* error in Walenty: 'zła godzina' *) | |
100 | + | ADJ(_,Case _,_,_) as morf -> [morf] | |
101 | + | ADJ(n,CaseAgr,GenderAgr,gr) -> [ADJ(n,AllAgr,GenderAgr,gr)] | |
102 | + | PACT(n,CaseAgr,g,a,neg) -> [PACT(n,AllAgr,g,a,neg)] | |
103 | + | PPAS(_,Case _,_,_,_) as morf -> [morf] | |
104 | + | PPAS(n,CaseAgr,g,a,neg) -> [PPAS(n,AllAgr,g,a,neg)] | |
105 | + | ADJ(n,Str,g,gr) -> [ADJ(n,AllAgr,g,gr)] | |
106 | + | PPAS(n,Str,g,a,neg) -> [PPAS(n,AllAgr,g,a,neg)] | |
107 | + | PREP(Case _) as morf -> [morf] | |
108 | + | ADV _ as morf -> [morf] (* adverbial realizations end up here *) | |
109 | + | COMP _ as morf -> [morf] | |
110 | + | QUB as morf -> [morf] | |
111 | + | PERS _ as morf -> [morf] | |
112 | + | pos -> print_endline ("transform_np_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos] | |
113 | + | |
114 | +let transform_adj_phrase lemma = function (* Rewrites a phrase from the schema of an "adj" entry (see transform_schema); NP(Part) is expanded to gen and acc, and an unhandled phrase is printed to stdout and returned unchanged. *) | |
115 | + NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)] | |
116 | + | NP(Part) -> [NP(Case "gen");NP(Case "acc")(*;NumP(Case "gen");NumP(Case "acc")*)] (* single occurrence: 'krewny' *) | |
117 | + | NCP(Case c,ctype,comp) -> [NCP(Case c,ctype,comp)] | |
118 | + | AdjP(CaseAgr) -> [AdjP(AllAgr)] (* single occurrence: 'cały szczęśliwy'; type raising might be useful here *) | |
119 | + | CP(ctype,comp) as morf -> [morf] | |
120 | + | PrepNP _ as morf -> [morf] | |
121 | + | PrepAdjP _ as morf -> [morf] | |
122 | + | ComprepNP _ as morf -> [morf] | |
123 | + | ComparP _ as morf -> [morf] | |
124 | + | PrepNCP _ as morf -> [morf] | |
125 | + | InfP _ as morf -> [morf] | |
126 | + | AdvP as morf -> [morf] | |
127 | + (* | FixedP _ as morf -> [morf]*) | |
128 | + | Or as morf -> [morf] (* single occurrence: 'jednoznaczny' *) | |
129 | + (* | Pro as morf -> [morf] *) | |
130 | + | Null as morf -> [morf] | |
131 | + | morf -> print_endline ("transform_adj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf] | |
132 | + | |
133 | +let transform_adj_pos lemma = function (* Rewrites the pos of a lexicalised argument in the schema of an "adj" entry; ADJ with CaseAgr becomes AllAgr, and an unhandled pos is printed to stdout and returned unchanged. *) | |
134 | + | ADJ(n,CaseAgr,g,gr) -> [ADJ(n,AllAgr,g,gr)] | |
135 | + | PREP(Case _) as morf -> [morf] | |
136 | + | ADV _ as morf -> [morf] | |
137 | + | QUB as morf -> [morf] | |
138 | + | morf -> print_endline ("transform_adj_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos morf); [morf] | |
139 | + | |
140 | +let transform_adv_phrase lemma = function (* Passes through the phrase types accepted in the schema of an "adv" entry (see transform_schema); any other phrase is printed to stdout with the lemma and returned unchanged. *) | |
141 | + NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)] | |
142 | + | NCP(Case c,ctype,comp) -> [NCP(Case c,ctype,comp)] | |
143 | + | CP(ctype,comp) as morf -> [morf] | |
144 | + | PrepNP _ as morf -> [morf] | |
145 | + | PrepAdjP _ as morf -> [morf] | |
146 | + | ComprepNP _ as morf -> [morf] | |
147 | + | ComparP _ as morf -> [morf] | |
148 | + | PrepNCP _ as morf -> [morf] | |
149 | + | InfP _ as morf -> [morf] | |
150 | + | AdvP as morf -> [morf] | |
151 | +(* | Or as morf -> [morf]*) | |
152 | + (* | Pro as morf -> [morf] *) | |
153 | + | Null as morf -> [morf] | |
154 | +(* | AdjP(CaseAgr) as morf -> [morf]*) | |
155 | + (* | FixedP _ as morf -> [morf]*) | |
156 | + | morf -> print_endline ("transform_adv_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf] | |
157 | + | |
158 | +let transform_adv_pos lemma = function (* Passes through the pos values accepted for lexicalised arguments of an "adv" entry; any other pos is printed to stdout with the lemma and returned unchanged. *) | |
159 | + SUBST(_,Case _) as morf -> [morf] | |
160 | + (* | ADJ(_,CaseAgr,_,_) as morf -> [morf]*) | |
161 | + | COMP _ as morf -> [morf] | |
162 | + | PREP(Case _) as morf -> [morf] | |
163 | + | COMPAR _ as morf -> [morf] | |
164 | + | ADV _ as morf -> [morf] (* adverbial realizations end up here *) | |
165 | + | morf -> print_endline ("transform_adv_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos morf); [morf] | |
166 | + | |
167 | +let transform_prep_phrase lemma = function (* No phrase type is handled for "prep" entries: every phrase is printed to stdout with the lemma and returned unchanged. *) | |
168 | + | phrase -> print_endline ("transform_prep_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase] | |
169 | + | |
170 | +let transform_prep_pos lemma = function (* Rewrites the pos of a lexicalised argument of a "prep" entry: pos values with a concrete case pass through, SUBST with Str becomes CaseAgr, anything else is printed to stdout and returned unchanged. *) | |
171 | + | SUBST(_,Case _) as morf -> [morf] | |
172 | + | SIEBIE(Case _) as morf -> [morf] | |
173 | + | PPRON12(_,Case _) as morf -> [morf] | |
174 | + | PPRON3(_,Case _) as morf -> [morf] | |
175 | + | SUBST(n,Str) -> [SUBST(n,CaseAgr)] | |
176 | + | NUM(Case _,_) as morf -> [morf] | |
177 | + | ADJ(_,Case _,_,_) as morf -> [morf] | |
178 | + | GER(_,Case _,_,_,_) as morf -> [morf] | |
179 | + | PPAS(_,Case _,_,_,_) as morf -> [morf] | |
180 | +(* | ADV _ as morf -> [morf] | |
181 | + | QUB as morf -> [morf]*) | |
182 | + | pos -> print_endline ("transform_prep_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos] | |
183 | + | |
184 | +let transform_comprep_phrase lemma = function (* Passes through the phrase types accepted in the schema of a "comprep" entry; any other phrase is printed to stdout with the lemma and returned unchanged. *) | |
185 | + NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)] | |
186 | + | NCP(Case c,ctype,comp) -> [NCP(Case c,ctype,comp)] | |
187 | + | PrepNP _ as morf -> [morf] | |
188 | + | PrepNCP _ as morf -> [morf] | |
189 | + | phrase -> print_endline ("transform_comprep_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase] | |
190 | + | |
191 | +let transform_comprep_pos lemma = function (* Passes through SUBST and NUM with a concrete case for lexicalised arguments of a "comprep" entry; any other pos is printed to stdout with the lemma and returned unchanged. *) | |
192 | + | SUBST(_,Case _) as morf -> [morf] | |
193 | +(* | SUBST(n,Str) -> [SUBST(n,CaseAgr)]*) | |
194 | + | NUM(Case _,_) as morf -> [morf] | |
195 | +(* | ADJ(_,Case _,_,_) as morf -> [morf] | |
196 | + | GER(_,Case _,_,_,_,_) as morf -> [morf] | |
197 | + | PPAS(_,Case _,_,_,_) as morf -> [morf] | |
198 | + | ADV _ as morf -> [morf] | |
199 | + | QUB as morf -> [morf]*) | |
200 | + | pos -> print_endline ("transform_comprep_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos] | |
201 | + | |
202 | +let transform_compar_phrase lemma = function (* Rewrites a phrase from the schema of a "compar" entry: NP(Str) is expanded to five concrete cases, FixedP passes through, anything else is printed to stdout and returned unchanged. *) | |
203 | + | NP(Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> NP(Case case)) (* FIXME: check who controls! *) (* FIXME: make this agree with the comparative *) | |
204 | + | FixedP _ as morf -> [morf] | |
205 | + | phrase -> print_endline ("transform_compar_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase] | |
206 | + | |
207 | +let transform_compar_pos lemma = function (* Rewrites the pos of a lexicalised argument of a "compar" entry: concrete cases pass through, Str and agreement markers are replaced by the Undef values, anything else is printed to stdout and returned unchanged. *) | |
208 | + | SUBST(_,Case _) as morf -> [morf] | |
209 | + | ADJ(_,Case _,_,_) as morf -> [morf] | |
210 | + | PREP(Case _) as morf -> [morf] | |
211 | + | SUBST(Number n,Str) -> [SUBST(Number n,CaseUndef)] | |
212 | + | SUBST(NumberAgr,Str) -> [SUBST(NumberUndef,CaseUndef)] | |
213 | + | SUBST(NumberUndef,Str) -> [SUBST(NumberUndef,CaseUndef)] | |
214 | + | PPAS(_,Case _,_,_,_) as morf -> [morf] | |
215 | + | PPAS(NumberAgr,Str,GenderAgr,a,neg) -> [PPAS(NumberUndef,CaseUndef,GenderUndef,a,neg)] (* FIXME: same situation as in "jako" *) | |
216 | + | PPAS(NumberAgr,CaseAgr,GenderAgr,a,neg) -> [PPAS(NumberUndef,CaseUndef,GenderUndef,a,neg)] (* FIXME: same situation as in "jako" *) | |
217 | + | ADJ(NumberAgr,Str,GenderAgr,gr) -> [ADJ(NumberUndef,CaseUndef,GenderUndef,gr)] (* FIXME: same situation as in "jako" *) | |
218 | + | ADJ(NumberAgr,CaseAgr,GenderAgr,gr) -> [ADJ(NumberUndef,CaseUndef,GenderUndef,gr)] (* FIXME: same situation as in "jako" *) | |
219 | + | NUM(Case _,_) as morf -> [morf] | |
220 | + | pos -> print_endline ("transform_compar_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos] | |
221 | + | |
222 | +let transform_comp_phrase lemma = function (* No phrase type is handled for "comp" entries: every phrase is printed to stdout with the lemma and returned unchanged. *) | |
223 | + | phrase -> print_endline ("transform_comp_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase] | |
224 | + | |
225 | +let transform_comp_pos lemma = function (* Passes PERS through for lexicalised arguments of a "comp" entry; any other pos is printed to stdout with the lemma and returned unchanged. *) | |
226 | + | PERS _ as morf -> [morf] | |
227 | + | pos -> print_endline ("transform_comp_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos] | |
228 | + | |
229 | +let transform_qub_phrase lemma = function (* No phrase type is handled for "qub" entries: every phrase is printed to stdout with the lemma and returned unchanged. *) | |
230 | + | phrase -> print_endline ("transform_qub_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase] | |
231 | + | |
232 | +let transform_qub_pos lemma = function (* Passes QUB through for lexicalised arguments of a "qub" entry; any other pos is printed to stdout with the lemma and returned unchanged. *) | |
233 | + | QUB as morf -> [morf] | |
234 | + | pos -> print_endline ("transform_qub_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos] | |
235 | + | |
236 | +let transform_siebie_phrase lemma = function (* No phrase type is handled for "siebie" entries: every phrase is printed to stdout with the lemma and returned unchanged. *) | |
237 | + | phrase -> print_endline ("transform_siebie_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase); [phrase] | |
238 | + | |
239 | +let transform_siebie_pos lemma = function (* For a "siebie" entry, turns a fully agreeing ADJ with CaseAgr into AllAgr; any other pos is printed to stdout with the lemma and returned unchanged. *) | |
240 | + | ADJ(NumberAgr,CaseAgr,GenderAgr,gr) -> [ADJ(NumberAgr,AllAgr,GenderAgr,gr)] | |
241 | + | pos -> print_endline ("transform_siebie_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos); [pos] | |
242 | + | |
243 | +let transform_pers_subj_phrase lemma negation mood = function (* FIXME: prepnp(na,loc) *) (* Rewrites a subject phrase of a personal verb form: structural case becomes NomAgr and NP(Part) becomes gen/acc; negation and mood are accepted but not used here. Unhandled phrases are printed to stdout and returned unchanged. *) | |
244 | + | NP(Str) -> [NP(NomAgr)(*;NumP(NomAgr)*)] | |
245 | + | NP(Part) -> [NP(Case "gen");NP(Case "acc")(*;NumP(Case "gen");NumP(Case "acc")*)] (* only in 'nalewać', 'nalać', 'ponalewać', 'najechać','uzbierać' *) | |
246 | + | NCP(Str,ctype,comp) -> [NCP(NomAgr,ctype,comp)] | |
247 | + | CP(ctype,comp) as morf -> [morf] | |
248 | + | InfP _ as morf -> [morf] | |
249 | + | Or as morf -> [morf] | |
250 | + (* | Pro -> [ProNG] *) | |
251 | + | Null -> [Null] | |
252 | + | morf -> print_endline ("transform_pers_subj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf] | |
253 | + | |
254 | +let transform_pers_subj_pos lemma negation mood = function (* Rewrites the pos of a lexicalised subject of a personal verb form: Str and nominative become NomAgr; negation and mood are accepted but not used here. Unhandled pos values are printed to stdout and returned unchanged. *) | |
255 | + (* COMP _ as morf -> [morf]*) | |
256 | + | SUBST(n,Str) -> [SUBST(n,NomAgr)] | |
257 | + | SUBST(n,Case "nom") -> [SUBST(n,NomAgr)] (* looks like an error in Walenty, but it does not matter *) | |
258 | + | NUM(Str,g) -> [NUM(NomAgr,g)] | |
259 | + | NUM(Case "nom",g) -> [NUM(NomAgr,g)] | |
260 | +(* | ADJ(n,Str,g,gr) -> [ADJ(n,NomAgr,g,gr)]*) | |
261 | + | morf -> print_endline ("transform_pers_subj_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos morf); [morf] | |
262 | + | |
263 | +let transform_ger_subj_phrase lemma negation mood = function (* Rewrites a subject phrase for a gerund: structural case becomes genitive or a "przez"+acc prepositional phrase; negation and mood are accepted but not used here. Unhandled phrases are printed to stdout and returned unchanged. *) | |
264 | + | NP(Str) -> [NP(Case "gen");PrepNP("przez",Case "acc")(*;NumP(Case "gen")*)(*;PrepNumP("przez",Case "acc")*)] (* FIXME: is przez:acc possible? *) | |
265 | + | NP(Part) -> [NP(Case "gen")(*;NP(Case "acc")*)(*;NumP(Case "gen");NumP(Case "acc")*)] | |
266 | + | NCP(Str,ctype,comp) -> [NCP(Case "gen",ctype,comp);PrepNCP("przez",Case "acc",ctype,comp)] (* FIXME: is przez:acc possible? *) | |
267 | + | CP(ctype,comp) as morf -> [morf] | |
268 | + | InfP _ as morf -> [morf] (* FIXME: is this possible? *) | |
269 | + | Or as morf -> [morf] | |
270 | + (* | Pro -> if control then [Pro] else [Null] *) | |
271 | + | Null -> [Null] | |
272 | + | morf -> print_endline ("transform_ger_subj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf] | |
273 | + | |
274 | +let transform_ger_subj_pos lemma negation mood = function (* FIXME: ADV(_) *) (* Rewrites the pos of a lexicalised subject for a gerund: Str and nominative become genitive; negation and mood are accepted but not used here. Unhandled pos values are printed to stdout, tagged with this function name, and returned unchanged. *) | |
275 | + (* COMP _ as morf -> [morf] (* FIXME: is this possible? *)*) | |
276 | + | SUBST(n,Str) -> [SUBST(n,Case "gen")] | |
277 | + | SUBST(n,Case "nom") -> [SUBST(n,Case "gen")] (* looks like an error in Walenty, but it does not matter *) | |
278 | + | NUM(Str,g) -> [NUM(Case "gen",g)] | |
279 | + | NUM(Case "nom",g) -> [NUM(Case "gen",g)] | |
280 | +(* | ADJ(n,Str,g,gr) -> [ADJ(n,Case "gen",g,gr)]*) | |
281 | + | morf -> print_endline ("transform_ger_subj_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos morf); [morf] | |
282 | + | |
283 | +let transform_ppas_subj_phrase lemma negation mood control = function (* Rewrites a subject phrase for a passive participle: structural case becomes a "przez"+acc prepositional phrase; negation, mood and control are accepted but not used here. Unhandled phrases are printed to stdout and returned unchanged. *) | |
284 | + | NP(Str) -> [PrepNP("przez",Case "acc")(*;PrepNumP("przez",Case "acc")*)] | |
285 | + | NCP(Str,ctype,comp) -> [PrepNCP("przez",Case "acc",ctype,comp)] | |
286 | + | CP(ctype,comp) as morf -> [morf] | |
287 | + (* | Pro -> if control then [Pro] else [Null] *) | |
288 | + | morf -> print_endline ("transform_ppas_subj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf] | |
289 | + | |
290 | +let transform_pers_phrase lemma negation mood = function (* Rewrites a non-subject phrase of a verbal schema: structural case is resolved through transform_str, and some phrases are treated differently when mood is "gerundial". Unhandled phrases are printed to stdout and returned unchanged. *) | |
291 | + | NP(Str) -> List.flatten (Xlist.map (transform_str mood negation) (fun case -> [NP case(*;NumP(case)*)])) | |
292 | + | NP(Part) -> [NP(Case "gen")] @ (if mood = "gerundial" then [] else [NP(Case "acc")(*;NumP(Case "gen");NumP(Case "acc")*)]) | |
293 | + | NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)] | |
294 | + | NCP(Str,ctype,comp) -> List.flatten (Xlist.map (transform_str mood negation) (fun case -> [NCP(case,ctype,comp)])) | |
295 | + | NCP(Part,ctype,comp) -> List.flatten (Xlist.map (transform_str mood negation) (fun case -> [NCP(case,ctype,comp)])) | |
296 | + | NCP(Case case,ctype,comp) -> [NCP(Case case,ctype,comp)] | |
297 | + | AdjP(Str) -> Xlist.map (transform_str mood negation) (fun case -> AdjP case) (* FIXME: number and gender agreement is skipped - doable by means of control *) | |
298 | + | AdjP CaseAgr as morf -> if mood = "gerundial" then [AdjP AllAgr] else (print_endline ("transform_pers_phrase2: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf]) | |
299 | + | AdjP(Case _) as morf -> [morf] (* FIXME: number and gender agreement is skipped - doable by means of control *) | |
300 | + | CP(ctype,comp) as morf -> [morf] | |
301 | + | PrepNP _ as morf -> [morf] | |
302 | + | PrepAdjP _ as morf -> [morf] (* FIXME: number and gender agreement is skipped - doable by means of control *) | |
303 | + | ComprepNP _ as morf -> [morf] | |
304 | + | ComparP _ as morf -> [morf] | |
305 | + | PrepNCP _ as morf -> [morf] | |
306 | + | InfP _ as morf -> [morf] | |
307 | + | AdvP -> if mood = "gerundial" then [AdjP AllAgr] else [AdvP] | |
308 | + | FixedP _ as morf -> [morf] | |
309 | + | Or as morf -> [morf] | |
310 | + (* | Pro as morf -> [morf] *) | |
311 | + | Null as morf -> [morf] | |
312 | + | morf -> print_endline ("transform_pers_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase morf); [morf] | |
313 | + | |
314 | +let transform_pers_pos lemma negation mood = function (* Rewrites the pos of a non-subject lexicalised argument of a verbal schema: Str is resolved through transform_str, Part is expanded to gen and acc, pos values with a concrete case pass through. Unhandled pos values are printed to stdout and returned unchanged. *) | |
315 | + | SUBST(n,Str) -> Xlist.map (transform_str mood negation) (fun case -> SUBST(n,case)) | |
316 | + | PPRON12(n,Str) -> Xlist.map (transform_str mood negation) (fun case -> PPRON12(n,case)) | |
317 | + | PPRON3(n,Str) -> Xlist.map (transform_str mood negation) (fun case -> PPRON3(n,case)) | |
318 | + | SIEBIE(Str) -> Xlist.map (transform_str mood negation) (fun case -> SIEBIE(case)) | |
319 | + | NUM(Str,g) -> Xlist.map (transform_str mood negation) (fun case -> NUM(case,g)) | |
320 | + | ADJ(n,Str,g,gr) -> Xlist.map (transform_str mood negation) (fun case -> ADJ(n,case,g,gr)) | |
321 | +(* | PPAS(n,Str,g,a,neg) -> Xlist.map (transform_str negation) (fun case -> PPAS(n,Str,g,a,neg))*) | |
322 | + | SUBST(n,Part) -> [SUBST(n,Case "gen");SUBST(n,Case "acc")] | |
323 | + | ADJ(n,Part,g,gr) -> [ADJ(n,Case "gen",g,gr);ADJ(n,Case "acc",g,gr)] | |
324 | + | ADJ(n,CaseAgr,g,gr) as morf -> if lemma = "siedzieć" then [morf] else (print_endline ("transform_pers_pos2: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos morf); [morf]) (* FIXME *) | |
325 | + | SUBST(_,Case _) as morf -> [morf] | |
326 | + | PPRON12(_,Case _) as morf -> [morf] | |
327 | + | PPRON3(_,Case _) as morf -> [morf] | |
328 | + | SIEBIE(Case _) as morf -> [morf] | |
329 | + | NUM(Case _,_) as morf -> [morf] | |
330 | + | PREP _ as morf -> [morf] | |
331 | + | ADJ(_,Case _,_,_) as morf -> [morf] | |
332 | + | PPAS(_,Case _,_,_,_) as morf -> [morf] | |
333 | +(* | SUBST(n,CaseAgr) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> SUBST(n,Case case)) (* FIXME: check who controls! *) | |
334 | + | ADJ(n,CaseAgr,g,gr) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> ADJ(n,Case case,g,gr)) (* FIXME: check who controls! *)*) | |
335 | + | COMPAR _ as morf -> [morf] | |
336 | + | COMP _ as morf -> [morf] | |
337 | + | INF _ as morf -> [morf] | |
338 | + | QUB as morf -> [morf] | |
339 | + | ADV grad -> (*if mood = "gerundial" then [ADJ(NumberAgr,AllAgr,GenderAgr,grad)] else*) [ADV grad] (* FIXME: this will not fix the lemma *) | |
340 | + | PERS _ as morf -> [morf] | |
341 | + | morf -> print_endline ("transform_pers_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos morf); [morf] | |
342 | + | |
343 | +let rec transform_comps negation mood = function (* Expands the complementizer of CP, NCP and PrepNCP phrases through transform_comp, recursing under E; every other phrase is returned as a singleton. *) | |
344 | + | CP(ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> CP(ctype,comp)) | |
345 | + | NCP(case,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> NCP(case,ctype,comp)) | |
346 | + | PrepNCP(prep,case,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> PrepNCP(prep,case,ctype,comp)) | |
347 | + | E phrase -> Xlist.map (transform_comps negation mood phrase) (fun phrase -> E phrase) | |
348 | + | morf -> [morf] | |
349 | + | |
350 | +let compars = StringSet.of_list ["jak"; "jako"; "niż"; "niczym" ;"niby"; "co"; "zamiast"] (* Lexemes that transform_preps reclassifies from prepositions to comparatives. *) | |
351 | + | |
352 | +let is_compar lex = StringSet.mem compars lex (* True when lex belongs to the compars set. *) | |
353 | + | |
354 | +(* FIXME: case, number and gender agreement is skipped - doable by means of control *) | |
355 | +let transform_preps morf = (* Two passes: first reclassifies prepositional items whose lexeme is in compars as comparative ones, then expands structural case into concrete cases. Raises Failure "transform_preps" for combinations that are not expected. *) | |
356 | + let morf = match morf with | |
357 | + | LexArg(id,lex,PREP c) -> if is_compar lex then LexArg(id,lex,COMPAR c) else LexArg(id,lex,PREP c) | |
358 | + | SimpleLexArg(lex,PREP c) -> if is_compar lex then SimpleLexArg(lex,COMPAR c) else SimpleLexArg(lex,PREP c) | |
359 | + | PrepNP(prep,c) -> if is_compar prep then ComparP(prep,c) else PrepNP(prep,c) | |
360 | + | PrepAdjP(prep,c) -> if is_compar prep then ComparP(prep,c) else PrepAdjP(prep,c) | |
361 | + | PrepNCP(prep,case,ctype,comp) as morf -> if is_compar prep then failwith "transform_preps" else morf | |
362 | + | morf -> morf in | |
363 | + match morf with | |
364 | + | ComparP(prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> ComparP(prep,Case case)) | |
365 | + | ComparP _ -> failwith "transform_preps" | |
366 | + | LexArg(id,lex,COMPAR Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> LexArg(id,lex,COMPAR (Case case))) | |
367 | + | SimpleLexArg(lex,COMPAR Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> SimpleLexArg(lex,COMPAR (Case case))) | |
368 | + | LexArg(id,lex,COMPAR (Case _)) as morf -> [morf] | |
369 | + | SimpleLexArg(lex,COMPAR (Case _)) as morf -> [morf] | |
370 | + | LexArg(id,lex,COMPAR _) -> failwith "transform_preps" | |
371 | + | SimpleLexArg(lex,COMPAR _) -> failwith "transform_preps" | |
372 | + | PrepNP("per",Str) -> [PrepNP("per",Case "nom");PrepNP("per",Case "voc")] (* FIXME: voc needs to be corrected in the lexicon *) | |
373 | + | PrepNP(_,Case _) as morf -> [morf] | |
374 | + | PrepAdjP(_,Case _) as morf -> [morf] | |
375 | + | PrepNCP(_,Case _,_,_) as morf -> [morf] | |
376 | + | PrepNP _ -> failwith "transform_preps" | |
377 | + | PrepAdjP _ -> failwith "transform_preps" | |
378 | + | PrepNCP _ -> failwith "transform_preps" | |
379 | + | LexArg(id,"w",PREP Str) -> [LexArg(id,"w",PREP (Case "acc"));LexArg(id,"w",PREP (Case "loc"));] | |
380 | + | SimpleLexArg("w",PREP Str) -> [SimpleLexArg("w",PREP (Case "acc"));SimpleLexArg("w",PREP (Case "loc"))] | |
381 | + | LexArg(id,lex,PREP (Case _)) as morf -> [morf] | |
382 | + | SimpleLexArg(lex,PREP (Case _)) as morf -> [morf] | |
383 | + | LexArg(id,lex,PREP _) -> failwith "transform_preps" | |
384 | + | SimpleLexArg(lex,PREP _) -> failwith "transform_preps" | |
385 | + | morf -> [morf] | |
386 | + | |
387 | +let transform_pers_schema lemma negation mood schema = (* Rewrites every position of a schema for a personal verb form: complementizers and prepositions are expanded first, then SUBJ positions go through the subject transforms and all other positions through transform_pers_pos / transform_pers_phrase. *) | |
388 | + Xlist.map schema (fun s -> | |
389 | + {s with morfs = | |
390 | + let morfs = List.flatten (Xlist.map s.morfs (transform_comps negation mood)) in | |
391 | + let morfs = List.flatten (Xlist.map morfs transform_preps) in | |
392 | + if s.gf = SUBJ then List.flatten (Xlist.map morfs (function | |
393 | + | E phrase -> Xlist.map (transform_pers_subj_phrase lemma negation mood phrase) (fun phrase -> E phrase) | |
394 | + | LexArg(id,lex,pos) -> Xlist.map (transform_pers_subj_pos lemma negation mood pos) (fun pos -> LexArg(id,lex,pos)) | |
395 | + | SimpleLexArg(lex,pos) -> Xlist.map (transform_pers_subj_pos lemma negation mood pos) (fun pos -> SimpleLexArg(lex,pos)) | |
396 | + | phrase -> transform_pers_subj_phrase lemma negation mood phrase)) | |
397 | + else List.flatten (Xlist.map morfs (function (* use the expanded morfs, as transform_nosubj_schema does, so that the complementizer and preposition expansion is not discarded *) | |
398 | + | LexArg(id,lex,pos) -> Xlist.map (transform_pers_pos lemma negation mood pos) (fun pos -> LexArg(id,lex,pos)) | |
399 | + | SimpleLexArg(lex,pos) -> Xlist.map (transform_pers_pos lemma negation mood pos) (fun pos -> SimpleLexArg(lex,pos)) | |
400 | + | phrase -> transform_pers_phrase lemma negation mood phrase))}) | |
401 | + | |
402 | +let transform_nosubj_schema lemma negation mood schema = (* Rewrites every position of a schema for a subjectless form: the SUBJ position is replaced by [Null], all other positions get complementizer and preposition expansion followed by transform_pers_pos / transform_pers_phrase. *) | |
403 | + Xlist.map schema (fun s -> | |
404 | + {s with morfs = | |
405 | + let morfs = List.flatten (Xlist.map s.morfs (transform_comps negation mood)) in | |
406 | + let morfs = List.flatten (Xlist.map morfs transform_preps) in | |
407 | + if s.gf = SUBJ then [Null] | |
408 | + else List.flatten (Xlist.map morfs (function | |
409 | + | LexArg(id,lex,pos) -> Xlist.map (transform_pers_pos lemma negation mood pos) (fun pos -> LexArg(id,lex,pos)) | |
410 | + | SimpleLexArg(lex,pos) -> Xlist.map (transform_pers_pos lemma negation mood pos) (fun pos -> SimpleLexArg(lex,pos)) | |
411 | + | phrase -> transform_pers_phrase lemma negation mood phrase))}) | |
412 | + | |
413 | +(* let transform_ger_adv_lex = function | |
414 | + | s -> print_endline ("transform_ger_adv_lex: " ^ s); s | |
415 | + | |
416 | +let transform_ger_adv_pos = function | |
417 | + | LexArg(id,lex,ADV grad) -> LexArg(id,transform_ger_adv_lex lex,ADJ(NumberAgr,AllAgr,GenderAgr,grad)) | |
418 | + | SimpleLexArg(lex,ADV grad) -> SimpleLexArg(transform_ger_adv_lex lex,ADJ(NumberAgr,AllAgr,GenderAgr,grad)) | |
419 | + | morf -> morf *) | |
420 | + | |
421 | +let transform_ger_schema lemma negation schema = (* FIXME: I assume that ger resets mood; is that true? *) (* Rewrites every position of a schema for a gerund, always using the "gerundial" mood: complementizers and prepositions are expanded first, then SUBJ positions go through the gerund subject transforms and all other positions through transform_pers_pos / transform_pers_phrase. *) | |
422 | + Xlist.map schema (fun s -> | |
423 | + {s with morfs = | |
424 | + let morfs = List.flatten (Xlist.map s.morfs (transform_comps negation "gerundial")) in | |
425 | + let morfs = List.flatten (Xlist.map morfs transform_preps) in | |
426 | + (* let morfs = Xlist.map morfs transform_ger_adv_pos in *) | |
427 | + if s.gf = SUBJ then List.flatten (Xlist.map morfs (function | |
428 | + | E phrase -> Xlist.map (transform_ger_subj_phrase lemma negation "gerundial" phrase) (fun phrase -> E phrase) | |
429 | + | LexArg(id,lex,pos) -> Xlist.map (transform_ger_subj_pos lemma negation "gerundial" pos) (fun pos -> LexArg(id,lex,pos)) | |
430 | + | SimpleLexArg(lex,pos) -> Xlist.map (transform_ger_subj_pos lemma negation "gerundial" pos) (fun pos -> SimpleLexArg(lex,pos)) | |
431 | + | phrase -> transform_ger_subj_phrase lemma negation "gerundial" phrase)) | |
432 | + else List.flatten (Xlist.map morfs (function (* use the expanded morfs, as transform_nosubj_schema does, so that the complementizer and preposition expansion is not discarded *) | |
433 | + | LexArg(id,lex,pos) -> Xlist.map (transform_pers_pos lemma negation "gerundial" pos) (fun pos -> LexArg(id,lex,pos)) | |
434 | + | SimpleLexArg(lex,pos) -> Xlist.map (transform_pers_pos lemma negation "gerundial" pos) (fun pos -> SimpleLexArg(lex,pos)) | |
435 | + | phrase -> transform_pers_phrase lemma negation "gerundial" phrase))}) | |
436 | + | |
437 | +let transform_ppas_schema lemma negation mood schema = (* Rewrites a schema for a passive participle. Raises Not_found when the schema has no OBJ position, and also for E phrases or lexicalised SUBST(_,Str) in the SUBJ position. The OBJ position becomes [Null], SUBJ goes through transform_ppas_subj_phrase, and all other positions through transform_pers_pos / transform_pers_phrase. *) | |
438 | + if not (Xlist.fold schema false (fun b p -> if p.gf = OBJ then true else b)) then raise Not_found else | |
439 | + Xlist.map schema (fun s -> | |
440 | + let morfs = List.flatten (Xlist.map s.morfs (transform_comps negation mood)) in | |
441 | + let morfs = List.flatten (Xlist.map morfs transform_preps) in | |
442 | + {s with morfs = | |
443 | + if s.gf = OBJ then [Null] else | |
444 | + if s.gf = SUBJ then List.flatten (Xlist.map morfs (function | |
445 | + | E phrase -> raise Not_found (* only 'obladzać' and 'oblodzić', probably an error *) | |
446 | + | LexArg(id,lex,SUBST(n,Str)) -> raise Not_found (* FIXME!!! *) | |
447 | + | SimpleLexArg(lex,SUBST(n,Str)) -> raise Not_found (* FIXME!!! *) | |
448 | + | phrase -> transform_ppas_subj_phrase lemma negation mood (s.cr <> [] || s.ce <> []) phrase)) | |
449 | + else List.flatten (Xlist.map morfs (function (* use the expanded morfs, as transform_nosubj_schema does, so that the complementizer and preposition expansion is not discarded *) | |
450 | + | LexArg(id,lex,pos) -> Xlist.map (transform_pers_pos lemma negation mood pos) (fun pos -> LexArg(id,lex,pos)) | |
451 | + | SimpleLexArg(lex,pos) -> Xlist.map (transform_pers_pos lemma negation mood pos) (fun pos -> SimpleLexArg(lex,pos)) | |
452 | + | phrase -> transform_pers_phrase lemma negation mood phrase))}) | |
453 | + | |
454 | +let transform_num_schema acm schema = (* Rewrites the schema of a numeral: a lexicalised SUBST with undefined number and case gets GenAgr when acm is "rec" and AllAgr when acm is "congr"; Null is kept. Raises Failure for any other acm value or any other morf. *) | |
455 | + Xlist.map schema (fun s -> | |
456 | + {s with morfs=List.flatten (Xlist.map s.morfs (function | |
457 | + | Null -> [Null] | |
458 | + | LexArg(id,lex,SUBST(NumberUndef,CaseUndef)) -> | |
459 | + (match acm with | |
460 | + "rec" -> [LexArg(id,lex,SUBST(NumberUndef,GenAgr))] | |
461 | + | "congr" -> [LexArg(id,lex,SUBST(NumberUndef,AllAgr))] | |
462 | + | _ -> failwith "transform_num_schema") | |
463 | + | SimpleLexArg(lex,SUBST(NumberUndef,CaseUndef)) -> | |
464 | + (match acm with | |
465 | + "rec" -> [SimpleLexArg(lex,SUBST(NumberUndef,GenAgr))] | |
466 | + | "congr" -> [SimpleLexArg(lex,SUBST(NumberUndef,AllAgr))] | |
467 | + | _ -> failwith "transform_num_schema") | |
468 | + | morf -> failwith ("transform_num_schema: " ^ ENIAMwalStringOf.phrase morf)))}) | |
469 | + | |
470 | +let transform_schema pos lemma schema = (* Rewrites a schema of a non-verbal entry: selects the phrase and pos transforms by the pos string (raising Failure "transform_schema" for an unknown one), expands complementizers with NegationUndef and an empty mood, then applies the selected transforms to every morf. *) | |
471 | + let phrase_fun,pos_fun = match pos with | |
472 | + "subst" -> transform_np_phrase,transform_np_pos | |
473 | + | "adj" -> transform_adj_phrase,transform_adj_pos | |
474 | + | "adv" -> transform_adv_phrase,transform_adv_pos | |
475 | + | "prep" -> transform_prep_phrase,transform_prep_pos | |
476 | + | "comprep" -> transform_comprep_phrase,transform_comprep_pos | |
477 | + | "compar" -> transform_compar_phrase,transform_compar_pos | |
478 | + | "comp" -> transform_comp_phrase,transform_comp_pos | |
479 | + | "qub" -> transform_qub_phrase,transform_qub_pos | |
480 | + | "siebie" -> transform_siebie_phrase,transform_siebie_pos | |
481 | + | _ -> failwith "transform_schema" | |
482 | + in | |
483 | + Xlist.map schema (fun s -> | |
484 | + let morfs = List.flatten (Xlist.map s.morfs (transform_comps NegationUndef "")) in (* FIXME: dependence on conditional mood and negation *) | |
485 | + {s with morfs=List.flatten (Xlist.map morfs (function | |
486 | + LexArg(id,lex,pos) -> Xlist.map (pos_fun lemma pos) (fun pos -> LexArg(id,lex,pos)) | |
487 | + | SimpleLexArg(lex,pos) -> Xlist.map (pos_fun lemma pos) (fun pos -> SimpleLexArg(lex,pos)) | |
488 | + | phrase -> phrase_fun lemma phrase))}) | |
489 | + | |
490 | +let rec remove_adj_agr schema = match schema with (* Drops every slot whose morfs are exactly [Null;AdjP(CaseAgr)] or [Null;AdjP(Part)] and keeps the remaining slots in order. *) | |
491 | + | [] -> [] | |
492 | + | {morfs=[Null;AdjP(CaseAgr|Part)]} :: rest -> | |
493 | + remove_adj_agr rest | |
494 | + | slot :: rest -> slot :: remove_adj_agr rest | |
495 | + | |
496 | +let rec get_role gf slots = match slots with (* Returns the pair role,role_attr of the first slot whose gf equals the given gf; raises Not_found when there is none. *) | |
497 | + | [] -> raise Not_found | |
498 | + | slot :: rest -> if slot.gf <> gf then get_role gf rest else slot.role,slot.role_attr | |
499 | + | |
500 | +let expand_negation negation = (* Lists the concrete negation values covered by negation: an undefined negation stands for both Negation and Aff. *) | |
501 | + match negation with | |
502 | + | NegationUndef -> [Negation;Aff] | |
503 | + | (Negation | Aff) as known -> [known] | |
504 | + | |
505 | +let expand_aspect aspect = match aspect with (* Lists the concrete aspects covered by aspect: an undefined aspect stands for both "imperf" and "perf". *) | |
506 | + | AspectUndef -> [Aspect "imperf";Aspect "perf"] | |
507 | + | Aspect _ as known -> [known] | |
508 | + | |
509 | +let aspect_sel aspect = match aspect with (* Builds the selector list for aspect: a single Aspect/Eq selector for a defined aspect, no selector otherwise. *) | |
510 | + | AspectUndef -> [] | |
511 | + | Aspect value -> [ENIAM_LCGlexiconTypes.Aspect,ENIAM_LCGlexiconTypes.Eq,[value]] | |
512 | + | |
513 | +open ENIAM_LCGlexiconTypes | |
514 | + | |
515 | +let transform_entry pos lemma negation pred aspect schema = (* Returns the list of (selector list, transformed schema) pairs for an entry with the given pos. The first three groups of pos values go through transform_schema, num and intnum go through transform_num_schema once for "congr" and once for "rec", and the remaining pos values use the pers/nosubj/ppas/ger schema transformers once per value of expand_negation negation. Raises Failure when the negation, pred or aspect attributes are rejected by the checks below or when pos is unknown. *) | |
516 | + if pos = "subst" || pos = "depr" then ( | |
517 | + if negation <> NegationUndef || pred <> PredFalse || aspect <> AspectUndef then failwith ("transform_entry 1"); | |
518 | + [[],transform_schema "subst" lemma schema]) else | |
519 | + if pos = "adj" || pos = "adjc" || pos = "adjp" then ( | |
520 | + if negation <> NegationUndef || aspect <> AspectUndef then failwith ("transform_entry 2"); | |
521 | + let sel = match pred with PredTrue -> [Case,Eq,["pred"]] | _ -> [] in | |
522 | + [sel,transform_schema "adj" lemma schema]) else | |
523 | + if pos = "adv" || pos = "prep" || pos = "comprep" || pos = "comp" || pos = "compar" || pos = "qub" || pos = "siebie" then ( | |
524 | + if negation <> NegationUndef || (*pred <> PredFalse ||*) aspect <> AspectUndef then failwith ("transform_entry 3"); (* FIXME: adverb types *) | |
525 | + [[],transform_schema pos lemma schema]) else | |
526 | + if pred <> PredFalse then failwith ("transform_entry 4") else (* every pos handled below requires pred = PredFalse *) | |
527 | + if pos = "num" || pos = "intnum" then ( | |
528 | + if negation <> NegationUndef || aspect <> AspectUndef then failwith ("transform_entry 5"); | |
529 | + Xlist.map ["congr";"rec"] (fun acm -> | |
530 | + [Acm,Eq,[acm]],transform_num_schema acm schema)) else | |
531 | + List.flatten (Xlist.map (expand_negation negation) (fun negation -> (* one group of results per concrete negation value *) | |
532 | + let sel = [Negation,Eq,[ENIAMwalStringOf.negation negation]] @ aspect_sel aspect in | |
533 | + if pos = "fin" || pos = "bedzie" then | |
534 | + [sel @ [Mood,Eq,["indicative"]],transform_pers_schema lemma negation "indicative" schema; | |
535 | + sel @ [Mood,Eq,["imperative"]],transform_pers_schema lemma negation "imperative" schema] else | |
536 | + if pos = "praet" || pos = "winien" then | |
537 | + [sel @ [Mood,Eq,["indicative"]],transform_pers_schema lemma negation "indicative" schema; | |
538 | + sel @ [Mood,Eq,["conditional"]],transform_pers_schema lemma negation "conditional" schema] else | |
539 | + if pos = "impt" then | |
540 | + [sel @ [Mood,Eq,["imperative"]],transform_nosubj_schema lemma negation "imperative" schema] else | |
541 | + if pos = "imps" then | |
542 | + [sel @ [Mood,Eq,["indicative"]],transform_nosubj_schema lemma negation "indicative" schema] else | |
543 | + if pos = "pred" then | |
544 | + [sel @ [Mood,Eq,["indicative"]],transform_pers_schema lemma negation "indicative" schema] else | |
545 | + if pos = "pcon" || pos = "pant" || pos = "inf" || pos = "pact" then | |
546 | + (* let role,role_attr = try get_role SUBJ schema with Not_found -> "Initiator","" in *) | |
547 | + [sel, transform_nosubj_schema lemma negation "indicative" schema] else | |
548 | + if pos = "ppas" then | |
549 | + try | |
550 | + (* let role,role_attr = try get_role OBJ schema with Not_found -> "Theme","" in *) | |
551 | + [sel, transform_ppas_schema lemma negation "indicative" schema] | |
552 | + with Not_found -> [] else (* a Not_found raised while transforming the schema yields no ppas result *) | |
553 | + if pos = "ger" then | |
554 | + [sel,transform_ger_schema lemma negation schema] else | |
555 | + failwith ("transform_entry: " ^ pos))) | |
556 | + | |
557 | +let transform_lex_entry pos lemma = function (* Returns the list of (selector list, entry) pairs for a lexical entry. NOTE(review): the pos and lemma parameters are shadowed by the pattern variables of the arms that use those names, so the values passed in are never read. *) | |
558 | + SimpleLexEntry(lemma,pos) -> [[],SimpleLexEntry(lemma,pos)] | |
559 | + | LexEntry(id,lemma,pos,NoRestr,schema) -> (* one entry per result of transform_entry, each with its transformed schema *) | |
560 | + Xlist.map (transform_entry pos lemma NegationUndef PredFalse AspectUndef schema) (fun (sel,schema) -> | |
561 | + sel,LexEntry(id,lemma,pos,NoRestr,schema)) | |
562 | + | ComprepNPEntry(s,NoRestr,schema) -> (* transformed with the fixed pos "comprep" and s as lemma *) | |
563 | + Xlist.map (transform_entry "comprep" s NegationUndef PredFalse AspectUndef schema) (fun (sel,schema) -> | |
564 | + sel,ComprepNPEntry(s,NoRestr,schema)) | |
565 | + | LexEntry(id,lemma,pos,_,[]) as entry -> [[],entry] (* FIXME *) | |
566 | + | entry -> print_endline ("transform_lex_entry:" ^ ENIAMwalStringOf.lex_entry entry); [[],entry] (* any other entry is logged and returned unchanged *) | |
567 | + | |
568 | +(*let reduce_frame_negation lexemes = function | |
569 | + Negation -> StringMap.mem lexemes "nie" | |
570 | + | _ -> true | |
571 | + | |
572 | +let reduce_frame_mood lexemes = function | |
573 | + "conditional" -> StringMap.mem lexemes "by" | |
574 | + | _ -> true | |
575 | + | |
576 | +let reduce_frame_aux lexemes = function | |
577 | + NoAux -> true | |
578 | + | PastAux -> (try let poss = StringMap.find lexemes "być" in StringSet.mem poss "praet" with Not_found -> false) | |
579 | + | FutAux -> (try let poss = StringMap.find lexemes "być" in StringSet.mem poss "bedzie" with Not_found -> false) | |
580 | + | ImpAux -> StringMap.mem lexemes "niech" || StringMap.mem lexemes "niechaj" || StringMap.mem lexemes "niechże" || StringMap.mem lexemes "niechajże" | |
581 | + | |
582 | +let reduce_frame_atrs pos lexemes = function | |
583 | + Frame(NounAtrs _,_) -> true | |
584 | + | Frame(AdjAtrs _,_) -> true | |
585 | + | Frame(EmptyAtrs _,_) -> true | |
586 | + | Frame(PersAtrs(_,_,negation,mood,_,aux,_),_) -> reduce_frame_negation lexemes negation && reduce_frame_mood lexemes mood && reduce_frame_aux lexemes aux | |
587 | + | Frame(NonPersAtrs(_,_,_,_,negation,_),_) -> if pos = "pact" || pos = "ppas" then true else reduce_frame_negation lexemes negation | |
588 | + | Frame(GerAtrs(_,_,negation,_),_) -> reduce_frame_negation lexemes negation | |
589 | + | Frame(_,_) as frame -> failwith ("reduce_frame_atrs: " ^ ENIAMwalStringOf.frame "" frame) | |
590 | + | LexFrame _ -> true | |
591 | + | ComprepFrame _ -> true | |
592 | + | |
593 | +let rec reduce_frame_atrs_list pos lexemes = function | |
594 | + [] -> [] | |
595 | + | frame :: l -> (if reduce_frame_atrs pos lexemes frame then [frame] else []) @ reduce_frame_atrs_list pos lexemes l | |
596 | +*) | |
597 | + | |
598 | +let _ = (* Runs at module initialisation: applies transform_entry to every merged schema and transform_lex_entry to every parsed entry, discarding all results. Presumably a smoke test that makes transformation failures surface at load time - TODO confirm. *) | |
599 | + let schemata = ENIAMwalReduce.merge_entries ENIAMwalParser.phrases ENIAMwalParser.schemata in | |
600 | + Entries.iter schemata (fun pos lemma (opinion,neg,pred,aspect,schema) -> | |
601 | + match pos with | |
602 | + "noun" -> ignore (transform_entry "subst" lemma neg pred aspect schema) | |
603 | + | "adj" -> ignore (transform_entry "adj" lemma neg pred aspect schema) | |
604 | + | "adv" -> ignore (transform_entry "adv" lemma neg pred aspect schema) | |
605 | + | "verb" -> (* a verb schema is tried with each of the pos values listed below *) | |
606 | + ignore (transform_entry "fin" lemma neg pred aspect schema); | |
607 | + ignore (transform_entry "praet" lemma neg pred aspect schema); | |
608 | + ignore (transform_entry "impt" lemma neg pred aspect schema); | |
609 | + ignore (transform_entry "imps" lemma neg pred aspect schema); | |
610 | + ignore (transform_entry "ger" lemma neg pred aspect schema); | |
611 | + ignore (transform_entry "pact" lemma neg pred aspect schema); | |
612 | + ignore (transform_entry "ppas" lemma neg pred aspect schema); | |
613 | + ignore (transform_entry "inf" lemma neg pred aspect schema); | |
614 | + ignore (transform_entry "pcon" lemma neg pred aspect schema); | |
615 | + ignore (transform_entry "pant" lemma neg pred aspect schema); | |
616 | + ignore (transform_entry "bedzie" lemma neg pred aspect schema); | |
617 | + ignore (transform_entry "winien" lemma neg pred aspect schema); | |
618 | + ignore (transform_entry "pred" lemma neg pred aspect schema); | |
619 | + () | |
620 | + | _ -> failwith "unknown pos"); | |
621 | + ignore (Entries.map ENIAMwalParser.entries transform_lex_entry); | |
622 | + () | |
... | ... |
lexSemantics/ENIAMwalFrames.ml
1 | 1 | (* |
2 | - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty". | |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
2 | + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. | |
3 | + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | 5 | * |
6 | 6 | * This library is free software: you can redistribute it and/or modify |
7 | 7 | * it under the terms of the GNU Lesser General Public License as published by |
... | ... | @@ -525,752 +525,6 @@ let remove_pro_args schema = (* FIXME: sprawdzić czy Pro i Null są zawsze na p |
525 | 525 | | s -> s :: schema)) |
526 | 526 | |
527 | 527 | |
528 | -let simplify_pos pos = match pos with (* Maps a detailed part-of-speech tag to its coarse class "noun", "adj", "verb" or "pron"; any other tag is returned unchanged. *) | |
529 | - | "subst" | |
530 | - | "depr" | |
531 | - | "psubst" | |
532 | - | "pdepr" -> "noun" | |
533 | - | "adj" | |
534 | - | "adjc" -> "adj" | |
535 | - | "ger" | |
536 | - | "pact" | |
537 | - | "ppas" | |
538 | - | "fin" | |
539 | - | "bedzie" | |
540 | - | "praet" | |
541 | - | "winien" | |
542 | - | "impt" | |
543 | - | "imps" | |
544 | - | "inf" | |
545 | - | "pcon" | |
546 | - | "pant" | |
547 | - | "pred" -> "verb" | |
548 | - | "ppron12" | |
549 | - | "ppron3" | |
550 | - | "siebie" -> "pron" | |
551 | - | other -> other | |
552 | - | |
553 | -let transform_zeby negation = match negation with (* Lists the complementizers allowed for the given negation: only "że" under Aff, both "że" and "żeby" under Negation or NegationUndef; any other value raises Failure. *) | |
554 | - | Aff -> [Comp "że"] | |
555 | - | Negation | |
556 | - | NegationUndef -> [Comp "że";Comp "żeby"] | |
557 | - | _ -> failwith "transform_zeby" | |
558 | - | |
559 | -let transform_gdy mood = match mood with (* Lists the complementizers allowed for the given mood string: "gdyby" for conditional, "gdy" for the other named moods, both for the empty string; any other string raises Failure. *) | |
560 | - | "indicative" | |
561 | - | "imperative" | |
562 | - | "gerundial" -> [Comp "gdy"] | |
563 | - | "conditional" -> [Comp "gdyby"] | |
564 | - | "" -> [Comp "gdy";Comp "gdyby"] | |
565 | - | s -> failwith ("transform_gdy: " ^ s) | |
566 | - | |
567 | -let transform_comp negation mood comp = (* Expands a complementizer: Zeby depends on negation, Gdy depends on mood, a concrete Comp or CompUndef is returned as a singleton. *) | |
568 | - match comp with | |
569 | - | Zeby -> transform_zeby negation | |
570 | - | Gdy -> transform_gdy mood | |
571 | - | (Comp _ | CompUndef) as same -> [same](*failwith "transform_comp"*) | |
572 | - | |
573 | -let transform_str negation = (* Resolves the structural case for the given negation: "acc" under Aff, "gen" under Negation, both under NegationUndef; any other value raises Failure. *) | |
574 | - let cases = match negation with | |
575 | - Aff -> ["acc"] | Negation -> ["gen"] | NegationUndef -> ["acc";"gen"] | |
576 | - | _ -> failwith "transform_str" in | |
577 | - List.map (fun case -> Case case) cases | |
578 | - | |
579 | -(* FIXME: insert adj everywhere as a variant of PrepNP, ComprepNP and NP *) | |
580 | -let transform_np_phrase = function (* Expands one phrase into the list of phrases that replace it; a phrase matched by no explicit arm is logged with print_endline and returned unchanged. *) | |
581 | - NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)] | |
582 | - | NP(CaseAgr) -> [NP(CaseAgr)(*;NumP(CaseAgr)*)] | |
583 | - | AdjP(Case _) as morf -> [morf] | |
584 | - | AdjP(CaseAgr) -> [AdjP(AllAgr)] | |
585 | - | AdjP(AllAgr) -> [AdjP(AllAgr)] | |
586 | - | AdjP(Str) -> [AdjP(AllAgr)] | |
587 | - | PrepNP(sem,prep,Case case) -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)] | |
588 | -(* | PrepNumP(_,Case _) as morf -> [morf] *) | |
589 | - | ComprepNP _ as morf -> [morf] | |
590 | - | ComparNP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> ComparNP(sem,prep,Case case)) | |
591 | - | ComparPP _ as morf -> [morf] | |
592 | - | CP(ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> CP(ctype,comp)) (* FIXME dependency on conditional mood *) (* FIXME dependency on negation *) | |
593 | - | NCP(Case c,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> NCP(Case c,ctype,comp)) (* FIXME dependency on conditional mood *) (* FIXME dependency on negation *) | |
594 | - | PrepNCP(sem,prep,Case case,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> PrepNCP(sem,prep,Case case,ctype,comp)) (* FIXME dependency on conditional mood *) (* FIXME dependency on negation *) | |
595 | - | PrepAdjP(sem,_,Case _) as morf -> [morf] (* this looks like a recurring error in Walenty xp(abl[prepadjp(z,gen)]) *) | |
596 | - | PrepNP(sem,prep,Str) -> List.flatten (Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)])) (* FIXME: check who controls! *) (* FIXME: number and gender agreement is skipped *) (* assuming it is not controlled by SUBJ in verbs with OBJ *) | |
597 | - | PrepAdjP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> PrepAdjP(sem,prep,Case case)) (* FIXME: check who controls! *) (* FIXME: number and gender agreement is skipped *) (* assuming it is not controlled by SUBJ in verbs with OBJ *) | |
598 | - | AdvP as morf -> [morf] (* FIXME: adverbial realizations end up here, they should be translated into adjectives *) | |
599 | - | FixedP _ as morf -> [morf] | |
600 | - | PrepP as morf -> [morf] | |
601 | - | Or as morf -> [morf] | |
602 | - | Pro as morf -> [morf] | |
603 | - | Null as morf -> [morf] | |
604 | - | phrase -> print_endline ("transform_np_phrase: " ^ ENIAMwalStringOf.phrase phrase); [phrase] | |
605 | - | |
606 | -let transform_np_pos = function (* Expands one pos description into the list of descriptions that replace it: CaseAgr and Str on adjectival and participial forms become AllAgr; a pos matched by no explicit arm is logged with print_endline and returned unchanged. *) | |
607 | - | SUBST(_,Case _) as morf -> [morf] | |
608 | - | SUBST(_,CaseAgr) as morf -> [morf] | |
609 | - | ADJ(_,Case _,_,_) as morf -> [morf] | |
610 | - | ADJ(n,CaseAgr,g,gr) -> [ADJ(n,AllAgr,g,gr)] | |
611 | - | PACT(n,CaseAgr,g,a,neg,r) -> [PACT(n,AllAgr,g,a,neg,r)] | |
612 | - | PPAS(_,Case _,_,_,_) as morf -> [morf] | |
613 | - | PPAS(n,CaseAgr,g,a,neg) -> [PPAS(n,AllAgr,g,a,neg)] | |
614 | - | ADJ(n,Str,g,gr) -> [ADJ(n,AllAgr,g,gr)] | |
615 | - | PPAS(n,Str,g,a,neg) -> [PPAS(n,AllAgr,g,a,neg)] | |
616 | - | PREP(Case _) as morf -> [morf] | |
617 | - | ADV _ as morf -> [morf] (* FIXME: adverbial realizations end up here, they should be translated into adjectives *) | |
618 | - | COMP _ as morf -> [morf] | |
619 | - | QUB as morf -> [morf] | |
620 | - | pos -> print_endline ("transform_np_pos: " ^ ENIAMwalStringOf.pos pos); [pos] | |
621 | - | |
622 | -let transform_adj_phrase = function (* Expands one phrase into the list of phrases that replace it; a phrase matched by no explicit arm is logged with print_endline and returned unchanged. *) | |
623 | - NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)] | |
624 | - | NP(Part) -> [NP(Case "gen");NP(Case "acc")(*;NumP(Case "gen");NumP(Case "acc")*)] | |
625 | - | AdjP(CaseAgr) -> [AdjP(AllAgr)] (* single occurrence 'cały szczęśliwy', type raising might be useful *) | |
626 | - | PrepNP(sem,prep,Case case) -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)] | |
627 | - | ComprepNP _ as morf -> [morf] | |
628 | - | ComparNP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> ComparNP(sem,prep,Case case)) | |
629 | - | ComparPP _ as morf -> [morf] | |
630 | - | CP(ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> CP(ctype,comp)) (* FIXME dependency on conditional mood *) (* FIXME dependency on negation *) | |
631 | - | NCP(Case c,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> NCP(Case c,ctype,comp)) (* FIXME dependency on conditional mood *) (* FIXME dependency on negation *) | |
632 | - | PrepNCP(sem,prep,Case case,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> PrepNCP(sem,prep,Case case,ctype,comp)) (* FIXME dependency on conditional mood *) (* FIXME dependency on negation *) | |
633 | - | PrepAdjP(sem,_,Case _) as morf -> [morf] | |
634 | - | PrepNP(sem,prep,Str) -> List.flatten (Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)])) (* FIXME: check who controls! *) (* FIXME: number and gender agreement is skipped *) (* assuming it is not controlled by SUBJ in verbs with OBJ *) | |
635 | - | PrepAdjP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> PrepAdjP(sem,prep,Case case)) (* FIXME: check who controls! *) (* FIXME: number and gender agreement is skipped *) (* assuming it is not controlled by SUBJ in verbs with OBJ *) | |
636 | - | InfP _ as morf -> [morf] | |
637 | - | AdvP as morf -> [morf] | |
638 | - | FixedP _ as morf -> [morf] | |
639 | - | PrepP as morf -> [morf] | |
640 | - | Or as morf -> [morf] | |
641 | - | Pro as morf -> [morf] | |
642 | - | Null as morf -> [morf] | |
643 | - | morf -> print_endline ("transform_adj_phrase: " ^ ENIAMwalStringOf.phrase morf); [morf] | |
644 | - | |
645 | -let transform_adj_pos = function (* Expands one pos description into the list of descriptions that replace it: ADJ with CaseAgr becomes ADJ with AllAgr; a pos matched by no explicit arm is logged with print_endline and returned unchanged. *) | |
646 | - | SUBST(_,Case _) as morf -> [morf] | |
647 | - | ADJ(n,CaseAgr,g,gr) -> [ADJ(n,AllAgr,g,gr)] | |
648 | - | PREP(Case _) as morf -> [morf] | |
649 | - | ADV _ as morf -> [morf] | |
650 | - | morf -> print_endline ("transform_adj_pos: " ^ ENIAMwalStringOf.pos morf); [morf] | |
651 | - | |
652 | -let transform_prep_pos = function (* Expands one pos description into the list of descriptions that replace it: SUBST with Str becomes SUBST with CaseAgr; a pos matched by no explicit arm is logged with print_endline and returned unchanged. *) | |
653 | - | SUBST(_,Case _) as morf -> [morf] | |
654 | - | SUBST(n,Str) -> [SUBST(n,CaseAgr)] | |
655 | - | NUM(Case _,_,_) as morf -> [morf] | |
656 | - | ADJ(_,Case _,_,_) as morf -> [morf] | |
657 | - | GER(_,Case _,_,_,_,_) as morf -> [morf] | |
658 | - | PPAS(_,Case _,_,_,_) as morf -> [morf] | |
659 | - | ADV _ as morf -> [morf] | |
660 | - | QUB as morf -> [morf] | |
661 | - | pos -> print_endline ("transform_prep_pos: " ^ ENIAMwalStringOf.pos pos); [pos] | |
662 | - | |
663 | -let transform_compar_phrase = function (* Expands one phrase into the list of phrases that replace it: NP(Str) becomes NP CaseUndef, FixedP is kept; any other phrase is logged with print_endline and returned unchanged. *) | |
664 | - NP(Str) -> [NP CaseUndef(*;NumP(CaseUndef)*)] (* FIXME: same situation as in "jako" *) | |
665 | - | FixedP _ as morf -> [morf] | |
666 | - | phrase -> print_endline ("transform_compar_phrase: " ^ ENIAMwalStringOf.phrase phrase); [phrase] | |
667 | - | |
668 | -let transform_compar_pos = function (* Expands one pos description into the list of descriptions that replace it: the Str and agreement variants listed below are replaced by undefined case, number and gender values; a pos matched by no explicit arm is logged with print_endline and returned unchanged. *) | |
669 | - | SUBST(_,Case _) as morf -> [morf] | |
670 | - | ADJ(_,Case _,_,_) as morf -> [morf] | |
671 | - | PREP(Case _) as morf -> [morf] | |
672 | - | PPAS(_,Case _,_,_,_) as morf -> [morf] | |
673 | - | SUBST(Number n,Str) -> [SUBST(Number n,CaseUndef)] | |
674 | - | SUBST(NumberAgr,Str) -> [SUBST(NumberUndef,CaseUndef)] | |
675 | - | SUBST(NumberUndef,Str) -> [SUBST(NumberUndef,CaseUndef)] | |
676 | - | PPAS(NumberAgr,Str,GenderAgr,a,neg) -> [PPAS(NumberUndef,CaseUndef,GenderUndef,a,neg)] (* FIXME: same situation as in "jako" *) | |
677 | - | PPAS(NumberAgr,CaseAgr,GenderAgr,a,neg) -> [PPAS(NumberUndef,CaseUndef,GenderUndef,a,neg)] (* FIXME: same situation as in "jako" *) | |
678 | - | ADJ(NumberAgr,Str,GenderAgr,gr) -> [ADJ(NumberUndef,CaseUndef,GenderUndef,gr)] (* FIXME: same situation as in "jako" *) | |
679 | - | ADJ(NumberAgr,CaseAgr,GenderAgr,gr) -> [ADJ(NumberUndef,CaseUndef,GenderUndef,gr)] (* FIXME: same situation as in "jako" *) | |
680 | - | NUM(Case _,_,_) as morf -> [morf] | |
681 | - | pos -> print_endline ("transform_compar_pos: " ^ ENIAMwalStringOf.pos pos); [pos] | |
682 | - | |
683 | -let transform_adv_phrase = function (* Expands one phrase into the list of phrases that replace it; a phrase matched by no explicit arm is logged with print_endline and returned unchanged. *) | |
684 | - NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)] | |
685 | - | PrepNP(sem,prep,Case case) -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)] | |
686 | - | PrepNCP(sem,prep,Case case,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> PrepNCP(sem,prep,Case case,ctype,comp)) (* FIXME dependency on conditional mood *) (* FIXME dependency on negation *) | |
687 | - | ComprepNP _ as morf -> [morf] | |
688 | - | CP(ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> CP(ctype,comp)) (* FIXME dependency on conditional mood *) (* FIXME dependency on negation *) | |
689 | - | InfP _ as morf -> [morf] | |
690 | - | AdvP as morf -> [morf] | |
691 | - | Or as morf -> [morf] | |
692 | - | Pro as morf -> [morf] | |
693 | - | Null as morf -> [morf] | |
694 | - | PrepAdjP(sem,_,Case _) as morf -> [morf] | |
695 | - | PrepNP(sem,prep,Str) -> List.flatten (Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)])) (* FIXME: check who controls! *) (* FIXME: number and gender agreement is skipped *) (* assuming it is not controlled by SUBJ in verbs with OBJ *) | |
696 | - | PrepAdjP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> PrepAdjP(sem,prep,Case case)) (* FIXME: check who controls! *) (* FIXME: number and gender agreement is skipped *) (* assuming it is not controlled by SUBJ in verbs with OBJ *) | |
697 | - | ComparNP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> ComparNP(sem,prep,Case case)) | |
698 | - | ComparPP _ as morf -> [morf] | |
699 | -(* | AdjP(CaseAgr) as morf -> [morf] *) | |
700 | -(* | NCP(Case c,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> NCP(Case c,ctype,comp)) (* FIXME dependency on conditional mood *) (* FIXME dependency on negation *) | |
701 | - | PrepNCP(prep,Case case,ctype,comp) -> Xlist.map (transform_comp NegationUndef "" comp) (fun comp -> PrepNCP(prep,Case case,ctype,comp)) (* FIXME dependency on conditional mood *) (* FIXME dependency on negation *) | |
702 | - | FixedP _ as morf -> [morf]*) | |
703 | - | morf -> print_endline ("transform_adv_phrase: " ^ ENIAMwalStringOf.phrase morf); [morf] | |
704 | - | |
705 | -let transform_adv_pos = function (* Returns COMP, PREP(Case _) and ADV descriptions as singletons; any other pos is logged with print_endline and returned unchanged. *) | |
706 | -(* | SUBST(_,Case _) as morf -> [morf] | |
707 | - | ADJ(_,CaseAgr,_,_) as morf -> [morf]*) | |
708 | - COMP _ as morf -> [morf] | |
709 | - | PREP(Case _) as morf -> [morf] | |
710 | - | ADV _ as morf -> [morf] | |
711 | - | morf -> print_endline ("transform_adv_pos: " ^ ENIAMwalStringOf.pos morf); [morf] | |
712 | - | |
713 | -(*| Prepnp("jako",Str) as morf -> morf | |
714 | - | Prepnp("jak",Str) as morf -> morf | |
715 | - | Prepnp("niczym",Str) as morf -> morf | |
716 | - | Prepadjp("jako",Str) as morf -> morf | |
717 | - | Prepadjp("jak",Str) as morf -> morf | |
718 | - | Prepadjp("niczym",Str) as morf -> morf | |
719 | - | Compar "jako" as morf -> morf | |
720 | - | Compar "jak" as morf -> morf | |
721 | - | Compar "niczym" as morf -> morf | |
722 | - | Compar "niż" as morf -> morf*) | |
723 | - | |
724 | -let transform_pers_subj_phrase negation mood = function (* FIXME: prepnp(na,loc) *) (* Expands one subject phrase into the list of phrases that replace it: structural case becomes NomAgr, complementizers are expanded with transform_comp negation mood, Pro becomes ProNG; any other phrase is logged with print_endline and returned unchanged. *) | |
725 | - | NP(Str) -> [NP(NomAgr)(*;NumP(NomAgr)*)] | |
726 | - | NCP(Str,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> NCP(NomAgr,ctype,comp)) | |
727 | - | CP(ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> CP(ctype,comp)) | |
728 | - | InfP _ as morf -> [morf] | |
729 | - | Or as morf -> [morf] | |
730 | - | NP(Part) -> [NP(Case "gen")(*;NP(Case "acc")*)(*;NumP(Case "gen");NumP(Case "acc")*)] | |
731 | - | Pro -> [ProNG] | |
732 | - | morf -> print_endline ("transform_pers_subj_phrase: " ^ ENIAMwalStringOf.phrase morf); [morf] | |
733 | - | |
734 | -let transform_pers_subj_pos negation mood = function (* Expands one subject pos description into the list of descriptions that replace it: structural case and nominative become NomAgr; any other pos is logged with print_endline under this function name and returned unchanged. The negation and mood arguments are not used. *) | |
735 | - COMP _ as morf -> [morf] | |
736 | - | SUBST(n,Str) -> [SUBST(n,NomAgr)] | |
737 | - | SUBST(n,Case "nom") -> [SUBST(n,NomAgr)] (* looks like an error in Walenty, but it does not matter *) | |
738 | - | NUM(Str,g,AcmUndef) -> [NUM(NomAgr,g,AcmUndef)] | |
739 | - | ADJ(n,Str,g,gr) -> [ADJ(n,NomAgr,g,gr)] | |
740 | - | morf -> print_endline ("transform_pers_subj_pos: " ^ ENIAMwalStringOf.pos morf); [morf] | |
741 | - | |
742 | -let transform_ger_subj_phrase negation mood control = function (* Expands one subject phrase into the list of phrases that replace it: structural case becomes genitive or a "przez" phrase with accusative, Pro stays Pro when control is true and becomes Null otherwise; any other phrase is logged with print_endline and returned unchanged. *) | |
743 | - | NP(Str) -> [NP(Case "gen");PrepNP(NoSem,"przez",Case "acc")(*;NumP(Case "gen")*)(*;PrepNumP("przez",Case "acc")*)] (* FIXME: is przez:acc possible? *) | |
744 | - | NCP(Str,ctype,comp) -> List.flatten (Xlist.map (transform_comp negation mood comp) (fun comp -> [NCP(Case "gen",ctype,comp);PrepNCP(NoSem,"przez",Case "acc",ctype,comp)])) (* FIXME: is przez:acc possible? *) | |
745 | - | CP(ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> CP(ctype,comp)) (* FIXME: is this possible? *) | |
746 | - | InfP _ as morf -> [morf] (* FIXME: is this possible? *) | |
747 | - | Or as morf -> [morf] | |
748 | - | NP(Part) -> [NP(Case "gen")(*;NP(Case "acc")*)(*;NumP(Case "gen");NumP(Case "acc")*)] | |
749 | - | Pro -> if control then [Pro] else [Null] | |
750 | - | morf -> print_endline ("transform_ger_subj_phrase: " ^ ENIAMwalStringOf.phrase morf); [morf] | |
751 | - | |
752 | -let transform_ger_subj_pos negation mood = function (* FIXME: ADV(_) *) (* Expands one subject pos description into the list of descriptions that replace it: structural case and nominative become genitive; any other pos is logged with print_endline under this function name and returned unchanged. The negation and mood arguments are not used. *) | |
753 | - COMP _ as morf -> [morf] (* FIXME: is this possible? *) | |
754 | - | SUBST(n,Str) -> [SUBST(n,Case "gen")] | |
755 | - | SUBST(n,Case "nom") -> [SUBST(n,Case "gen")] (* looks like an error in Walenty, but it does not matter *) | |
756 | - | NUM(Str,g,AcmUndef) -> [NUM(Case "gen",g,AcmUndef)] | |
757 | - | ADJ(n,Str,g,gr) -> [ADJ(n,Case "gen",g,gr)] | |
758 | - | morf -> print_endline ("transform_ger_subj_pos: " ^ ENIAMwalStringOf.pos morf); [morf] | |
759 | - | |
760 | -let transform_ppas_subj_phrase negation mood control = function (* Expands one subject phrase into the list of phrases that replace it: structural case becomes a "przez" phrase with accusative, CP becomes Null, Pro stays Pro when control is true and becomes Null otherwise; any other phrase is logged with print_endline and returned unchanged. *) | |
761 | - | NP(Str) -> [PrepNP(NoSem,"przez",Case "acc")(*;PrepNumP("przez",Case "acc")*)] | |
762 | - | NCP(Str,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> PrepNCP(NoSem,"przez",Case "acc",ctype,comp)) | |
763 | - | CP(ctype,comp) -> [Null] (* assuming the frame also contains NCP *) | |
764 | - | Pro -> if control then [Pro] else [Null] | |
765 | - | morf -> print_endline ("transform_ppas_subj_phrase: " ^ ENIAMwalStringOf.phrase morf); [morf] | |
766 | - | |
767 | -let transform_pers_phrase negation mood = function (* Expands one phrase into the list of phrases that replace it: structural case is resolved with transform_str negation, complementizers with transform_comp negation mood, and AdvP becomes AdjP AllAgr when mood is "gerundial"; a phrase matched by no explicit arm is logged with print_endline and returned unchanged. *) | |
768 | - | NP(Str) -> List.flatten (Xlist.map (transform_str negation) (fun case -> [NP case(*;NumP(case)*)])) | |
769 | - | AdjP(Str) -> Xlist.map (transform_str negation) (fun case -> AdjP case) (* FIXME: number and gender agreement is skipped - feasible by means of control *) | |
770 | - | NCP(Str,ctype,comp) -> List.flatten (Xlist.map (transform_str negation) (fun case -> Xlist.map (transform_comp negation mood comp) (fun comp -> NCP(case,ctype,comp)))) | |
771 | - | NP(Part) -> [NP(Case "gen");NP(Case "acc")(*;NumP(Case "gen");NumP(Case "acc")*)] | |
772 | - | NCP(Part,ctype,comp) -> List.flatten (Xlist.map (transform_comp negation mood comp) (fun comp -> [NCP(Case "gen",ctype,comp);NCP(Case "acc",ctype,comp)])) | |
773 | - | NP(Case case) -> [NP(Case case)(*;NumP(Case case)*)] | |
774 | - | PrepNP(sem,prep,Case case) -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)] | |
775 | -(* | PrepNumP(_,Case _) as morf -> [morf] *) | |
776 | - | ComprepNP _ as morf -> [morf] | |
777 | - | NCP(Case case,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> NCP(Case case,ctype,comp)) | |
778 | - | PrepNCP(sem,prep,Case case,ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> PrepNCP(sem,prep,Case case,ctype,comp)) | |
779 | - | AdjP(Case _) as morf -> [morf] (* FIXME: number and gender agreement is skipped - feasible by means of control *) | |
780 | - | PrepAdjP(sem,_,Case _) as morf -> [morf] (* FIXME: number and gender agreement is skipped - feasible by means of control *) | |
781 | - | PrepNP(sem,prep,Str) -> List.flatten (Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> [PrepNP(sem,prep,Case case)(*;PrepNumP(prep,Case case)*)])) (* FIXME: check who controls! *) (* FIXME: number and gender agreement is skipped *) (* assuming it is not controlled by SUBJ in verbs with OBJ *) | |
782 | - | PrepAdjP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> PrepAdjP(sem,prep,Case case)) (* FIXME: check who controls! *) (* FIXME: number and gender agreement is skipped *) (* assuming it is not controlled by SUBJ in verbs with OBJ *) | |
783 | - | ComparNP(sem,prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> ComparNP(sem,prep,Case case)) | |
784 | - | ComparPP _ as morf -> [morf] | |
785 | - | CP(ctype,comp) -> Xlist.map (transform_comp negation mood comp) (fun comp -> CP(ctype,comp)) | |
786 | - | InfP _ as morf -> [morf] | |
787 | - | PadvP as morf -> [morf] | |
788 | - | AdvP -> if mood = "gerundial" then [AdjP AllAgr] else [AdvP] | |
789 | - | FixedP _ as morf -> [morf] | |
790 | - | PrepP as morf -> [morf] | |
791 | - | Or as morf -> [morf] | |
792 | - | Lex "się" as morf -> [morf] | |
793 | -(* | Refl as morf -> [morf] *) | |
794 | -(* | Recip as morf -> [morf] *) | |
795 | - | Pro as morf -> [morf] | |
796 | - | Null as morf -> [morf] | |
797 | - | morf -> print_endline ("transform_pers_phrase: " ^ ENIAMwalStringOf.phrase morf); [morf] | |
798 | - | |
799 | -let transform_pers_pos negation mood = function (* Expands one pos description into the list of descriptions that replace it: structural case is resolved with transform_str negation (now also for PPAS, which previously re-emitted Str and ignored the resolved case), and ADV becomes an agreeing ADJ when mood is "gerundial"; a pos matched by no explicit arm is logged with print_endline and returned unchanged. *) | |
800 | - | SUBST(n,Str) -> Xlist.map (transform_str negation) (fun case -> SUBST(n,case)) | |
801 | - | NUM(Str,g,a) -> Xlist.map (transform_str negation) (fun case -> NUM(case,g,a)) | |
802 | - | ADJ(n,Str,g,gr) -> Xlist.map (transform_str negation) (fun case -> ADJ(n,case,g,gr)) | |
803 | - | PPAS(n,Str,g,a,neg) -> Xlist.map (transform_str negation) (fun case -> PPAS(n,case,g,a,neg)) | |
804 | - | SUBST(n,Part) -> [SUBST(n,Case "gen");SUBST(n,Case "acc")] | |
805 | - | SUBST(_,Case _) as morf -> [morf] | |
806 | - | NUM(Case _,_,_) as morf -> [morf] | |
807 | - | PREP(Case _) as morf -> [morf] | |
808 | - | ADJ(_,Case _,_,_) as morf -> [morf] | |
809 | - | PREP(Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> PREP(Case case)) (* FIXME: check who controls! *) (* FIXME: number and gender agreement is skipped *) (* assuming it is not controlled by SUBJ in verbs with OBJ *) | |
810 | - | SUBST(n,CaseAgr) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> SUBST(n,Case case)) (* FIXME: check who controls! *) | |
811 | - | ADJ(n,CaseAgr,g,gr) -> Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> ADJ(n,Case case,g,gr)) (* FIXME: check who controls! *) | |
812 | - | COMPAR as morf -> [morf] | |
813 | - | COMP _ as morf -> [morf] | |
814 | - | INF _ as morf -> [morf] | |
815 | - | ADV grad -> if mood = "gerundial" then [ADJ(NumberAgr,AllAgr,GenderAgr,grad)] else [ADV grad] | |
816 | - | morf -> print_endline ("transform_pers_pos: " ^ ENIAMwalStringOf.pos morf); [morf] | |
817 | - | |
818 | -let transform_pers_schema negation mood schema = (* Rewrites the morfs of every slot of schema: slots whose gf is SUBJ use the subject transformers, all other slots use transform_pers_phrase and transform_pers_pos, with an E phrase outside SUBJ replaced by Phrase Null. An unsupported morf raises Failure labelled with this function name (the label used to be "transform_fin_schema"). *) | |
819 | - Xlist.map schema (fun s -> | |
820 | - {s with morfs = | |
821 | - if s.gf = SUBJ then List.flatten (Xlist.map s.morfs (function | |
822 | - Phrase phrase -> Xlist.map (transform_pers_subj_phrase negation mood phrase) (fun phrase -> Phrase phrase) | |
823 | - | E phrase -> Xlist.map (transform_pers_subj_phrase negation mood phrase) (fun phrase -> E phrase) | |
824 | - | LexArg(id,pos,lex) -> Xlist.map (transform_pers_subj_pos negation mood pos) (fun pos -> LexArg(id,pos,lex)) | |
825 | - | _ -> failwith "transform_pers_schema")) | |
826 | - else List.flatten (Xlist.map s.morfs (function | |
827 | - Phrase phrase -> Xlist.map (transform_pers_phrase negation mood phrase) (fun phrase -> Phrase phrase) | |
828 | - | E phrase -> [Phrase Null] (*E(List.flatten (Xlist.map phrases (transform_pers_phrase negation mood)))*) (* FIXME *) | |
829 | - | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation mood pos) (fun pos -> LexArg(id,pos,lex)) | |
830 | - | _ -> failwith "transform_pers_schema"))}) | |
831 | - | |
(* Schema transformation for imperative forms: the SUBJ position is replaced
   by [Phrase ProNG]; every other position is rewritten with the generic
   [transform_pers_*] transformers.
   Raises [Failure "transform_impt_schema"] on an unexpected morf. *)
let transform_impt_schema negation mood schema =
  let transform_morf = function
      Phrase phrase -> Xlist.map (transform_pers_phrase negation mood phrase) (fun phrase -> Phrase phrase)
    | E _ -> [Phrase Null] (* FIXME: E arguments are replaced by a null phrase *)
    | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation mood pos) (fun pos -> LexArg(id,pos,lex))
    | _ -> failwith "transform_impt_schema" in
  Xlist.map schema (fun field ->
    let morfs =
      if field.gf = SUBJ then [Phrase ProNG]
      else List.flatten (Xlist.map field.morfs transform_morf) in
    {field with morfs = morfs})
841 | - | |
(* Schema transformation for impersonal forms: the SUBJ position is replaced
   by [Phrase Pro]; every other position is rewritten with the generic
   [transform_pers_*] transformers.
   Raises [Failure "transform_imps_schema"] on an unexpected morf (the message
   was previously misspelled "transform_imps_chema"). *)
let transform_imps_schema negation mood schema =
  let transform_morf = function
      Phrase phrase -> Xlist.map (transform_pers_phrase negation mood phrase) (fun phrase -> Phrase phrase)
    | E _ -> [Phrase Null] (* FIXME: E arguments are replaced by a null phrase *)
    | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation mood pos) (fun pos -> LexArg(id,pos,lex))
    | _ -> failwith "transform_imps_schema" in
  Xlist.map schema (fun s ->
    {s with morfs =
      if s.gf = SUBJ then [Phrase Pro]
      else List.flatten (Xlist.map s.morfs transform_morf)})
851 | - | |
(* Schema transformation for gerunds.  The mood passed to the transformers is
   always "gerundial".
   FIXME: I assume ger resets mood - is that true?
   The SUBJ position uses the gerund-specific subject transformers, which also
   receive a flag telling whether the position has a non-empty [cr] or [ce];
   other positions use the generic [transform_pers_*] transformers.
   Raises [Failure "transform_ger_schema"] on an unexpected morf (the message
   previously named a non-existent "transform_fin_schema"). *)
let transform_ger_schema negation schema =
  let mood = "gerundial" in
  Xlist.map schema (fun s ->
    let has_cr_or_ce = s.cr <> [] || s.ce <> [] in
    let transform_morf =
      if s.gf = SUBJ then (function
          Phrase phrase -> Xlist.map (transform_ger_subj_phrase negation mood has_cr_or_ce phrase) (fun phrase -> Phrase phrase)
        | E phrase -> Xlist.map (transform_ger_subj_phrase negation mood has_cr_or_ce phrase) (fun phrase -> E phrase)
        | LexArg(id,pos,lex) -> Xlist.map (transform_ger_subj_pos negation mood pos) (fun pos -> LexArg(id,pos,lex))
        | _ -> failwith "transform_ger_schema")
      else (function
          Phrase phrase -> Xlist.map (transform_pers_phrase negation mood phrase) (fun phrase -> Phrase phrase)
        | E _ -> [Phrase Null] (* FIXME: E arguments are replaced by a null phrase *)
        | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation mood pos) (fun pos -> LexArg(id,pos,lex))
        | _ -> failwith "transform_ger_schema") in
    {s with morfs = List.flatten (Xlist.map s.morfs transform_morf)})
865 | - | |
(* Schema transformation used for adverbial participles and infinitives.
   The SUBJ position becomes [Phrase Pro] only when [pro] is true and the
   position's [ce] is empty; otherwise it becomes [Phrase Null].
   Other positions are rewritten with the generic [transform_pers_*]
   transformers.
   Raises [Failure "transform_padv_schema"] on an unexpected morf (the message
   previously named a non-existent "transform_fin_schema"). *)
let transform_padv_schema negation mood pro schema =
  let transform_morf = function
      Phrase phrase -> Xlist.map (transform_pers_phrase negation mood phrase) (fun phrase -> Phrase phrase)
    | E _ -> [Phrase Null] (* FIXME: E arguments are replaced by a null phrase *)
    | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation mood pos) (fun pos -> LexArg(id,pos,lex))
    | _ -> failwith "transform_padv_schema" in
  Xlist.map schema (fun s ->
    {s with morfs =
      if s.gf = SUBJ then
        (if s.ce = [] && pro then [Phrase Pro] else [Phrase Null])
      else List.flatten (Xlist.map s.morfs transform_morf)})
875 | - | |
(* Schema transformation for active participles: the SUBJ position is replaced
   by [Phrase Null]; every other position is rewritten with the generic
   [transform_pers_*] transformers.
   Raises [Failure "transform_pact_schema"] on an unexpected morf. *)
let transform_pact_schema negation mood schema =
  let transform_morf = function
      Phrase phrase -> Xlist.map (transform_pers_phrase negation mood phrase) (fun phrase -> Phrase phrase)
    | E _ -> [Phrase Null] (* FIXME: E arguments are replaced by a null phrase *)
    | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation mood pos) (fun pos -> LexArg(id,pos,lex))
    | _ -> failwith "transform_pact_schema" in
  Xlist.map schema (fun field ->
    let morfs =
      if field.gf = SUBJ then [Phrase Null]
      else List.flatten (Xlist.map field.morfs transform_morf) in
    {field with morfs = morfs})
885 | - | |
(* Schema transformation for passive participles.
   - OBJ positions are replaced by [Phrase Null];
   - SUBJ positions go through [transform_ppas_subj_phrase], which also
     receives a flag telling whether the position has a non-empty [cr] or [ce];
     a lexicalised subject [LexArg(_,SUBST(_,Str),_)] raises [Not_found];
   - all other positions use the generic [transform_pers_*] transformers.
   Raises [Failure "transform_ppas_schema"] on any other unexpected morf. *)
let transform_ppas_schema negation mood schema =
  let other_morf = function
      Phrase phrase -> Xlist.map (transform_pers_phrase negation mood phrase) (fun phrase -> Phrase phrase)
    | E _ -> [Phrase Null] (* FIXME: E arguments are replaced by a null phrase *)
    | LexArg(id,pos,lex) -> Xlist.map (transform_pers_pos negation mood pos) (fun pos -> LexArg(id,pos,lex))
    | _ -> failwith "transform_ppas_schema" in
  Xlist.map schema (fun field ->
    let morfs =
      match field.gf with
        OBJ -> [Phrase Null]
      | SUBJ ->
          let has_cr_or_ce = field.cr <> [] || field.ce <> [] in
          List.flatten (Xlist.map field.morfs (function
              Phrase phrase -> Xlist.map (transform_ppas_subj_phrase negation mood has_cr_or_ce phrase) (fun phrase -> Phrase phrase)
            | E phrase -> Xlist.map (transform_ppas_subj_phrase negation mood has_cr_or_ce phrase) (fun phrase -> E phrase)
            | LexArg(_,SUBST(_,Str),_) -> raise Not_found (* FIXME!!! *)
            | _ -> failwith "transform_ppas_schema"))
      | _ -> List.flatten (Xlist.map field.morfs other_morf) in
    {field with morfs = morfs})
900 | - | |
(* For every SUBJ position of [schema], appends an extra optional position
   accepting [Phrase PadvP] whose [ce] refers to the subject's [cr].
   When the subject has no [cr] yet it is given the identifier "3".
   Non-SUBJ positions are kept unchanged.
   Raises [Failure "add_padv"] when a subject has more than one [cr]. *)
let add_padv schema =
  let padv_field ce =
    let field = adjunct_schema_field "" Both [Phrase Null;Phrase PadvP] in
    {field with ce = ce} in
  let expand s =
    if s.gf <> SUBJ then [s]
    else
      match s.cr with
        [] -> [{s with cr=["3"]}; padv_field ["3"]]
      | [cr] -> [s; padv_field [cr]]
      | _ -> failwith "add_padv" in
  List.flatten (Xlist.map schema expand)
909 | - | |
(* Rewrites the morfs of every position of a noun schema using
   [transform_np_phrase] / [transform_np_pos]; [Multi[AdjP AllAgr]] is kept
   as is.  Raises [Failure "transform_np_schema"] on any other morf. *)
let transform_np_schema schema =
  let transform_morf = function
      Phrase phrase -> Xlist.map (transform_np_phrase phrase) (fun phrase -> Phrase phrase)
    | LexArg(id,pos,lex) -> Xlist.map (transform_np_pos pos) (fun pos -> LexArg(id,pos,lex))
    | Multi[AdjP AllAgr] as morf -> [morf]
    | _ -> failwith "transform_np_schema" in
  Xlist.map schema (fun field ->
    {field with morfs = List.flatten (Xlist.map field.morfs transform_morf)})
918 | - | |
(* Rewrites the morfs of a numeral schema according to [acm]:
   a lexical argument [SUBST(NumberUndef,CaseUndef)] gets case [GenAgr] for
   [Acm "rec"] and [AllAgr] for [Acm "congr"]; [Phrase Pro] is kept.
   (The argument direction was added in expand_lexicalizations_morfs.)
   Raises [Failure] for any other [acm] value (only when such an argument is
   actually met) and for any other morf. *)
let transform_num_schema acm schema =
  let agreement () =
    match acm with
      Acm "rec" -> GenAgr
    | Acm "congr" -> AllAgr
    | _ -> failwith "transform_num_schema" in
  let transform_morf = function
      Phrase Pro -> [Phrase Pro]
    | LexArg(id,SUBST(NumberUndef,CaseUndef),lex) -> [LexArg(id,SUBST(NumberUndef,agreement ()),lex)]
    | morf -> failwith ("transform_num_schema: " ^ ENIAMwalStringOf.morf morf) in
  Xlist.map schema (fun field ->
    {field with morfs = List.flatten (Xlist.map field.morfs transform_morf)})
929 | - | |
(* Rewrites the morfs of every position of an adjective schema using
   [transform_adj_phrase] / [transform_adj_pos].
   Raises [Failure "transform_adj_schema"] on a morf that is neither [Phrase]
   nor [LexArg]. *)
let transform_adj_schema schema =
  let transform_morf = function
      Phrase phrase -> Xlist.map (transform_adj_phrase phrase) (fun phrase -> Phrase phrase)
    | LexArg(id,pos,lex) -> Xlist.map (transform_adj_pos pos) (fun pos -> LexArg(id,pos,lex))
    | _ -> failwith "transform_adj_schema" in
  Xlist.map schema (fun field ->
    {field with morfs = List.flatten (Xlist.map field.morfs transform_morf)})
936 | - | |
(* Rewrites the morfs of every position of an adverb schema using
   [transform_adv_phrase] / [transform_adv_pos].
   Raises [Failure "transform_adv_schema"] on a morf that is neither [Phrase]
   nor [LexArg]. *)
let transform_adv_schema schema =
  let transform_morf = function
      Phrase phrase -> Xlist.map (transform_adv_phrase phrase) (fun phrase -> Phrase phrase)
    | LexArg(id,pos,lex) -> Xlist.map (transform_adv_pos pos) (fun pos -> LexArg(id,pos,lex))
    | _ -> failwith "transform_adv_schema" in
  Xlist.map schema (fun field ->
    {field with morfs = List.flatten (Xlist.map field.morfs transform_morf)})
943 | - | |
(* Rewrites the morfs of every position of a preposition schema:
   [Phrase(NumP _)] is kept unchanged, lexical arguments go through
   [transform_prep_pos].  Raises [Failure] (with the printed morf) otherwise. *)
let transform_prep_schema schema =
  let transform_morf = function
      Phrase(NumP _) as morf -> [morf]
    | LexArg(id,pos,lex) -> Xlist.map (transform_prep_pos pos) (fun pos -> LexArg(id,pos,lex))
    | morf -> failwith ("transform_prep_schema: " ^ ENIAMwalStringOf.morf morf) in
  Xlist.map schema (fun field ->
    {field with morfs = List.flatten (Xlist.map field.morfs transform_morf)})
950 | - | |
(* Rewrites the morfs of every position of a COMPAR schema using
   [transform_compar_phrase] / [transform_compar_pos].
   Raises [Failure] (with the printed morf) on any other morf. *)
let transform_compar_schema schema =
  let transform_morf = function
      Phrase phrase -> Xlist.map (transform_compar_phrase phrase) (fun phrase -> Phrase phrase)
    | LexArg(id,pos,lex) -> Xlist.map (transform_compar_pos pos) (fun pos -> LexArg(id,pos,lex))
    | morf -> failwith ("transform_compar_schema: " ^ ENIAMwalStringOf.morf morf) in
  Xlist.map schema (fun field ->
    {field with morfs = List.flatten (Xlist.map field.morfs transform_morf)})
957 | - | |
(* Validates a COMP schema: every morf must be a lexical argument with a
   [PERS] part of speech and is kept unchanged; anything else raises [Failure]
   with the printed morf.
   (The argument direction was added in expand_lexicalizations_morfs.) *)
let transform_comp_schema schema =
  let check_morf = function
      LexArg(_,PERS _,_) as morf -> morf
    | morf -> failwith ("transform_comp_schema: " ^ ENIAMwalStringOf.morf morf) in
  Xlist.map schema (fun field ->
    {field with morfs = Xlist.map field.morfs check_morf})
963 | - | |
(* Validates a QUB schema: every morf must be a lexical argument with a
   [PERS] part of speech and is kept unchanged; anything else raises [Failure]
   with the printed morf. *)
let transform_qub_schema schema =
  let check_morf = function
      LexArg(_,PERS _,_) as morf -> morf
    | morf -> failwith ("transform_qub_schema: " ^ ENIAMwalStringOf.morf morf) in
  Xlist.map schema (fun field ->
    {field with morfs = Xlist.map field.morfs check_morf})
969 | - | |
(* Drops from a schema every position whose morfs are exactly
   [Phrase Null; Phrase(AdjP CaseAgr)] or [Phrase Null; Phrase(AdjP Part)];
   the remaining positions keep their order. *)
let remove_adj_agr schema =
  let is_kept field =
    match field.morfs with
      [Phrase Null; Phrase(AdjP(CaseAgr | Part))] -> false
    | _ -> true in
  List.filter is_kept schema
975 | - | |
(* Returns the pair [(role, role_attr)] of the first schema position whose
   grammatical function equals [gf].
   Raises [Not_found] when no position matches. *)
let get_role gf schema =
  let position = List.find (fun s -> s.gf = gf) schema in
  position.role, position.role_attr
979 | - | |
(* Lists the concrete negation values covered by [negation]: an undefined
   negation expands to both [Negation] and [Aff], a concrete one to itself.
   Raises [Failure "expand_negation"] for [NegationNA]. *)
let expand_negation negation =
  match negation with
    NegationUndef -> [Negation; Aff]
  | NegationNA -> failwith "expand_negation"
  | Negation | Aff -> [negation]
985 | - | |
(* Lists the concrete aspect values covered by [aspect]: an undefined aspect
   expands to "imperf" and "perf", a concrete one to itself.
   Raises [Failure "expand_aspect"] for [AspectNA]. *)
let expand_aspect aspect =
  match aspect with
    AspectUndef -> [Aspect "imperf"; Aspect "perf"]
  | AspectNA -> failwith "expand_aspect"
  | Aspect _ -> [aspect]
990 | - | |
(* Reads the whole file [filename] with [File.load_file] and splits its
   contents on newline characters. *)
let load_list filename =
  let contents = File.load_file filename in
  let newline = Str.regexp "\n" in
  Str.split newline contents
993 | - | |
(* Lexeme sets read from resource files.  Each top-level binding below loads
   the file named by the corresponding *_filename value through [load_list]
   and turns the resulting list of lines into a string set. *)
994 | -let subst_uncountable_lexemes = StringSet.of_list (load_list subst_uncountable_lexemes_filename) | |
995 | -let subst_uncountable_lexemes2 = StringSet.of_list (load_list subst_uncountable_lexemes_filename2) | |
996 | -let subst_container_lexemes = StringSet.of_list (load_list subst_container_lexemes_filename) | |
997 | -let subst_numeral_lexemes = StringSet.of_list (load_list subst_numeral_lexemes_filename) | |
998 | -let subst_time_lexemes = StringSet.of_list (load_list subst_time_lexemes_filename) | |
999 | - | |
(* Hard-coded lexemes treated as pronouns: [noun_type] and [adj_type] return
   "pronoun" as the syntactic type for members of these sets. *)
1000 | -let subst_pronoun_lexemes = StringSet.of_list ["co"; "kto"; "cokolwiek"; "ktokolwiek"; "nic"; "nikt"; "coś"; "ktoś"; "to"] | |
1001 | -let adj_pronoun_lexemes = StringSet.of_list ["czyj"; "jaki"; "który"; "jakiś"; "ten"; "taki"] | |
1002 | - | |
1003 | -(* let adj_quant_lexemes = StringSet.of_list ["każdy"; "wszelki"; "wszystek"; "żaden"; "jakiś"; "pewien"; "niektóry"; "jedyny"; "sam"] *) | |
1004 | - | |
(* Union of both pronoun sets; [find_frames] gives these lexemes the "empty"
   default frame when they have no dictionary entry. *)
1005 | -let empty_valence_lexemes = StringSet.union subst_pronoun_lexemes adj_pronoun_lexemes | |
1006 | - | |
1007 | - | |
(* Classifies a noun-like token.  Returns [(nsyn, nsem)] where
   - [nsyn] is "pronoun", "proper" or "common", decided first by [pos] and
     then by membership of [lemma] in [subst_pronoun_lexemes];
   - [nsem] is the list of semantic types: [[Common "count"]] for the pronoun
     tags, [[Time]] for lemmas in [subst_time_lexemes], otherwise
     [Common "count"] preceded by [Common "mass"] and/or [Common "measure"]
     when the lemma is in the uncountable / container sets. *)
let noun_type lemma pos =
  let pronoun_pos = pos = "ppron12" || pos = "ppron3" || pos = "siebie" in
  let nsyn =
    match pos with
      "ppron12" | "ppron3" | "siebie" -> "pronoun"
    | "psubst" | "pdepr" | "date" -> "proper"
    | _ -> if StringSet.mem subst_pronoun_lexemes lemma then "pronoun" else "common" in
  let nsem =
    if pronoun_pos then [Common "count"]
    else if StringSet.mem subst_time_lexemes lemma then [Time]
    else
      let is_mass =
        StringSet.mem subst_uncountable_lexemes lemma || StringSet.mem subst_uncountable_lexemes2 lemma in
      let is_measure = StringSet.mem subst_container_lexemes lemma in
      (if is_measure then [Common "measure"] else [])
      @ (if is_mass then [Common "mass"] else [])
      @ [Common "count"] in
  nsyn,nsem
1022 | - | |
(* Returns the syntactic type of an adjective: "pronoun" when [lemma] belongs
   to [adj_pronoun_lexemes], "common" otherwise.
   FIXME: adjective types need investigating - review the list of adjectives.
   FIXME: add a letter-case heuristic to detect proper adjectives,
   e.g. Oświęcimski. *)
let adj_type lemma =
  if StringSet.mem adj_pronoun_lexemes lemma then "pronoun" else "common"
1026 | - | |
(* [transform_frame lexeme pos frame] expands one valence frame into the list
   of concrete frames for the token [lexeme] whose part of speech is [pos].
   - [Frame(DefaultAtrs ...)]: dispatches on the [pos] string.  Most branches
     first call [failwith] when an attribute that must be unset for that part
     of speech is set, then build the attribute value and run the matching
     [transform_*_schema] over the schema (extended with adjuncts where the
     branch does so).  Verb branches produce one frame per value returned by
     [expand_negation] (and [expand_aspect] where used).
   - [LexFrame] / [ComprepFrame] with [NoRestr]: dispatches on the [pos]
     constructor stored in the frame and transforms only the schema.
   Any other frame, and any unsupported part of speech, raises [Failure]. *)
1027 | -let transform_frame lexeme pos = function (* FIXME: add noun types here *) | |
1028 | - Frame(DefaultAtrs(meanings,refl,opinion,negation,pred,aspect),schema) as frame -> | |
(* Noun-like parts of speech: one NounAtrs frame per semantic type returned by noun_type. *)
1029 | - if pos = "subst" || pos = "depr" || pos = "psubst" || pos = "pdepr" || pos = "ppron12" || pos = "ppron3" || pos = "siebie" then ( | |
1030 | - if refl <> ReflEmpty || negation <> NegationNA || pred <> PredNA || aspect <> AspectNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); | |
1031 | - let nsyn,nsem(*,typ*) = noun_type lexeme pos in | |
1032 | - let schema = if nsyn = "pronoun" then [] else (remove_adj_agr schema) @ noun_adjuncts in (* FIXME: remove_adj_agr is in the dictionary temporarily *) | |
1033 | -(* List.flatten (Xlist.map typ (fun typ -> *) | |
1034 | - Xlist.map nsem (fun nsem -> Frame(NounAtrs(meanings,nsyn,nsem(*,typ*)),transform_np_schema schema)))(* ))*) else | |
(* Symbol-like tokens: always "proper" with Common "count"; no attribute checks and no adjuncts are added. *)
1035 | - if pos = "symbol" || pos = "date" || pos = "date-interval" || pos = "hour" || pos = "hour-minute" || pos = "hour-interval" || pos = "hour-minute-interval" || | |
1036 | - pos = "year" || pos = "year-interval" || pos = "day" || pos = "day-interval" || pos = "day-month" || pos = "day-month-interval" || | |
1037 | - pos = "match-result" || pos = "month-interval" || pos = "roman" || pos = "roman-interval" || pos = "url" || pos = "email" || pos = "obj-id" then | |
1038 | - let nsyn,nsem = "proper",[Common "count"] in | |
1039 | - Xlist.map nsem (fun nsem -> Frame(NounAtrs(meanings,nsyn,nsem),transform_np_schema schema)) else | |
(* Adjective-like parts of speech: a single AdjAtrs frame; "adjp" and "ordnum" keep the schema without adjuncts. *)
1040 | - if pos = "adj" || pos = "adjc" || pos = "adjp" || pos = "ordnum" then ( | |
1041 | - if refl <> ReflEmpty || negation <> NegationNA || aspect <> AspectNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); | |
1042 | - let adjsyn(*,adjsem,typ*) = adj_type lexeme in | |
1043 | - let schema = if pos = "adjp" || pos = "ordnum" then schema else if adjsyn = "pronoun" then [] else schema @ adj_adjuncts in | |
1044 | - let case = match pred with Pred -> Case "pred" | PredNA -> CaseUndef in | |
1045 | -(* Xlist.map typ (fun typ -> *) | |
1046 | - [Frame(AdjAtrs(meanings,case,adjsyn(*,adjsem,typ*)),transform_adj_schema schema)])(* )*) else | |
1047 | - if pos = "adv" then ( | |
1048 | - if refl <> ReflEmpty || negation <> NegationNA || pred <> PredNA || aspect <> AspectNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); (* FIXME: adverb types *) | |
1049 | - [Frame(EmptyAtrs meanings,transform_adv_schema (remove_adj_agr schema))]) else | |
(* fin: per negation, one indicative frame per expanded aspect ("pres" for imperf, "fut" for perf) plus one imperative frame that requires ImpAux. *)
1050 | - if pos = "fin" then ( | |
1051 | - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); | |
1052 | - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in | |
1053 | - let schema = (add_padv schema) @ verb_adjuncts in | |
1054 | - List.flatten (Xlist.map (expand_negation negation) (fun negation -> | |
1055 | - Xlist.map (expand_aspect aspect) (function | |
1056 | - Aspect "imperf" -> Frame(PersAtrs(meanings,s,negation,"indicative","pres",NoAux,Aspect "imperf"), transform_pers_schema negation "indicative" schema) | |
1057 | - | Aspect "perf" -> Frame(PersAtrs(meanings,s,negation,"indicative","fut",NoAux,Aspect "perf"), transform_pers_schema negation "indicative" schema) | |
1058 | - | _ -> failwith "transform_frame") @ | |
1059 | - [Frame(PersAtrs(meanings,s,negation,"imperative","fut",ImpAux,aspect), transform_pers_schema negation "imperative" schema)]))) else | |
1060 | - if pos = "bedzie" then ( | |
1061 | - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); | |
1062 | - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in | |
1063 | - let schema = (add_padv schema) @ verb_adjuncts in | |
1064 | - List.flatten (Xlist.map (expand_negation negation) (fun negation -> | |
1065 | - Xlist.map (expand_aspect aspect) (function | |
1066 | - Aspect "imperf" -> Frame(PersAtrs(meanings,s,negation,"indicative","fut",NoAux,Aspect "imperf"), transform_pers_schema negation "indicative" schema) | |
1067 | - | Aspect "perf" -> Frame(PersAtrs(meanings,s,negation,"indicative","fut",NoAux,Aspect "perf"), transform_pers_schema negation "indicative" schema) (* FIXME: unnecessary *) | |
1068 | - | _ -> failwith "transform_frame")))) else | |
(* praet: indicative past and conditional past for both aspects; imperf additionally gets an indicative future frame requiring FutAux. *)
1069 | - if pos = "praet" then ( | |
1070 | - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); | |
1071 | - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in | |
1072 | - let schema = (add_padv schema) @ verb_adjuncts in | |
1073 | - List.flatten (Xlist.map (expand_negation negation) (fun negation -> | |
1074 | - List.flatten (Xlist.map (expand_aspect aspect) (function | |
1075 | - Aspect "imperf" -> | |
1076 | - [Frame(PersAtrs(meanings,s,negation,"indicative","past",NoAux,Aspect "imperf"), transform_pers_schema negation "indicative" schema); | |
1077 | - Frame(PersAtrs(meanings,s,negation,"conditional","past",NoAux,Aspect "imperf"), transform_pers_schema negation "conditional" schema); | |
1078 | - Frame(PersAtrs(meanings,s,negation,"indicative","fut",FutAux,Aspect "imperf"), transform_pers_schema negation "indicative" schema)] | |
1079 | - | Aspect "perf" -> | |
1080 | - [Frame(PersAtrs(meanings,s,negation,"indicative","past",NoAux,Aspect "perf"), transform_pers_schema negation "indicative" schema); | |
1081 | - Frame(PersAtrs(meanings,s,negation,"conditional","past",NoAux,Aspect "perf"), transform_pers_schema negation "conditional" schema)] | |
1082 | - | _ -> failwith "transform_frame"))))) else | |
1083 | - if pos = "winien" then ( | |
1084 | - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); | |
1085 | - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in | |
1086 | - let schema = (add_padv schema) @ verb_adjuncts in | |
1087 | - List.flatten (Xlist.map (expand_negation negation) (fun negation -> | |
1088 | - List.flatten (Xlist.map (expand_aspect aspect) (fun aspect -> | |
1089 | - [Frame(PersAtrs(meanings,s,negation,"indicative","pres",NoAux,aspect), transform_pers_schema negation "indicative" schema); | |
1090 | - Frame(PersAtrs(meanings,s,negation,"conditional","past",NoAux,aspect), transform_pers_schema negation "conditional" schema); | |
1091 | - Frame(PersAtrs(meanings,s,negation,"indicative","past",PastAux,aspect), transform_pers_schema negation "indicative" schema)]))))) else | |
1092 | - if pos = "impt" then ( | |
1093 | - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); | |
1094 | - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in | |
1095 | - let schema = (add_padv schema) @ verb_adjuncts in | |
1096 | - Xlist.map (expand_negation negation) (fun negation -> | |
1097 | - Frame(PersAtrs(meanings,s,negation,"imperative","fut",NoAux,aspect),transform_impt_schema negation "imperative" schema))) else | |
1098 | - if pos = "imps" then ( | |
1099 | - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); | |
1100 | - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in | |
1101 | - let schema = (add_padv schema) @ verb_adjuncts in | |
1102 | - Xlist.map (expand_negation negation) (fun negation -> | |
1103 | - Frame(PersAtrs(meanings,s,negation,"indicative","past",NoAux,aspect),transform_imps_schema negation "indicative" schema))) else | |
1104 | - if pos = "pred" then ( | |
1105 | - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); | |
1106 | - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in | |
1107 | - let schema = (add_padv schema) @ verb_adjuncts in | |
1108 | - List.flatten (Xlist.map (expand_negation negation) (fun negation -> | |
1109 | - [Frame(PersAtrs(meanings,s,negation,"indicative","pres",NoAux,aspect), transform_pers_schema negation "indicative" schema); | |
1110 | - Frame(PersAtrs(meanings,s,negation,"indicative","fut",FutAux,aspect), transform_pers_schema negation "indicative" schema); | |
1111 | - Frame(PersAtrs(meanings,s,negation,"indicative","past",PastAux,aspect), transform_pers_schema negation "indicative" schema)]))) else | |
(* Non-personal forms: the role stored in NonPersAtrs is taken from the SUBJ position of the schema, defaulting to "Initiator". *)
1112 | - if pos = "pcon" || pos = "pant" || pos = "inf" then ( | |
1113 | - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); | |
1114 | - let role,role_attr = try get_role SUBJ schema with Not_found -> "Initiator","" in | |
1115 | - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in | |
1116 | - let schema = schema @ verb_adjuncts in | |
1117 | - Xlist.map (expand_negation negation) (fun negation -> | |
1118 | - Frame(NonPersAtrs(meanings,s,role,role_attr,negation,aspect),transform_padv_schema negation "indicative" true schema))) else | |
(* pact: any Not_found escaping the construction below yields no frames at all. *)
1119 | - if pos = "pact" then ( | |
1120 | - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); | |
1121 | - try | |
1122 | - let role,role_attr = try get_role SUBJ schema with Not_found -> "Initiator","" in | |
1123 | - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in | |
1124 | - let schema = schema @ verb_adjuncts in | |
1125 | - Xlist.map (expand_negation negation) (fun negation -> | |
1126 | - Frame(NonPersAtrs(meanings,s,role,role_attr,negation,aspect),transform_pact_schema negation "indicative" schema)) | |
1127 | - with Not_found -> []) else | |
(* ppas: the role comes from the OBJ position (default "Theme"); reflexive frames raise Not_found and therefore yield no frames, as does any Not_found raised while transforming the schema. *)
1128 | - if pos = "ppas" then ( | |
1129 | - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); | |
1130 | - try | |
1131 | - let role,role_attr = try get_role OBJ schema with Not_found -> "Theme","" in | |
1132 | - let s,schema = if refl = ReflSie then raise Not_found else lexeme, schema in | |
1133 | - let schema = schema @ verb_adjuncts in | |
1134 | - Xlist.map (expand_negation negation) (fun negation -> | |
1135 | - Frame(NonPersAtrs(meanings,s,role,role_attr,negation,aspect),transform_ppas_schema negation "indicative" schema)) | |
1136 | - with Not_found -> []) else | |
1137 | - if pos = "ger" then ( | |
1138 | - if pred <> PredNA then failwith ("transform_frame: " ^ ENIAMwalStringOf.frame lexeme frame); | |
1139 | - let s,schema = if refl = ReflSie then lexeme ^ " się", nosem_refl_schema_field :: schema else lexeme, schema in (* FIXME: can ger have a non-semantic "się"? *) | |
1140 | - let schema = schema @ verb_adjuncts in | |
1141 | - Xlist.map (expand_negation negation) (fun negation -> | |
1142 | - Frame(GerAtrs(meanings,s,negation,aspect),transform_ger_schema negation schema))) else | |
1143 | - failwith ("transform_frame: " ^ pos) | |
(* Lexicalised frames: only the schema is transformed; the negation (and, for NUM with AcmUndef, the acm) stored in the pos is expanded into one frame per value. *)
1144 | - | LexFrame(id,pos,NoRestr,schema) as frame -> | |
1145 | - (match pos with | |
1146 | - SUBST _ -> [LexFrame(id,pos,NoRestr,transform_np_schema schema)] | |
1147 | - | PREP _ -> [LexFrame(id,pos,NoRestr,transform_prep_schema schema)] | |
1148 | - | NUM(c,g,AcmUndef) -> | |
1149 | - Xlist.map [Acm "congr";Acm "rec"] (fun acm -> | |
1150 | - LexFrame(id,NUM(c,g,acm),NoRestr,transform_num_schema acm schema)) | |
1151 | - | ADJ(n,c,g,gr) -> [LexFrame(id,pos,NoRestr,transform_adj_schema schema)] | |
1152 | - | ADV(gr) -> [LexFrame(id,pos,NoRestr,transform_adv_schema schema)] | |
1153 | - | GER(n,c,g,a,negation,ReflEmpty) -> | |
1154 | - Xlist.map (expand_negation negation) (fun negation -> | |
1155 | - LexFrame(id,GER(n,c,g,a,negation,ReflEmpty),NoRestr,transform_ger_schema negation schema)) | |
1156 | - | PACT(n,c,g,a,negation,ReflEmpty) -> | |
1157 | - Xlist.map (expand_negation negation) (fun negation -> | |
1158 | - LexFrame(id,PACT(n,c,g,a,negation,ReflEmpty),NoRestr,transform_pact_schema negation "indicative" schema)) | |
1159 | - | PPAS(n,c,g,a,negation) -> | |
1160 | - Xlist.map (expand_negation negation) (fun negation -> | |
1161 | - LexFrame(id,PPAS(n,c,g,a,negation),NoRestr,transform_ppas_schema negation "indicative" schema)) | |
1162 | - | INF(a,negation,r) -> | |
1163 | - Xlist.map (expand_negation negation) (fun negation -> | |
1164 | - LexFrame(id,INF(a,negation,r),NoRestr,transform_padv_schema negation "indicative" false schema)) | |
1165 | - | QUB -> [LexFrame(id,pos,NoRestr,transform_qub_schema schema)] | |
1166 | - | COMPAR -> [LexFrame(id,pos,NoRestr,transform_compar_schema schema)] | |
1167 | - | COMP _ -> [LexFrame(id,pos,NoRestr,transform_comp_schema schema)] | |
1168 | - | PERS(negation,r) -> | |
1169 | - Xlist.map (expand_negation negation) (fun negation -> | |
1170 | - LexFrame(id,PERS(negation,r),NoRestr,transform_pers_schema negation "indicative" schema)) | |
1171 | - | _ -> failwith ("transform_frame:" ^ ENIAMwalStringOf.frame lexeme frame)) | |
1172 | - | ComprepFrame(s,pos,NoRestr,schema) as frame -> | |
1173 | - (match pos with | |
1174 | - PREP _ -> [ComprepFrame(s,pos,NoRestr,transform_prep_schema schema)] | |
1175 | - | ADV _ -> [ComprepFrame(s,pos,NoRestr,transform_adv_schema schema)] | |
1176 | - | _ -> failwith ("transform_frame:" ^ ENIAMwalStringOf.frame lexeme frame)) | |
1177 | - | frame -> failwith ("transform_frame:" ^ ENIAMwalStringOf.frame lexeme frame) | |
1178 | - | |
(* A frame with [Negation] is acceptable only when the lexeme "nie" is a key
   of [lexemes]; every other negation value is always acceptable. *)
let reduce_frame_negation lexemes negation =
  negation <> Negation || StringMap.mem lexemes "nie"
1182 | - | |
(* A "conditional" frame is acceptable only when the lexeme "by" is a key of
   [lexemes]; every other mood is always acceptable. *)
let reduce_frame_mood lexemes mood =
  mood <> "conditional" || StringMap.mem lexemes "by"
1186 | - | |
(* Tells whether the auxiliary required by a frame is available in [lexemes]:
   - [NoAux]: always;
   - [PastAux] / [FutAux]: the lexeme "być" must be present with the part of
     speech "praet" / "bedzie" respectively;
   - [ImpAux]: one of "niech", "niechaj", "niechże", "niechajże" must be
     present. *)
let reduce_frame_aux lexemes aux =
  let byc_has pos =
    try StringSet.mem (StringMap.find lexemes "być") pos
    with Not_found -> false in
  match aux with
    NoAux -> true
  | PastAux -> byc_has "praet"
  | FutAux -> byc_has "bedzie"
  | ImpAux -> List.exists (StringMap.mem lexemes) ["niech"; "niechaj"; "niechże"; "niechajże"]
1192 | - | |
(* Decides whether [frame] is compatible with the lexemes of the sentence.
   Noun, adjective and empty-attribute frames, as well as [LexFrame] and
   [ComprepFrame], are always kept.  Personal frames must pass the negation,
   mood and auxiliary checks; non-personal frames must pass the negation check
   unless [pos] is "pact" or "ppas"; gerund frames must pass the negation
   check.  Any other [Frame] attribute kind raises [Failure]. *)
let reduce_frame_atrs pos lexemes = function
    Frame((NounAtrs _ | AdjAtrs _ | EmptyAtrs _),_) | LexFrame _ | ComprepFrame _ -> true
  | Frame(PersAtrs(_,_,negation,mood,_,aux,_),_) ->
      reduce_frame_negation lexemes negation
      && reduce_frame_mood lexemes mood
      && reduce_frame_aux lexemes aux
  | Frame(NonPersAtrs(_,_,_,_,negation,_),_) ->
      pos = "pact" || pos = "ppas" || reduce_frame_negation lexemes negation
  | Frame(GerAtrs(_,_,negation,_),_) -> reduce_frame_negation lexemes negation
  | Frame(_,_) as frame -> failwith ("reduce_frame_atrs: " ^ ENIAMwalStringOf.frame "" frame)
1203 | - | |
(* Keeps, in their original order, only the frames accepted by
   [reduce_frame_atrs pos lexemes].
   Implemented with [List.filter] instead of a hand-written recursion that
   concatenated singleton lists with [@].
   Propagates the [Failure] raised by [reduce_frame_atrs] on an unsupported
   frame. *)
let reduce_frame_atrs_list pos lexemes frames =
  List.filter (reduce_frame_atrs pos lexemes) frames
1207 | - | |
(* [find_frames lexemes] builds the valence map for a set of lexemes.
   [lexemes] is folded as a string map whose values are folded as string sets
   of part-of-speech tags.  Steps, as visible below:
   1. for every lexeme/pos pair, look up frames in [walenty] (keyed by
      [simplify_pos pos], then by lexeme); when none are found, fall back to a
      default frame chosen by [simplify_pos pos] (no frame for unknown
      categories); frames are added to the accumulator by
      [convert_frame_sem] / [convert_frame];
   2. apply [expand_restr] to every frame;
   3. apply [transform_frame] to every frame and keep only the frames accepted
      by [reduce_frame_atrs_list].
   The result is indexed by lexeme and then by pos. *)
1208 | -let find_frames lexemes = | |
1209 | -(* print_endline "find_frames 1"; *) | |
1210 | - let valence = StringMap.fold lexemes StringMap.empty (fun valence lexeme poss -> | |
1211 | -(* let poss = StringSet.fold poss StringSet.empty (fun poss pos -> StringSet.add poss (simplify_pos pos)) in *) | |
1212 | -(* Printf.printf "find_frame: %s |%s|\n" s (String.concat " " (StringSet.to_list lexemes)); *) | |
1213 | - StringSet.fold poss valence (fun valence pos -> | |
1214 | - let valence = | |
(* A missing category or lexeme in walenty is treated as "no frames". *)
1215 | - let frames_sem = try StringMap.find (StringMap.find walenty (simplify_pos pos)) lexeme with Not_found -> [] in | |
1216 | -(* if frames_sem <> [] then Printf.printf "%s %s in TEI\n%!" lexeme pos; *) | |
1217 | - if frames_sem <> [] then | |
1218 | - Xlist.fold frames_sem valence (fun valence frame -> | |
1219 | - convert_frame_sem expands subtypes equivs lexemes valence lexeme pos frame) | |
1220 | - else | |
(* Default frames, given as 6-tuples of strings whose first component names the frame kind. *)
1221 | - let frames = match simplify_pos pos with | |
1222 | - "verb" -> ((*try StringMap.find verb_frames lexeme with Not_found ->*) ["verb","","","","",""]) | |
1223 | - | "noun" -> ((*try StringMap.find noun_frames lexeme with Not_found ->*) if StringSet.mem empty_valence_lexemes lexeme then ["empty","","","","",""] else ["noun","","","","",""]) | |
1224 | - | "adj" -> ((*try StringMap.find adj_frames lexeme with Not_found ->*) if StringSet.mem empty_valence_lexemes lexeme then ["empty","","","","",""] else ["adj","","","","",""]) | |
1225 | - | "adv" -> ((*try StringMap.find adv_frames lexeme with Not_found ->*) ["adv","","","","",""]) | |
1226 | - | "pron" -> ["empty","","","","",""] | |
1227 | - | "adjp" -> ["empty","","","","",""] | |
1228 | - | "ordnum" -> ["empty","","","","",""] | |
1229 | - | "symbol" -> ["empty","","","","",""] | |
1230 | - | "date" -> ["date","","","","",""] | |
1231 | - | "date-interval" -> ["empty","","","","",""] | |
1232 | - | "hour" -> ["hour","","","","",""] | |
1233 | - | "hour-minute" -> ["hour","","","","",""] | |
1234 | - | "hour-interval" -> ["empty","","","","",""] | |
1235 | - | "hour-minute-interval" -> ["empty","","","","",""] | |
1236 | - | "year" -> ["empty","","","","",""] | |
1237 | - | "year-interval" -> ["empty","","","","",""] | |
1238 | - | "day" -> ["day","","","","",""] | |
1239 | - | "day-interval" -> ["day","","","","",""] | |
1240 | - | "day-month" -> ["date2","","","","",""] | |
1241 | - | "day-month-interval" -> ["empty","","","","",""] | |
1242 | - | "match-result" -> ["empty","","","","",""] | |
1243 | - | "month-interval" -> ["empty","","","","",""] | |
1244 | - | "roman" -> ["empty","","","","",""] | |
1245 | - | "roman-interval" -> ["empty","","","","",""] | |
1246 | - | "url" -> ["empty","","","","",""] | |
1247 | - | "email" -> ["empty","","","","",""] | |
1248 | - | "obj-id" -> ["empty","","","","",""] | |
1249 | - | _ -> [] in | |
1250 | -(* if frames = [] then valence else | |
1251 | - Printf.printf "find_frame: %s |l|=%d\n" s (Xlist.size l); *) | |
1252 | - Xlist.fold frames valence (fun valence frame -> | |
1253 | - convert_frame expands subtypes equivs lexemes valence lexeme pos frame) in | |
(* The list folded here is currently always empty, so this fold returns [valence] unchanged. *)
1254 | - Xlist.fold ((*try StringMap.find compreps lexeme with Not_found ->*) []) valence (fun valence (cpos,frame) -> (* FIXME: compound prepositions are disabled for now *) | |
1255 | - if cpos = pos then convert_comprep_frame expands subtypes equivs lexemes valence lexeme pos frame else valence))) in | |
1256 | -(* print_endline "find_frames 2"; *) | |
1257 | - let valence = StringMap.mapi valence (fun lexeme poss -> | |
1258 | - StringMap.mapi poss (fun pos frames -> | |
1259 | - List.flatten (Xlist.map frames (fun frame -> | |
1260 | -(* print_endline ("find_frames: " ^ ENIAMwalStringOf.frame lexeme frame); *) | |
1261 | - expand_restr valence lexeme pos frame)))) in | |
1262 | -(* print_endline "find_frames 3"; *) | |
1263 | - let valence = StringMap.mapi valence (fun lexeme poss -> | |
1264 | - StringMap.mapi poss (fun pos frames -> | |
1265 | - reduce_frame_atrs_list pos lexemes (List.flatten (Xlist.map frames (transform_frame lexeme pos))))) in | |
1266 | -(* let valence = StringMap.mapi valence (fun lexeme poss -> | |
1267 | - StringMap.mapi poss (fun pos frames -> | |
1268 | - Xlist.map frames (assign_thematic_role pos))) in*) | |
1269 | -(* StringMap.iter valence (fun lexeme poss -> | |
1270 | - StringMap.iter poss (fun pos frames -> | |
1271 | - Xlist.iter frames (fun frame -> print_endline (ENIAMwalStringOf.frame lexeme frame))));*) | |
1272 | -(* print_endline "find_frames 4"; *) | |
1273 | - valence | |
1274 | 528 | |
1275 | 529 | (*let _ = |
1276 | 530 | let valence = Xlist.fold all_frames StringMap.empty (fun valence (pos,frame_map) -> |
... | ... |
lexSemantics/ENIAMwalParser.ml
1 | 1 | (* |
2 | - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty". | |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
2 | + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. | |
3 | + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | 5 | * |
6 | 6 | * This library is free software: you can redistribute it and/or modify |
7 | 7 | * it under the terms of the GNU Lesser General Public License as published by |
... | ... | @@ -156,7 +156,7 @@ let parse_negation = function |
156 | 156 | (* | [Text ""] -> NegationNA *) |
157 | 157 | | l -> failwith ("parse_negation: " ^ string_of_token_list l) |
158 | 158 | |
159 | -let parse_refl = function | |
159 | +(* let parse_refl = function | |
160 | 160 | (* [] -> ReflEmpty |
161 | 161 | | [Text "się"] -> ReflSie |
162 | 162 | | [Text ""] -> ReflEmpty |
... | ... | @@ -164,7 +164,7 @@ let parse_refl = function |
164 | 164 | | [Text "true"] -> ReflSie *) |
165 | 165 | | [Text "nosię"] -> ReflFalse |
166 | 166 | | [Text "się"] -> ReflTrue |
167 | - | l -> failwith ("parse_refl: " ^ string_of_token_list l) | |
167 | + | l -> failwith ("parse_refl: " ^ string_of_token_list l) *) | |
168 | 168 | |
169 | 169 | let parse_ctype = function |
170 | 170 | [Text "int"] -> Int |
... | ... | @@ -172,11 +172,11 @@ let parse_ctype = function |
172 | 172 | | [Text "_"] -> CompTypeUndef |
173 | 173 | | l -> failwith ("parse_ctype: " ^ string_of_token_list l) |
174 | 174 | |
175 | -let parse_acm = function | |
175 | +(* let parse_acm = function | |
176 | 176 | (* [Text "int"] -> Int |
177 | 177 | | [Text "rel"] -> Rel *) |
178 | 178 | | [Text "_"] -> AcmUndef |
179 | - | l -> failwith ("parse_acm: " ^ string_of_token_list l) | |
179 | + | l -> failwith ("parse_acm: " ^ string_of_token_list l) *) | |
180 | 180 | |
181 | 181 | let parse_comp = function |
182 | 182 | | [Text "co"] -> Comp "co" (* subst qub prep comp *) |
... | ... | @@ -234,17 +234,17 @@ let parse_pos = function |
234 | 234 | | "PPRON3",[number;case] -> PPRON3(parse_number number,parse_case case) |
235 | 235 | | "SIEBIE",[case] -> SIEBIE(parse_case case) |
236 | 236 | | "PREP",[case] -> PREP(parse_case case) |
237 | - | "NUM",[case;gender;acm] -> NUM(parse_case case,parse_gender gender,parse_acm acm) | |
237 | + | "NUM",[case;gender] -> NUM(parse_case case,parse_gender gender) | |
238 | 238 | | "ADJ",[number;case;gender;grad] -> ADJ(parse_number number,parse_case case,parse_gender gender,parse_grad grad) |
239 | 239 | | "ADV",[grad] -> ADV(parse_grad grad) |
240 | - | "GER",[number;case;gender;aspect;negation;refl] -> GER(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation,parse_refl refl) | |
240 | + | "GER",[number;case;gender;aspect;negation] -> GER(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation) | |
241 | 241 | | "PPAS",[number;case;gender;aspect;negation] -> PPAS(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation) |
242 | - | "PACT",[number;case;gender;aspect;negation;refl] -> PACT(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation,parse_refl refl) | |
243 | - | "INF",[aspect;negation;refl] -> INF(parse_aspect aspect,parse_negation negation,parse_refl refl) | |
242 | + | "PACT",[number;case;gender;aspect;negation] -> PACT(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation) | |
243 | + | "INF",[aspect;negation] -> INF(parse_aspect aspect,parse_negation negation) | |
244 | 244 | | "QUB",[] -> QUB |
245 | - | "COMPAR",[] -> COMPAR | |
245 | + | "COMPAR",[] -> COMPAR Str | |
246 | 246 | | "COMP",[ctype] -> COMP(parse_ctype ctype) |
247 | - | "PERS",[negation;refl] -> PERS(parse_negation negation,parse_refl refl) | |
247 | + | "PERS",[negation] -> PERS(parse_negation negation) | |
248 | 248 | | s,ll -> failwith ("parse_pos: " ^ s ^ "(" ^ String.concat "," (Xlist.map ll string_of_token_list) ^ ")") |
249 | 249 | |
250 | 250 | let rec parse_phrase = function |
... | ... | @@ -253,7 +253,7 @@ let rec parse_phrase = function |
253 | 253 | | "adjp",[case] -> AdjP(parse_case case) |
254 | 254 | | "prepadjp",[[Text prep]; case] -> PrepAdjP(prep,parse_case case) |
255 | 255 | | "comprepnp",[[Text prep]] -> ComprepNP prep |
256 | - | "comparp",[[Text prep]] -> ComparP prep | |
256 | + | "comparp",[[Text prep]] -> ComparP(prep,Str) | |
257 | 257 | | "cp",[ctype;comp] -> CP(parse_ctype ctype,parse_comp comp) |
258 | 258 | | "ncp",[case;ctype;comp] -> NCP(parse_case case,parse_ctype ctype,parse_comp comp) |
259 | 259 | | "prepncp",[[Text prep];case;ctype;comp] -> PrepNCP(prep,parse_case case,parse_ctype ctype,parse_comp comp) |
... | ... |
lexSemantics/ENIAMwalReduce.ml
1 | 1 | (* |
2 | - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty". | |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
2 | + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. | |
3 | + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | 5 | * |
6 | 6 | * This library is free software: you can redistribute it and/or modify |
7 | 7 | * it under the terms of the GNU Lesser General Public License as published by |
... | ... | @@ -24,7 +24,7 @@ let create_phrase_reqs s (reqs,noreqs) = function |
24 | 24 | | PrepNP(prep,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs |
25 | 25 | | PrepAdjP(prep,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs |
26 | 26 | | PrepNCP(prep,_,_,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs |
27 | - | ComparP(prep) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | |
27 | + | ComparP(prep,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | |
28 | 28 | | FixedP(prep) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs |
29 | 29 | | SimpleLexArg(lex,_) -> StringMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs |
30 | 30 | | LexArg(_,lex,_) -> StringMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs |
... | ... | @@ -35,7 +35,7 @@ let create_phrase_reqs2 s (reqs,noreqs) = function |
35 | 35 | | PrepNP(prep,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs |
36 | 36 | | PrepAdjP(prep,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs |
37 | 37 | | PrepNCP(prep,_,_,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs |
38 | - | ComparP(prep) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | |
38 | + | ComparP(prep,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | |
39 | 39 | | FixedP(prep) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs |
40 | 40 | | SimpleLexArg(lex,_) -> IntMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs |
41 | 41 | | LexArg(_,lex,_) -> IntMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs |
... | ... | @@ -105,7 +105,7 @@ let reduce_phrase (test_comprep_reqs,test_comprep_reqs2,test_lexarg_reqs,test_le |
105 | 105 | | PrepNP(prep,case) as phrase -> if test_lexemes prep then phrase else raise Not_found |
106 | 106 | | PrepAdjP(prep,case) as phrase -> if test_lexemes prep then phrase else raise Not_found |
107 | 107 | | ComprepNP(prep) as phrase -> if test_comprep_reqs prep && test_comprep_reqs2 prep then phrase else raise Not_found |
108 | - | ComparP(prep) as phrase -> if test_lexemes prep then phrase else raise Not_found | |
108 | + | ComparP(prep,case) as phrase -> if test_lexemes prep then phrase else raise Not_found | |
109 | 109 | | CP(ctype,comp) -> CP(ctype,reduce_comp test_lexemes comp) |
110 | 110 | | NCP(case,ctype,comp) -> if test_lexemes "to" then NCP(case,ctype,reduce_comp test_lexemes comp) else raise Not_found |
111 | 111 | | PrepNCP(prep,case,ctype,comp) -> if test_lexemes prep && test_lexemes "to" then PrepNCP(prep,case,ctype,reduce_comp test_lexemes comp) else raise Not_found |
... | ... | @@ -204,7 +204,10 @@ let entries,schemata,connected = |
204 | 204 | - usunięcie adjunctów |
205 | 205 | - uwzględnienie cech morfoskładniowych |
206 | 206 | - scalenie schematów |
207 | - - dodanie adjunctów | |
207 | + - dodanie adjunctów - pamiętać o padvp | |
208 | +*) | |
209 | +(* TODO | |
210 | + - leksykalizacje bez schema | |
208 | 211 | *) |
209 | 212 | |
210 | 213 | (* let _ = |
... | ... |
lexSemantics/ENIAMwalStringOf.ml
1 | 1 | (* |
2 | - * ENIAMwalenty, a converter for Polish Valence Dictionary "Walenty". | |
2 | + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. | |
3 | 3 | * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> |
4 | 4 | * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences |
5 | 5 | * |
... | ... | @@ -55,11 +55,11 @@ let case = function |
55 | 55 | | Part -> "part" |
56 | 56 | | CaseAgr -> "agr" |
57 | 57 | (* | CaseUAgr -> "uagr" |
58 | - | NomAgr -> "nomagr" | |
59 | - | GenAgr -> "genagr" | |
60 | - | AllAgr -> "allagr" | |
61 | 58 | | AllUAgr -> "alluagr" *) |
62 | 59 | | CaseUndef -> "_" |
60 | + | AllAgr -> "allagr" | |
61 | + | NomAgr -> "nomagr" | |
62 | + | GenAgr -> "genagr" | |
63 | 63 | |
64 | 64 | let rec comp = function |
65 | 65 | Comp s -> s |
... | ... | @@ -87,15 +87,15 @@ let grad = function |
87 | 87 | Grad s -> s |
88 | 88 | | GradUndef -> "_" |
89 | 89 | |
90 | -let refl = function | |
90 | +(* let refl = function | |
91 | 91 | (* ReflEmpty -> "" *) |
92 | 92 | | ReflTrue -> "się" |
93 | 93 | | ReflFalse -> "nosię" |
94 | - | ReflUndef -> "_" | |
94 | + | ReflUndef -> "_" *) | |
95 | 95 | |
96 | -let acm = function | |
96 | +(* let acm = function | |
97 | 97 | Acm s -> s |
98 | - | AcmUndef -> "_" | |
98 | + | AcmUndef -> "_" *) | |
99 | 99 | |
100 | 100 | let gf = function |
101 | 101 | SUBJ -> "subj" |
... | ... | @@ -108,17 +108,17 @@ let pos = function |
108 | 108 | | PPRON3(n,c) -> "PPRON3(" ^ number n ^ "," ^ case c ^ ")" |
109 | 109 | | SIEBIE(c) -> "SIEBIE(" ^ case c ^ ")" |
110 | 110 | | PREP(c) -> "PREP(" ^ case c ^ ")" |
111 | - | NUM(c,g,a) -> "NUM(" ^ case c ^ "," ^ gender g ^ "," ^ acm a ^ ")" | |
111 | + | NUM(c,g) -> "NUM(" ^ case c ^ "," ^ gender g ^ ")" | |
112 | 112 | | ADJ(n,c,g,gr) -> "ADJ(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ grad gr ^ ")" |
113 | 113 | | ADV(gr) -> "ADV(" ^ grad gr ^ ")" |
114 | - | GER(n,c,g,a,neg,r) -> "GER(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ "," ^ refl r ^ ")" | |
115 | - | PACT(n,c,g,a,neg,r) -> "PACT(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ "," ^ refl r ^ ")" | |
114 | + | GER(n,c,g,a,neg) -> "GER(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ ")" | |
115 | + | PACT(n,c,g,a,neg) -> "PACT(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ ")" | |
116 | 116 | | PPAS(n,c,g,a,neg) -> "PPAS(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ ")" |
117 | - | INF(a,n,r) -> "INF(" ^ aspect a ^ "," ^ negation n ^ "," ^ refl r ^ ")" | |
117 | + | INF(a,n) -> "INF(" ^ aspect a ^ "," ^ negation n ^ ")" | |
118 | 118 | | QUB -> "QUB" |
119 | - | COMPAR -> "COMPAR" | |
119 | + | COMPAR c -> "COMPAR(" ^ case c ^ ")" | |
120 | 120 | | COMP(c) -> "COMP(" ^ comp_type c ^ ")" |
121 | - | PERS(n,r) -> "PERS(" ^ negation n ^ "," ^ refl r ^ ")" | |
121 | + | PERS(n) -> "PERS(" ^ negation n ^ ")" | |
122 | 122 | | FIXED -> "FIXED" |
123 | 123 | |
124 | 124 | let rec phrase = function |
... | ... | @@ -129,7 +129,7 @@ let rec phrase = function |
129 | 129 | (* | NumP(c) -> "nump(" ^ case c ^ ")" |
130 | 130 | | PrepNumP(prep,c) -> "prepnump(" ^ prep ^ "," ^ case c ^ ")" *) |
131 | 131 | | ComprepNP(prep) -> "comprepnp(" ^ prep ^ ")" |
132 | - | ComparP(prep) -> "comparp(" ^ prep ^ ")" | |
132 | + | ComparP(prep,c) -> "comparp(" ^ prep ^ "," ^ case c ^ ")" | |
133 | 133 | | CP(ct,co) -> "cp(" ^ comp_type ct ^ "," ^ comp co ^ ")" |
134 | 134 | | NCP(c,ct,co) -> "ncp(" ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")" |
135 | 135 | | PrepNCP(prep,c,ct,co) -> "prepncp(" ^ prep ^ "," ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")" |
... | ... |
lexSemantics/ENIAMwalTypes.ml
1 | 1 | (* |
2 | - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty". | |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
2 | + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. | |
3 | + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | 5 | * |
6 | 6 | * This library is free software: you can redistribute it and/or modify |
7 | 7 | * it under the terms of the GNU Lesser General Public License as published by |
... | ... | @@ -24,14 +24,14 @@ type opinion = Pewny | Potoczny | Watpliwy | Archaiczny | Zly | Wulgarny | Nieok |
24 | 24 | type negation = Negation | Aff | NegationUndef (*| NegationNA*) |
25 | 25 | type pred = PredTrue | PredFalse | PredUndef (*| PredNA*) |
26 | 26 | type aspect = Aspect of string | AspectUndef (*| AspectNA*) |
27 | -type case = Case of string | Str | Part | CaseAgr (*| NomAgr | GenAgr | AllAgr*) | CaseUndef (*| AllUAgr | CaseUAgr*) | |
27 | +type case = Case of string | Str | Part | CaseAgr | CaseUndef (*| AllUAgr | CaseUAgr*) | GenAgr | NomAgr | AllAgr | |
28 | 28 | type comp = Comp of string | Zeby | Gdy | CompUndef |
29 | 29 | type comp_type = Int | Rel | CompTypeUndef (*| CompTypeAgr*) |
30 | 30 | type number = Number of string | NumberUndef | NumberAgr |
31 | 31 | type gender = Gender of string | GenderUndef | GenderAgr | Genders of string list |
32 | 32 | type grad = Grad of string | GradUndef |
33 | -type refl = (*ReflEmpty |*) ReflTrue | ReflFalse | ReflUndef | |
34 | -type acm = Acm of string | AcmUndef | |
33 | +(* type refl = (*ReflEmpty |*) ReflTrue | ReflFalse | ReflUndef *) | |
34 | +(* type acm = Acm of string | AcmUndef *) | |
35 | 35 | |
36 | 36 | (*type mood = (*Mood of*) string (*| MoodUndef*) |
37 | 37 | type tense = string |
... | ... | @@ -47,17 +47,17 @@ type pos = |
47 | 47 | | PPRON3 of number * case |
48 | 48 | | SIEBIE of case |
49 | 49 | | PREP of case |
50 | - | NUM of case * gender * acm | |
50 | + | NUM of case * gender | |
51 | 51 | | ADJ of number * case * gender * grad |
52 | 52 | | ADV of grad |
53 | - | GER of number * case * gender * aspect * negation * refl | |
54 | - | PACT of number * case * gender * aspect * negation * refl | |
53 | + | GER of number * case * gender * aspect * negation | |
54 | + | PACT of number * case * gender * aspect * negation | |
55 | 55 | | PPAS of number * case * gender * aspect * negation |
56 | - | INF of aspect * negation * refl | |
56 | + | INF of aspect * negation | |
57 | 57 | | QUB |
58 | - | COMPAR | |
58 | + | COMPAR of case | |
59 | 59 | | COMP of comp_type |
60 | - | PERS of (*number * gender * aspect * person * *)negation * refl | |
60 | + | PERS of (*number * gender * aspect * person * *)negation | |
61 | 61 | | FIXED |
62 | 62 | |
63 | 63 | type phrase = |
... | ... | @@ -68,7 +68,7 @@ type phrase = |
68 | 68 | (* | NumP of case |
69 | 69 | | PrepNumP of string * case *) |
70 | 70 | | ComprepNP of string |
71 | - | ComparP of string (** case*) | |
71 | + | ComparP of string * case | |
72 | 72 | | CP of comp_type * comp |
73 | 73 | | NCP of case * comp_type * comp |
74 | 74 | | PrepNCP of string * case * comp_type * comp |
... | ... | @@ -128,8 +128,8 @@ let empty_meaning = {mng_id = (-1); |
128 | 128 | | GerAtrs of meaning list * string * negation * aspect |
129 | 129 | | NonPersAtrs of meaning list * string * string * string * negation * aspect *) |
130 | 130 | |
131 | -type schema = {sch_id: int; opinion: opinion; reflexiveMark: refl; aspect: aspect; | |
132 | - negativity: negation; predicativity: pred; positions: position list; text_rep: string} | |
131 | +(* type schema = {sch_id: int; opinion: opinion; reflexiveMark: refl; aspect: aspect; | |
132 | + negativity: negation; predicativity: pred; positions: position list; text_rep: string} *) | |
133 | 133 | |
134 | 134 | type lex_entry = |
135 | 135 | (* Frame of frame_atrs * position list *) |
... | ... |
lexSemantics/ENIAMwalenty.ml
1 | 1 | (* |
2 | - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty". | |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
2 | + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. | |
3 | + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | 5 | * |
6 | 6 | * This library is free software: you can redistribute it and/or modify |
7 | 7 | * it under the terms of the GNU Lesser General Public License as published by |
... | ... |
lexSemantics/README
... | ... | @@ -2,8 +2,7 @@ ENIAMsemValence Version 1.0 : |
2 | 2 | ----------------------- |
3 | 3 | |
4 | 4 | ENIAMsemValence is a library that assigns lexicosemantic information to tokens.
5 | -It recognizes named entities and assigns thematic roles, | |
6 | -senses, valence and other semantic information to tokens. | |
5 | +It assigns thematic roles, word senses, valence and other semantic information to tokens. | |
7 | 6 | |
8 | 7 | Install |
9 | 8 | ------- |
... | ... |
lexSemantics/TODO
... | ... | @@ -3,3 +3,56 @@ |
3 | 3 | - sprawdzić czy walencja nazw własnych jest dobrze zrobiona. |
4 | 4 | - trzeba zrobić słownik nazw własnych |
5 | 5 | - trzeba poprawić selekcję preferencji selekcyjnych: jeśli podrzędnikiem jest zaimek, nie muszą jawnie występować wśród sensów.
6 | + | |
7 | +- błędy w realizacjach | |
8 | +xp(abl[prepadjp(z,gen)] | |
9 | +na korzyść - na niekorzyść | |
10 | +xp(mod[prepnp(jako,str)]) -> xp(mod[compar(jako)]) | |
11 | +xp(mod[prepadjp(jako,str)]) -> xp(mod[compar(jako)]) | |
12 | +na sposób - zgłoszone | |
13 | + | |
14 | +cyrk advp(misc) [54480] | |
15 | +banalnie - pred | |
16 | +subst woda lex(880,woda,subst) {lex(święcony,ADJ(agr,gen,agr,pos))} | |
17 | + | |
18 | +uciąć: pewny: _: : perf: subj{np(str)} + obj{lex(np(str),sg,'gałąź',ratr1({lex(cp(rel[który]),aff,'siedzieć',,ratr(subj{np(str)}+{lex(adjp(agr),agr,agr,pos,'sam',natr)}+{lex(prepadjp(na,loc),sg,f,pos,'który',natr)}))}))} | |
19 | + | |
20 | +np(str) -> adjp(agr) | |
21 | +przychodzić: pewny: _: : imperf: subj{lex(np(str),sg,'godzina',ratr1({lex(np(str),agr,'zły',natr)}))} + {prepnp(na,acc)} | |
22 | + | |
23 | +Czym jest podmiot podniesiony w poniższych ramach i jak się zachowuje w stronie biernej? | |
24 | +obladzać: pewny: _: : imperf: subj{E} + obj{np(str)} + {np(dat)} | |
25 | +oblodzić: pewny: _: : perf: subj{E} + obj{np(str)} + {np(dat)} | |
26 | + | |
27 | +Czy 'zły' nie powinien być tu przymiotnikiem? | |
28 | +przychodzić: pewny: _: : imperf: subj{lex(np(str),sg,'godzina',ratr1({lex(np(str),agr,'zły',natr)}))} + {prepnp(na,acc)} | |
29 | +przyjść: pewny: _: : perf: subj{lex(np(str),sg,'godzina',ratr1({lex(np(str),agr,'zły',natr)}))} + {prepnp(na,acc)} | |
30 | + | |
31 | +Czy zamiast adjp(str) -> adjp(agr) | |
32 | +kreślić: pewny: _: : imperf: subj{np(str)} + obj{np(str)} + {lex(np(inst),pl,XOR('barwa','kolor'),ratr({adjp(str)}))} | |
33 | +malować: pewny: _: : imperf: subj{np(str)} + obj{np(str)} + {lex(np(inst),pl,XOR('barwa','kolor'),ratr({adjp(str)}))} | |
34 | +namalować: pewny: _: : perf: subj{np(str)} + obj{np(str)} + {lex(np(inst),pl,XOR('barwa','kolor'),ratr({adjp(str)}))} | |
35 | +odmalować: pewny: _: : perf: subj{np(str)} + obj{np(str)} + {lex(np(inst),pl,XOR('barwa','kolor'),ratr({adjp(str)}))} | |
36 | +odmalowywać: pewny: _: : imperf: subj{np(str)} + obj{np(str)} + {lex(np(inst),pl,XOR('barwa','kolor'),ratr({adjp(str)}))} | |
37 | +zajrzeć: pewny: _: : perf: subj{lex(np(str),sg,XOR('bieda','głód','nędza','śmierć'),atr({adjp(str)}))} + {np(dat)} + {lex(prepnp(w,acc),pl,'oko',natr)} | |
38 | +chwycić: pewny: _: : perf: subj{np(str)} + {lex(np(inst),pl,'usta',atr({adjp(str)}))} + {lex(np(str),sg,'powietrze',atr({adjp(agr)}))} | |
39 | +chwytać: pewny: _: : imperf: subj{np(str)} + {lex(np(inst),pl,'usta',atr({adjp(str)}))} + {lex(np(str),sg,'powietrze',atr({adjp(agr)}))} | |
40 | +łapać: pewny: _: : imperf: subj{np(str)} + obj{lex(np(str),sg,'powietrze',atr({adjp(agr)}))} + {lex(np(inst),pl,'usta',atr({adjp(str)}))} | |
41 | +złapać: pewny: _: : perf: subj{np(str)} + obj{lex(np(str),sg,'powietrze',atr({adjp(agr)}))} + {lex(np(inst),pl,'usta',atr({adjp(str)}))} | |
42 | + | |
43 | + | |
44 | +Jak się zachowuje podmiot zdaniowy w stronie biernej? - o to już pytałem | |
45 | +podciąć: pewny: _: : perf: subj{cp(gdy)} + obj{lex(np(str),pl,'skrzydło',natr)} + {np(dat)} | |
46 | +podciąć: pewny: _: : perf: subj{cp(jak)} + obj{lex(np(str),pl,'skrzydło',natr)} + {np(dat)} | |
47 | +podciąć: pewny: _: : perf: subj{cp(kiedy)} + obj{lex(np(str),pl,'skrzydło',natr)} + {np(dat)} | |
48 | +podcinać: pewny: _: : imperf: subj{cp(gdy)} + obj{lex(np(str),pl,'skrzydło',natr)} + {np(dat)} | |
49 | +podcinać: pewny: _: : imperf: subj{cp(jak)} + obj{lex(np(str),pl,'skrzydło',natr)} + {np(dat)} | |
50 | +podcinać: pewny: _: : imperf: subj{cp(kiedy)} + obj{lex(np(str),pl,'skrzydło',natr)} + {np(dat)} | |
51 | +sprawiać: pewny: _: : imperf: subj{np(str);cp(że);ncp(str,że)} + obj{np(str)} + {np(dat)} | |
52 | +sprawić: pewny: _: : perf: subj{np(str);cp(że);ncp(str,że)} + obj{np(str)} + {np(dat)} | |
53 | +zaskakiwać: pewny: _: : imperf: subj{np(str);cp(int);cp(że);ncp(str,int);ncp(str,że)} + obj{np(str)} | |
54 | +zaskoczyć: pewny: _: : perf: subj{np(str);cp(int);cp(że);ncp(str,int);ncp(str,że)} + obj{np(str)} | |
55 | +zwracać: pewny: _: : imperf: subj{cp(int)} + obj{lex(np(str),sg,'uwaga',atr({possp}))} | |
56 | +zwracać: pewny: _: : imperf: subj{cp(że)} + obj{lex(np(str),sg,'uwaga',atr({possp}))} | |
57 | +zwrócić: pewny: _: : perf: subj{cp(int)} + obj{lex(np(str),sg,'uwaga',atr({possp}))} | |
58 | +zwrócić: pewny: _: : perf: subj{cp(że)} + obj{lex(np(str),sg,'uwaga',atr({possp}))} | |
... | ... |
lexSemantics/entries.ml
1 | 1 | (* |
2 | - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty". | |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
2 | + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. | |
3 | + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | 5 | * |
6 | 6 | * This library is free software: you can redistribute it and/or modify |
7 | 7 | * it under the terms of the GNU Lesser General Public License as published by |
... | ... |
lexSemantics/makefile
... | ... | @@ -3,7 +3,7 @@ OCAMLOPT=ocamlopt |
3 | 3 | OCAMLDEP=ocamldep |
4 | 4 | INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam |
5 | 5 | OCAMLFLAGS=$(INCLUDES) -g |
6 | -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa #eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-plWordnet.cmxa eniam-lcg-parser.cmxa #eniam-lcg-grammar-pl.cmxa #eniam-lexSemantics.cmxa | |
6 | +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa #eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-plWordnet.cmxa #eniam-lexSemantics.cmxa | |
7 | 7 | INSTALLDIR=`ocamlc -where`/eniam |
8 | 8 | |
9 | 9 | SOURCES= ENIAMlexSemanticsTypes.ml ENIAMcategories.ml ENIAMlexSemanticsData.ml ENIAMlexSemantics.ml |
... | ... | @@ -28,8 +28,8 @@ eniam-lexSemantics.cmxa: $(SOURCES) |
28 | 28 | |
29 | 29 | # test: test.ml |
30 | 30 | # $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml |
31 | -test: entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml test.ml | |
32 | - $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml test.ml | |
31 | +test: entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml test.ml | |
32 | + $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml test.ml | |
33 | 33 | |
34 | 34 | |
35 | 35 | .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx |
... | ... |
lexSemantics/test.ml
1 | 1 | (* |
2 | 2 | * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
3 | + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | 5 | * |
6 | 6 | * This library is free software: you can redistribute it and/or modify |
7 | 7 | * it under the terms of the GNU Lesser General Public License as published by |
... | ... |
lexSemantics/test2.ml
1 | 1 | (* |
2 | - * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty". | |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
2 | + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. | |
3 | + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | 5 | * |
6 | 6 | * This library is free software: you can redistribute it and/or modify |
7 | 7 | * it under the terms of the GNU Lesser General Public License as published by |
... | ... |
subsyntax/README
walenty/ENIAMwalConnect.ml
... | ... | @@ -72,7 +72,7 @@ let connect entry = |
72 | 72 | let phrases = process_morfs position.morfs in |
73 | 73 | let morfs = Xlist.fold phrase_ids [] (fun morfs phrase_id -> |
74 | 74 | try IntMap.find phrases phrase_id :: morfs |
75 | - with Not_found -> Printf.printf "%s\n%!" entry.form_orth;morfs) in | |
75 | + with Not_found -> if entry.form_orth <> "podobać" then Printf.printf "connect: %s\n%!" entry.form_orth;morfs) in | |
76 | 76 | {position with role=arg.role; role_attr=arg.role_attribute; sel_prefs=sel_prefs; |
77 | 77 | morfs=List.rev morfs} :: conn_positions)) in |
78 | 78 | (* let meanings = List.rev (Xlist.rev_map frame.meanings (fun id -> |
... | ... |
walenty/ENIAMwalGenerate.ml
... | ... | @@ -20,8 +20,19 @@ |
20 | 20 | open ENIAMwalTypes |
21 | 21 | open Xstd |
22 | 22 | |
23 | +let correct_walenty entry = | |
24 | + if entry.form_orth = "podobać" then | |
25 | + {entry with schemata=Xlist.map entry.schemata (fun s -> | |
26 | + {s with positions=Xlist.map s.positions (fun p -> | |
27 | + if p.gf=SUBJ then {p with morfs=List.flatten (Xlist.map p.morfs (function | |
28 | + MorfId 126 -> [] | |
29 | + | m -> [m]))} | |
30 | + else p)})} | |
31 | + else entry | |
32 | + | |
23 | 33 | let load_walenty walenty_filename expands_filename = |
24 | 34 | let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in |
35 | + let walenty = Xlist.rev_map walenty correct_walenty in | |
25 | 36 | let expands = ENIAMwalTEI.load_expands expands_filename in |
26 | 37 | let meanings = |
27 | 38 | Xlist.fold walenty IntMap.empty (fun meanings entry -> |
... | ... |
walenty/ENIAMwalLex.ml
... | ... | @@ -41,6 +41,39 @@ let rec split_elexeme = function |
41 | 41 | genders,[ORcoord(List.rev l)] |
42 | 42 | | Elexeme gender -> [gender],[] |
43 | 43 | |
44 | +let rec get_lexemes = function | |
45 | + Lexeme s -> [s] | |
46 | + | ORconcat l -> List.flatten (Xlist.map l get_lexemes) | |
47 | + | ORcoord l -> List.flatten (Xlist.map l get_lexemes) | |
48 | + | XOR l -> List.flatten (Xlist.map l get_lexemes) | |
49 | + | Elexeme gender -> failwith "get_lexemes" | |
50 | + | |
51 | +let rec remove_list set = function | |
52 | + [] -> [] | |
53 | + | s :: l -> if Xlist.mem set s then remove_list set l else s :: (remove_list set l) | |
54 | + | |
55 | +let rec check_lexemes_morfs l = function | |
56 | + LexPhrase(lexs,(_,schema)) -> | |
57 | + let l = Xlist.fold lexs l (fun l (_,lex) -> | |
58 | + remove_list (get_lexemes lex) l) in | |
59 | + check_lexemes_schema l schema | |
60 | + | _ -> l | |
61 | + | |
62 | +and check_lexemes_schema l schema = | |
63 | + Xlist.fold schema l (fun l s -> | |
64 | + Xlist.fold s.morfs l check_lexemes_morfs) | |
65 | + | |
66 | +let add_refl_restr (restr,schema) = | |
67 | + (match restr with | |
68 | + Natr -> Ratr | |
69 | + | Atr1 -> Atr | |
70 | + | Atr -> Atr | |
71 | + | Ratr1 -> Ratr | |
72 | + | Ratr -> Ratr | |
73 | + | Ratrs -> Ratrs | |
74 | + | NoRestr -> failwith "add_refl_restr"), | |
75 | + position [LexPhrase([QUB,Lexeme "się"],(Natr,[]))] :: schema | |
76 | + | |
44 | 77 | let rec expand_lexicalizations_schema schema = |
45 | 78 | Xlist.map schema (fun s -> |
46 | 79 | {s with morfs=expand_lexicalizations_morfs s.morfs}) |
... | ... | @@ -51,15 +84,7 @@ and expand_lexicalizations_morfs morfs = (* uproszczenie polegające na zezwolen |
51 | 84 | LexPhrase(pos_lex,(restr,schema)) -> LexPhrase(pos_lex,(restr,expand_lexicalizations_schema schema)) |
52 | 85 | | morf -> morf in |
53 | 86 | match morf with |
54 | - (* LexPhrase([ADV _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf] *) | |
55 | - (* | LexPhrase([PREP _,_;SUBST _,_],(_,schema)) -> if remove_trivial_args schema <> [] then print_endline (ENIAMwalStringOf.morf morf); [morf] *) | |
56 | - (* | LexPhrase([PREP _,_;GER _,_],(_,schema)) -> if remove_trivial_args schema <> [] then print_endline (ENIAMwalStringOf.morf morf); [morf] *) | |
57 | - (* | LexPhrase([NUM _,_;_],(_,schema)) -> if remove_trivial_args schema <> [] then print_endline (ENIAMwalStringOf.morf morf); [morf] *) | |
58 | - (* | LexPhrase([PREP _,_;NUM _,_;_],(_,schema)) -> if remove_trivial_args schema <> [] then print_endline (ENIAMwalStringOf.morf morf); [morf] *) | |
59 | - (* | LexPhrase([PREP _,_;ADJ _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf] | |
60 | - | LexPhrase([PREP _,_;PPAS _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf] | |
61 | - | LexPhrase([PREP _,_;PACT _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf] *) | |
62 | - (* | Phrase(PrepNumP(prep,case)) -> [LexPhrase([PREP case,Lexeme prep],(Ratrs,[position(*2*) [Phrase(NumP(case))]]))] *) | |
87 | +(* | Phrase(PrepNumP(prep,case)) -> [LexPhrase([PREP case,Lexeme prep],(Ratrs,[position(*2*) [Phrase(NumP(case))]]))] *) | |
63 | 88 | | Phrase(PrepNumP(prep,case)) -> [Phrase(PrepNP(prep,case))] (* FIXME: celowe uproszczenie *) |
64 | 89 | | LexPhrase([PREP pcase,plex;SUBST(n,c),slex],(Atr1,[{morfs=[LexPhrase([QUB,_],_)]} as s])) -> |
65 | 90 | (* print_endline (ENIAMwalStringOf.morf morf); *) |
... | ... | @@ -74,62 +99,30 @@ and expand_lexicalizations_morfs morfs = (* uproszczenie polegające na zezwolen |
74 | 99 | [LexPhrase([PREP pcase,plex],(Ratrs,[position [LexPhrase([SUBST(n,c),slex],(Natr,[]))];s(*{s with dir=Backward}*)]))] |
75 | 100 | | LexPhrase([PREP pcase,plex;pos,lex],restr) -> |
76 | 101 | [LexPhrase([PREP pcase,plex],(Ratrs,[position [LexPhrase([pos,lex],restr)]]))] |
77 | - | LexPhrase([PREP pcase,plex;NUM(c,g,a),nlex;pos,lex],restr) -> | |
102 | + | LexPhrase([PREP pcase,plex;NUM(c,g),nlex;pos,lex],restr) -> | |
78 | 103 | let genders,lexs = split_elexeme lex in |
79 | 104 | Xlist.map genders (fun gender -> |
80 | - LexPhrase([PREP pcase,plex],(Ratrs,[position [LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[(*num*)position [Phrase Null(*Pro*)]]))]]))) @ (*FIXME*) | |
105 | + LexPhrase([PREP pcase,plex],(Ratrs,[position [LexPhrase([NUM(c,gender),nlex],(Ratrs,[(*num*)position [Phrase Null(*Pro*)]]))]]))) @ (*FIXME*) | |
81 | 106 | Xlist.map lexs (fun lex -> |
82 | - LexPhrase([PREP pcase,plex],(Ratrs,[position [LexPhrase([NUM(c,g,a),nlex],(Ratrs,[(*num*)position [LexPhrase([pos,lex],restr)]]))]]))) | |
83 | - | LexPhrase([NUM(c,g,a),nlex;pos,lex],restr) -> | |
107 | + LexPhrase([PREP pcase,plex],(Ratrs,[position [LexPhrase([NUM(c,g),nlex],(Ratrs,[(*num*)position [LexPhrase([pos,lex],restr)]]))]]))) | |
108 | + | LexPhrase([NUM(c,g),nlex;pos,lex],restr) -> | |
84 | 109 | let genders,lexs = split_elexeme lex in |
85 | 110 | Xlist.map genders (fun gender -> |
86 | - LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[(*num*)position [Phrase Null(*Pro*)]]))) @ | |
111 | + LexPhrase([NUM(c,gender),nlex],(Ratrs,[(*num*)position [Phrase Null(*Pro*)]]))) @ | |
87 | 112 | Xlist.map lexs (fun lex -> |
88 | - LexPhrase([NUM(c,g,a),nlex],(Ratrs,[(*num*)position [LexPhrase([pos,lex],restr)]]))) | |
113 | + LexPhrase([NUM(c,g),nlex],(Ratrs,[(*num*)position [LexPhrase([pos,lex],restr)]]))) | |
114 | + | LexPhrase([INF(a,n),lex;QUB,Lexeme "się"],restr) -> [LexPhrase([INF(a,n),lex],add_refl_restr restr)] | |
115 | + | LexPhrase([COMP ctype,clex;pos,lex;QUB,Lexeme "się"],restr) -> | |
116 | + if Xlist.size (check_lexemes_schema (get_lexemes clex) (snd restr)) = 0 then | |
117 | + [LexPhrase([pos,lex],add_refl_restr restr)] | |
118 | + else [LexPhrase([COMP ctype,clex],(Ratrs,[(*std*)position (*Forward*) [LexPhrase([pos,lex],add_refl_restr restr)]]))] | |
89 | 119 | | LexPhrase([COMP ctype,clex;pos,lex],restr) -> |
90 | - [LexPhrase([COMP ctype,clex],(Ratrs,[(*std*)position (*Forward*) [LexPhrase([pos,lex],restr)]]))] | |
91 | - | LexPhrase([SUBST(n,c),slex;COMP ctype,clex;pos,lex],restr) -> | |
92 | - [LexPhrase([SUBST(n,c),slex],(Ratrs,[(*std*)position (*Forward*) [LexPhrase([COMP ctype,clex],(Ratrs,[(*std*)position (*Forward*) [LexPhrase([pos,lex],restr)]]))]]))] (* FIXME: poprawić po zrobieniu NCP *) | |
120 | + if Xlist.size (check_lexemes_schema (get_lexemes clex) (snd restr)) = 0 then | |
121 | + [LexPhrase([pos,lex],restr)] | |
122 | + else [LexPhrase([COMP ctype,clex],(Ratrs,[(*std*)position (*Forward*) [LexPhrase([pos,lex],restr)]]))] | |
93 | 123 | | LexPhrase(_::_::_,_) -> failwith ("expand_lexicalizations_morfs: " ^ ENIAMwalStringOf.morf morf) |
94 | - (* | LexPhrase([PREP pcase,plex;SUBST(n,c),slex],(Atr1,[gf,cr,ce,[LexPhrase([QUB,lex],arestr)]])) -> | |
95 | - (* print_endline (ENIAMwalStringOf.morf morf); *) | |
96 | - [LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))]])); | |
97 | - LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))];gf,cr,ce,[LexPhrase([QUB,lex],arestr)]]))] | |
98 | - | LexPhrase([PREP(pcase),plex;SUBST(n,c),slex],(Atr1,[gf,cr,ce,[LexPhrase([ADV gr,lex],arestr)]])) -> | |
99 | - (* print_endline (ENIAMwalStringOf.morf morf); *) | |
100 | - [LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))]])); | |
101 | - LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))];gf,cr,ce,[LexPhrase([ADV gr,lex],arestr)]]))] | |
102 | - | LexPhrase([PREP pcase,plex;SUBST(n,c),slex],(Ratr1,[gf,cr,ce,[LexPhrase([ADV gr,lex],arestr)]])) -> | |
103 | - (* print_endline (ENIAMwalStringOf.morf morf); *) | |
104 | - [LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))];gf,cr,ce,[LexPhrase([ADV gr,lex],arestr)]]))] | |
105 | - | LexPhrase([PREP pcase,plex;pos,lex],restr) -> | |
106 | - [LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))] | |
107 | - | LexPhrase([PREP pcase,plex;NUM(c,g,a),nlex;pos,lex],restr) -> | |
108 | - let genders,lexs = split_elexeme lex in | |
109 | - Xlist.map genders (fun gender -> | |
110 | - LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[("OBJ","QUANT-ARG",["T"]),[],[],[Phrase Pro]]))]]))) @ | |
111 | - Xlist.map lexs (fun lex -> | |
112 | - LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([NUM(c,g,a),nlex],(Ratrs,[("OBJ","QUANT-ARG",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))]]))) | |
113 | - | LexPhrase([NUM(c,g,a),nlex;pos,lex],restr) -> | |
114 | - let genders,lexs = split_elexeme lex in | |
115 | - Xlist.map genders (fun gender -> | |
116 | - LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[("OBJ","QUANT-ARG",["T"]),[],[],[Phrase Pro]]))) @ | |
117 | - Xlist.map lexs (fun lex -> | |
118 | - LexPhrase([NUM(c,g,a),nlex],(Ratrs,[("OBJ","QUANT-ARG",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))) | |
119 | - | LexPhrase([COMP ctype,clex;pos,lex],restr) -> | |
120 | - [LexPhrase([COMP ctype,clex],(Ratrs,[("C","",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))] | |
121 | - | LexPhrase([SUBST(n,c),slex;COMP ctype,clex;pos,lex],restr) -> | |
122 | - [LexPhrase([SUBST(n,c),slex],(Ratrs,[("OBJ","",["T"]),[],[],[LexPhrase([COMP ctype,clex],(Ratrs,[("C","",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))]]))] | |
123 | - | LexPhrase(_::_::_,_) -> failwith ("expand_lexicalizations_morfs: " ^ ENIAMwalStringOf.morf morf)*) | |
124 | 124 | | morf -> [morf])) |
125 | 125 | |
126 | -let rec get_lexemes = function | |
127 | - Lexeme s -> [s] | |
128 | - | ORconcat l -> List.flatten (Xlist.map l get_lexemes) | |
129 | - | ORcoord l -> List.flatten (Xlist.map l get_lexemes) | |
130 | - | XOR l -> List.flatten (Xlist.map l get_lexemes) | |
131 | - | Elexeme gender -> failwith "get_lexemes" | |
132 | - | |
133 | 126 | let winien = StringSet.of_list ["winien"; "rad"; "powinien"; "nierad"; "niekontent"; "kontent"; "gotów"] |
134 | 127 | let pred = StringSet.of_list ["żal"; "śmiech"; "znać"; "wstyd"; "wolno"; "widać"; "wiadomo"; |
135 | 128 | "warto"; "trzeba"; "trza"; "słychać"; "szkoda"; "strach"; "stać"; "sposób"; "potrzeba"; "pora"; |
... | ... | @@ -148,7 +141,11 @@ let get_pos lex = function |
148 | 141 | | "się" -> ["qub"] |
149 | 142 | | _ -> ["subst"]) |
150 | 143 | | PREP _ -> ["prep"] |
151 | - | NUM _ -> ["num"] | |
144 | + | NUM _ -> | |
145 | + (try | |
146 | + let _ = int_of_string lex in | |
147 | + ["intnum"] | |
148 | + with _ -> ["num"]) | |
152 | 149 | | ADV _ -> ["adv"] |
153 | 150 | | ADJ _ -> ["adj"] |
154 | 151 | | GER _ -> ["ger"] |
... | ... |
walenty/ENIAMwalStringOf.ml
... | ... | @@ -93,9 +93,9 @@ let refl = function |
93 | 93 | | ReflFalse -> "nosię" |
94 | 94 | | ReflUndef -> "_" |
95 | 95 | |
96 | -let acm = function | |
96 | +(* let acm = function | |
97 | 97 | Acm s -> s |
98 | - | AcmUndef -> "_" | |
98 | + | AcmUndef -> "_" *) | |
99 | 99 | |
100 | 100 | let gf = function |
101 | 101 | SUBJ -> "subj" |
... | ... | @@ -108,17 +108,17 @@ let pos = function |
108 | 108 | | PPRON3(n,c) -> "PPRON3(" ^ number n ^ "," ^ case c ^ ")" |
109 | 109 | | SIEBIE(c) -> "SIEBIE(" ^ case c ^ ")" |
110 | 110 | | PREP(c) -> "PREP(" ^ case c ^ ")" |
111 | - | NUM(c,g,a) -> "NUM(" ^ case c ^ "," ^ gender g ^ "," ^ acm a ^ ")" | |
111 | + | NUM(c,g) -> "NUM(" ^ case c ^ "," ^ gender g ^ (*"," ^ acm a ^*) ")" | |
112 | 112 | | ADJ(n,c,g,gr) -> "ADJ(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ grad gr ^ ")" |
113 | 113 | | ADV(gr) -> "ADV(" ^ grad gr ^ ")" |
114 | - | GER(n,c,g,a,neg,r) -> "GER(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ "," ^ refl r ^ ")" | |
115 | - | PACT(n,c,g,a,neg,r) -> "PACT(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ "," ^ refl r ^ ")" | |
114 | + | GER(n,c,g,a,neg) -> "GER(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg (*^ "," ^ refl r*) ^ ")" | |
115 | + | PACT(n,c,g,a,neg) -> "PACT(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg (*^ "," ^ refl r*) ^ ")" | |
116 | 116 | | PPAS(n,c,g,a,neg) -> "PPAS(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ ")" |
117 | - | INF(a,n,r) -> "INF(" ^ aspect a ^ "," ^ negation n ^ "," ^ refl r ^ ")" | |
117 | + | INF(a,n) -> "INF(" ^ aspect a ^ "," ^ negation n (*^ "," ^ refl r*) ^ ")" | |
118 | 118 | | QUB -> "QUB" |
119 | 119 | | COMPAR -> "COMPAR" |
120 | 120 | | COMP(c) -> "COMP(" ^ comp_type c ^ ")" |
121 | - | PERS(n,r) -> "PERS(" ^ negation n ^ "," ^ refl r ^ ")" | |
121 | + | PERS(n) -> "PERS(" ^ negation n ^ (*"," ^ refl r ^*) ")" | |
122 | 122 | | FIXED -> "FIXED" |
123 | 123 | |
124 | 124 | let phrase = function |
... | ... | @@ -136,7 +136,7 @@ let phrase = function |
136 | 136 | | InfP(a) -> "infp(" ^ aspect a (*^ req r*) ^ ")" |
137 | 137 | | AdvP -> "advp" |
138 | 138 | | FixedP s -> "fixed(" ^ s ^ ")" |
139 | - | Num(c,a) -> "num(" ^ case c ^ "," ^ acm a ^ ")" | |
139 | + (* | Num(c,a) -> "num(" ^ case c ^ "," ^ acm a ^ ")" *) | |
140 | 140 | | Or -> "or" |
141 | 141 | | Qub -> "qub" |
142 | 142 | (* | Pro -> "pro" |
... | ... |
walenty/ENIAMwalTEI.ml
... | ... | @@ -225,11 +225,14 @@ let process_lex_phrase lemma = function |
225 | 225 | | PrepNP(prep,case),number,GenderUndef,GradUndef,NegationUndef,ReflUndef -> [PREP case,Lexeme prep;SUBST(number,case),lemma] |
226 | 226 | | AdjP(case),number,gender,grad,NegationUndef,ReflUndef -> [ADJ(number,case,gender,grad),lemma] |
227 | 227 | | PrepAdjP(prep,case),number,gender,grad,NegationUndef,ReflUndef -> [PREP case,Lexeme prep;ADJ(number,case,gender,grad),lemma] |
228 | - | InfP(aspect),NumberUndef,GenderUndef,GradUndef,negation,refl -> [INF(aspect,negation,refl),lemma] | |
228 | + | InfP(aspect),NumberUndef,GenderUndef,GradUndef,negation,ReflTrue -> [INF(aspect,negation),lemma;QUB,Lexeme "się"] | |
229 | + | InfP(aspect),NumberUndef,GenderUndef,GradUndef,negation,refl -> [INF(aspect,negation),lemma] | |
229 | 230 | | PpasP(case),number,gender,GradUndef,negation,ReflUndef -> [PPAS(number,case,gender,AspectUndef,negation),lemma] |
230 | 231 | | PrepPpasP(prep,case),number,gender,GradUndef,negation,ReflUndef -> [PREP case,Lexeme prep;PPAS(number,case,gender,AspectUndef,negation),lemma] |
231 | - | PactP(case),number,gender,GradUndef,negation,refl -> [PACT(number,case,gender,AspectUndef,negation,refl),lemma] | |
232 | - | PrepGerP(prep,case),number,GenderUndef,GradUndef,negation,refl -> [PREP case,Lexeme prep;GER(number,case,GenderUndef,AspectUndef,negation,refl),lemma] | |
232 | + | PactP(case),number,gender,GradUndef,negation,ReflTrue -> [PACT(number,case,gender,AspectUndef,negation),lemma;QUB,Lexeme "się"] | |
233 | + | PactP(case),number,gender,GradUndef,negation,refl -> [PACT(number,case,gender,AspectUndef,negation),lemma] | |
234 | + | PrepGerP(prep,case),number,GenderUndef,GradUndef,negation,ReflTrue -> [PREP case,Lexeme prep;GER(number,case,GenderUndef,AspectUndef,negation),lemma;QUB,Lexeme "się"] | |
235 | + | PrepGerP(prep,case),number,GenderUndef,GradUndef,negation,refl -> [PREP case,Lexeme prep;GER(number,case,GenderUndef,AspectUndef,negation),lemma] | |
233 | 236 | | Qub,NumberUndef,GenderUndef,GradUndef,NegationUndef,ReflUndef -> [QUB,lemma] |
234 | 237 | | AdvP,NumberUndef,GenderUndef,grad,NegationUndef,ReflUndef -> [ADV grad,lemma] |
235 | 238 | | phrase,number,gender,grad,negation,reflex -> |
... | ... | @@ -248,10 +251,14 @@ let rec process_lex lex = function |
248 | 251 | | PhraseAbbr(Advp mode,[]),[],lemma,Lexeme "" -> |
249 | 252 | let poss = process_lex_phrase lemma (AdvP,lex.lex_number,lex.lex_gender,lex.lex_degree,lex.lex_negation,lex.lex_reflex) in |
250 | 253 | LexPhrase(poss,lex.lex_modification) |
251 | - | Phrase (NumP(case)),[],lemma,num_lemma -> LexPhrase([NUM(case,GenderUndef,AcmUndef),num_lemma;SUBST(NumberUndef,CaseUndef),lemma],lex.lex_modification) | |
252 | - | Phrase (PrepNumP(prep,case)),[],lemma,num_lemma -> LexPhrase([PREP case,Lexeme prep;NUM(case,GenderUndef,AcmUndef),num_lemma;SUBST(NumberUndef,CaseUndef),lemma],lex.lex_modification) | |
253 | - | PhraseComp(Cp,(ctype,[Comp comp])),[],lemma,Lexeme "" -> LexPhrase([COMP ctype,Lexeme comp;PERS(lex.lex_negation,lex.lex_reflex),lemma],lex.lex_modification) | |
254 | - | PhraseComp(Cp,(ctype,[Comp comp1;Comp comp2])),[],lemma,Lexeme "" -> LexPhrase([COMP ctype,XOR[Lexeme comp1;Lexeme comp2];PERS(lex.lex_negation,lex.lex_reflex),lemma],lex.lex_modification) | |
254 | + | Phrase (NumP(case)),[],lemma,num_lemma -> LexPhrase([NUM(case,GenderUndef),num_lemma;SUBST(NumberUndef,CaseUndef),lemma],lex.lex_modification) | |
255 | + | Phrase (PrepNumP(prep,case)),[],lemma,num_lemma -> LexPhrase([PREP case,Lexeme prep;NUM(case,GenderUndef),num_lemma;SUBST(NumberUndef,CaseUndef),lemma],lex.lex_modification) | |
256 | + | PhraseComp(Cp,(ctype,[Comp comp])),[],lemma,Lexeme "" -> | |
257 | + if lex.lex_reflex = ReflTrue then LexPhrase([COMP ctype,Lexeme comp;PERS(lex.lex_negation),lemma;QUB,Lexeme "się"],lex.lex_modification) | |
258 | + else LexPhrase([COMP ctype,Lexeme comp;PERS(lex.lex_negation),lemma],lex.lex_modification) | |
259 | + | PhraseComp(Cp,(ctype,[Comp comp1;Comp comp2])),[],lemma,Lexeme "" -> | |
260 | + if lex.lex_reflex = ReflTrue then LexPhrase([COMP ctype,XOR[Lexeme comp1;Lexeme comp2];PERS(lex.lex_negation),lemma;QUB,Lexeme "się"],lex.lex_modification) | |
261 | + else LexPhrase([COMP ctype,XOR[Lexeme comp1;Lexeme comp2];PERS(lex.lex_negation),lemma],lex.lex_modification) | |
255 | 262 | | Phrase phrase,[],lemma,Lexeme "" -> |
256 | 263 | let poss = process_lex_phrase lemma (phrase,lex.lex_number,lex.lex_gender,lex.lex_degree,lex.lex_negation,lex.lex_reflex) in |
257 | 264 | LexPhrase(poss,lex.lex_modification) |
... | ... |
walenty/ENIAMwalTypes.ml
... | ... | @@ -31,7 +31,7 @@ type number = Number of string | NumberUndef | NumberAgr |
31 | 31 | type gender = Gender of string | GenderUndef | GenderAgr | Genders of string list |
32 | 32 | type grad = Grad of string | GradUndef |
33 | 33 | type refl = ReflEmpty | ReflTrue | ReflFalse | ReflUndef |
34 | -type acm = Acm of string | AcmUndef | |
34 | +(* type acm = Acm of string | AcmUndef *) | |
35 | 35 | |
36 | 36 | type gf = SUBJ | OBJ | ARG |
37 | 37 | |
... | ... | @@ -41,17 +41,17 @@ type pos = |
41 | 41 | | PPRON3 of number * case |
42 | 42 | | SIEBIE of case |
43 | 43 | | PREP of case |
44 | - | NUM of case * gender * acm | |
44 | + | NUM of case * gender (* acm*) | |
45 | 45 | | ADJ of number * case * gender * grad |
46 | 46 | | ADV of grad |
47 | - | GER of number * case * gender * aspect * negation * refl | |
48 | - | PACT of number * case * gender * aspect * negation * refl | |
47 | + | GER of number * case * gender * aspect * negation (** refl*) | |
48 | + | PACT of number * case * gender * aspect * negation (* refl*) | |
49 | 49 | | PPAS of number * case * gender * aspect * negation |
50 | - | INF of aspect * negation * refl | |
50 | + | INF of aspect * negation (* refl*) | |
51 | 51 | | QUB |
52 | 52 | | COMPAR |
53 | 53 | | COMP of comp_type |
54 | - | PERS of (*number * gender * aspect * person * *)negation * refl | |
54 | + | PERS of (*number * gender * aspect * person * *)negation (* refl*) | |
55 | 55 | | FIXED |
56 | 56 | |
57 | 57 | type phrase = |
... | ... | @@ -69,7 +69,7 @@ type phrase = |
69 | 69 | | InfP of aspect |
70 | 70 | | AdvP |
71 | 71 | | FixedP of string |
72 | - | Num of case * acm | |
72 | + (* | Num of case (* acm*) *) | |
73 | 73 | | Or |
74 | 74 | | Qub |
75 | 75 | (* | Pro |
... | ... |