Commit f8a8211753f1329e2dbea69321e6c10e3a06243d
1 parent
ccecbc75
gramatyka
Showing
2 changed files
with
277 additions
and
456 deletions
parser/LCGlexicon2.ml
1 | 1 | |
2 | +type cats = | |
3 | + Inumber | Igender | Iperson | Nperson | Plemma | Ctype | Number | Case | Gender | Person | | |
4 | + Nsyn | Nsem | Unumber | Ucase | Ugender | Uperson | Rec | Congr | Postp | Adjsyn | Grad | | |
5 | + Pos | Aspect | Negation | Mood | Tense | Sub | Coord | Lemma | Acm | Int | Rel | |
6 | + | |
2 | 7 | (* FIXME: "Można było" - brakuje uzgodnienia rodzaju przymiotnika w przypadku predykatywnym, i ogólnie kontroli składniowej *) |
3 | 8 | |
4 | 9 | (* x="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jedną z wartości atrybutu x, reguła zostanie wykonana dla x z usuniętymi pozostałymi wartościami *) |
5 | 10 | (* x!="s" oznacza, że żeby reguła została użyta token musi mieć jako jedną z wartości atrybutu x symbol inny od "s", reguła zostanie wykonana dla x z usuniętą wartością "s" *) |
6 | 11 | (* x=="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jedyną wartość atrybutu x *) |
7 | 12 | |
13 | +let symbol_weight = 1. | |
14 | +let measure_weight = 0.5 | |
15 | + | |
8 | 16 | let grammar = [ |
9 | 17 | |
10 | 18 | (* symbole występujące w tekście - daty itp. i słowa określające ich typy *) |
11 | 19 | "lemma=dzień,pos=subst,number=sg,case=gen", |
12 | - [Number,"sg";Case,["gen"];Gender,"genders";Person,"ter"], | |
13 | - ["day-lex/(date+day+day-month)", | |
20 | + [Number,"sg";Case,"gen";Gender,"genders";Person,"ter"], | |
21 | + "day-lex/(date+day+day-month)", | |
14 | 22 | symbol_weight, [Number; Case; Gender; Person]; |
15 | 23 | "lemma=dzień,pos=subst,number=sg", |
16 | 24 | [Number,"sg";Case,"cases";Gender,"genders";Person,"ter"], |
... | ... | @@ -40,7 +48,7 @@ let grammar = [ |
40 | 48 | symbol_weight, [Nsyn; Nsem]; |
41 | 49 | |
42 | 50 | "lemma=styczeń|luty|marzec|kwiecień|maj|czerwiec|lipiec|sierpień|wrzesień|październik|litopad|grudzień,pos=subst,number=sg,case=gen", |
43 | - [Number,"sg";Case,["gen"];Gender,"genders";Person,"ter"], | |
51 | + [Number,"sg";Case,"gen";Gender,"genders";Person,"ter"], | |
44 | 52 | "month-lex/(Null+year+NP(gen))", |
45 | 53 | symbol_weight, [Number; Case; Gender; Person]; |
46 | 54 | "lemma=styczeń|luty|marzec|kwiecień|maj|czerwiec|lipiec|sierpień|wrzesień|październik|litopad|grudzień,pos=subst,number=sg", |
... | ... | @@ -135,8 +143,8 @@ let grammar = [ |
135 | 143 | "np*number*case*gender*person{num_congr}{schema}{qub_inclusion}", |
136 | 144 | 0., [Nsyn; Nsem; Number; Case; Gender; Person]; |
137 | 145 | "pos=subst,check_frame_number2,case=gen,nsem!=measure", |
138 | - [Number,"numbers";Case,"all_cases";Gender,"genders";Person,"ter", | |
139 | - "np*sg*case*n2*person{num_rec}{schema}{qub_inclusion}"], (* UWAGA: number "sg" i gender "n2", żeby uzgadniać z podmiotem czasownika *) | |
146 | + [Number,"numbers";Case,"all_cases";Gender,"genders";Person,"ter"], | |
147 | + "np*sg*case*n2*person{num_rec}{schema}{qub_inclusion}", (* UWAGA: number "sg" i gender "n2", żeby uzgadniać z podmiotem czasownika *) | |
140 | 148 | 0., [Nsyn; Nsem; Number; Case; Gender; Person]; |
141 | 149 | "pos=subst,check_frame_number2,case=gen,nsem!=measure", |
142 | 150 | [Unumber,"all_numbers";Ucase,"all_cases";Ugender,"all_genders"; Uperson,"all_persons";Number,"numbers";Case,"all_cases";Gender,"genders";Person,"ter"], |
... | ... | @@ -164,39 +172,39 @@ let grammar = [ |
164 | 172 | "pos=num,acm=rec", |
165 | 173 | [Number,"numbers";Case,"cases";Gender,"genders";Person,"persons"], |
166 | 174 | "num*number*case*gender*person*rec{qub_inclusion}", (* FIXME: jak usunięcie Phrase ProNG wpływa na pokrycie? *) |
167 | - "c.weight 0 [rec; number; case; gender; person"; | |
175 | + 0., [Rec; Number; Case; Gender; Person]; | |
168 | 176 | "pos=num,acm=congr", |
169 | - [Number,"numbers";Case,"cases";Gender,"genders";Person,"persons"] | |
177 | + [Number,"numbers";Case,"cases";Gender,"genders";Person,"persons"], | |
170 | 178 | "num*number*case*gender*person*congr{qub_inclusion}", (* FIXME: jak usunięcie Phrase ProNG wpływa na pokrycie? *) |
171 | - "c.weight 0 [congr; number; case; gender; person"; | |
179 | + 0., [Congr; Number; Case; Gender; Person]; | |
172 | 180 | "lemma=1|-1,pos=intnum", (* FIXME: ustawić atrybut acm dla intnum *) |
173 | 181 | [Number,"sg";Case,"all_cases";Gender,"all_genders";Person,"ter"], |
174 | 182 | "num*number*case*gender*person*congr{qub_inclusion}", (* FIXME: jak usunięcie Phrase ProNG wpływa na pokrycie? *) |
175 | - "c.weight 0 []"; | |
183 | + 0., []; | |
176 | 184 | "lemma!=1,lemma!=-1,pos=intnum", |
177 | 185 | [Number,"pl";Case,"all_cases";Gender,"all_genders";Person,"ter"], |
178 | 186 | "num*number*case*gender*person*rec{qub_inclusion}", (* FIXME: jak usunięcie Phrase ProNG wpływa na pokrycie? *) |
179 | - "c.weight 0 []"; | |
187 | + 0., []; | |
180 | 188 | "lemma!=1,lemma!=-1,lemma=*2|*3|*4,pos=intnum", |
181 | - [Number,"pl";Case,"all_cases";Gender,"all_genders";Person,"ter"] | |
189 | + [Number,"pl";Case,"all_cases";Gender,"all_genders";Person,"ter"], | |
182 | 190 | "num*number*case*gender*person*congr{qub_inclusion}", (* FIXME: jak usunięcie Phrase ProNG wpływa na pokrycie? *) |
183 | - "c.weight 0 []"; | |
191 | + 0., []; | |
184 | 192 | "pos=realnum", |
185 | 193 | [Number,"sg";Case,"all_cases";Gender,"all_genders";Person,"ter"], |
186 | 194 | "num*number*case*gender*person*rec{qub_inclusion}", (* FIXME: jak usunięcie Phrase ProNG wpływa na pokrycie? *) |
187 | - "c.weight 0 []"; | |
195 | + 0., []; | |
188 | 196 | "pos=intnum-interval", |
189 | 197 | [Number,"pl";Case,"all_cases";Gender,"all_genders";Person,"ter"], |
190 | 198 | "num*number*case*gender*person*congr{qub_inclusion}", (* FIXME: jak usunięcie Phrase ProNG wpływa na pokrycie? *) |
191 | - "c.weight 0 []"; | |
199 | + 0., []; | |
192 | 200 | "pos=intnum-interval", |
193 | 201 | [Number,"pl";Case,"all_cases";Gender,"all_genders";Person,"ter"], |
194 | 202 | "num*number*case*gender*person*rec{qub_inclusion}", (* FIXME: jak usunięcie Phrase ProNG wpływa na pokrycie? *) |
195 | - "c.weight 0 []"; | |
203 | + 0., []; | |
196 | 204 | "pos=realnum-interval", |
197 | 205 | [Number,"sg";Case,"all_cases";Gender,"all_genders";Person,"ter"], |
198 | 206 | "num*number*case*gender*person*rec{qub_inclusion}", (* FIXME: jak usunięcie Phrase ProNG wpływa na pokrycie? *) |
199 | - "c.weight 0 []"; | |
207 | + 0., []; | |
200 | 208 | |
201 | 209 | (* pojemniki *) |
202 | 210 | "pos=subst,check_frame_number2,case!=voc,nsem=measure", |
... | ... | @@ -209,7 +217,7 @@ let grammar = [ |
209 | 217 | measure_weight, [Nsyn; Nsem; Number; Case; Gender; Person]; |
210 | 218 | "pos=subst,check_frame_number2,case=gen,nsem=measure", |
211 | 219 | [Number,"numbers";Case,"all_cases";Gender,"genders";Person,"ter"], |
212 | - "measure*sg*case*n2*person{num_rec}{schema}{qub_inclusion}"], (* UWAGA: number "sg" i gender "n2", żeby uzgadniać z podmiotem czasownika *) | |
220 | + "measure*sg*case*n2*person{num_rec}{schema}{qub_inclusion}", (* UWAGA: number "sg" i gender "n2", żeby uzgadniać z podmiotem czasownika *) | |
213 | 221 | measure_weight, [Nsyn; Nsem; Number; Case; Gender; Person]; |
214 | 222 | |
215 | 223 | (* frazy przyimkowe *) |
... | ... | @@ -222,13 +230,13 @@ let grammar = [ |
222 | 230 | [Case,"cases"], |
223 | 231 | "prepadjp*lemma*case{\\(1+advp),/adjp*T*case*T}{qub_inclusion}", |
224 | 232 | 0., [Case]; |
225 | -"lemma=po,pos=prep",[] (* po polsku, po kreciemu *) | |
233 | +"lemma=po,pos=prep",[], (* po polsku, po kreciemu *) | |
226 | 234 | "prepadjp*lemma*postp{\\(1+advp),/(adjp*sg*dat*m1+adjp*T*postp*T)}{qub_inclusion}", |
227 | 235 | 0., [Postp]; |
228 | -"lemma=z,pos=prep",[] (* z bliska *) | |
236 | +"lemma=z,pos=prep",[], (* z bliska *) | |
229 | 237 | "prepadjp*lemma*postp{\\(1+advp),/adjp*sg*nom*f}{qub_inclusion}", |
230 | 238 | 0., [Postp]; |
231 | -"lemma=na,pos=prep",[] (* na lewo *) | |
239 | +"lemma=na,pos=prep",[], (* na lewo *) | |
232 | 240 | "prepadjp*lemma*postp{\\(1+advp),/advp}{qub_inclusion}", |
233 | 241 | 0., [Postp]; |
234 | 242 | |
... | ... | @@ -293,30 +301,30 @@ let grammar = [ |
293 | 301 | [Number,"numbers";Case,"cases";Gender,"genders"], |
294 | 302 | "adjp*number*case*gender{schema}{qub_inclusion}", |
295 | 303 | 0., [Adjsyn; Grad; Number; Case; Gender]; |
296 | -"pos=adjc" | |
304 | +"pos=adjc", | |
297 | 305 | [Number,"sg";Case,"pred";Gender,"m1&m2&m3"], |
298 | 306 | "adjp*number*case*gender{schema}{qub_inclusion}{\\(1+adja)}", |
299 | 307 | 0., [Adjsyn; Pos; Number; Case; Gender]; |
300 | -"pos=adjp" | |
308 | +"pos=adjp", | |
301 | 309 | [Number,"all_numbers";Case,"postp";Gender,"all_genders"], |
302 | 310 | "adjp*number*case*gender{schema}{qub_inclusion}{\\(1+adja)}", |
303 | 311 | 0., [Adjsyn; Pos; Number; Case; Gender]; |
304 | -"pos=ordnum" | |
312 | +"pos=ordnum", | |
305 | 313 | [Number,"all_numbers";Case,"all_cases";Gender,"all_genders"], |
306 | 314 | "adjp*number*case*gender{schema}{qub_inclusion}{\\(1+adja)}", |
307 | 315 | 0., [Adjsyn; Pos; Number; Case; Gender]; |
308 | -"pos=roman" | |
316 | +"pos=roman", | |
309 | 317 | [Number,"all_numbers";Case,"all_cases";Gender,"all_genders"], |
310 | 318 | "adjp*number*case*gender{schema}{qub_inclusion}{\\(1+adja)}", |
311 | 319 | 0., [Adjsyn; Pos; Number; Case; Gender]; |
312 | 320 | |
313 | -"pos=adja|intnum|realnum|intnum-interval|realnum-interval|roman|roman-interval",[] | |
321 | +"pos=adja|intnum|realnum|intnum-interval|realnum-interval|roman|roman-interval",[], | |
314 | 322 | "adja/hyphen", |
315 | - 0., [] | |
323 | + 0., []; | |
316 | 324 | |
317 | 325 | (* przysłówki *) |
318 | 326 | (* FIXME let grad = match grads with [grad] -> grad | _ -> failwith "make_advp: grad" in*) |
319 | -"pos=adv",[] | |
327 | +"pos=adv",[], | |
320 | 328 | "advp{schema}{qub_inclusion}{\\(1+adja)}", |
321 | 329 | 0., [Grad]; |
322 | 330 | |
... | ... | @@ -326,11 +334,11 @@ let grammar = [ |
326 | 334 | [Inumber,"";Igender,"";Iperson,"";Ctype,"int&rel"], |
327 | 335 | "cp*ctype*lemma{\\(1+advp),/(ip*inumber*igender*iperson/advp)}", |
328 | 336 | 0., [Ctype]; (*["CTYPE",SubstVar "ctype"]*) |
329 | -"lemma=odkąd|dlaczego|czemu,pos=adv" | |
337 | +"lemma=odkąd|dlaczego|czemu,pos=adv", | |
330 | 338 | [Inumber,"";Igender,"";Iperson,"";Ctype,"int"], |
331 | 339 | "cp*ctype*lemma{\\(1+advp),/(ip*inumber*igender*iperson/advp)}", |
332 | 340 | 0., [Ctype]; (*["CTYPE",SubstVar "ctype"]*) |
333 | -"lemma=gdy,pos=adv" | |
341 | +"lemma=gdy,pos=adv", | |
334 | 342 | [Inumber,"";Igender,"";Iperson,"";Ctype,"sub"], |
335 | 343 | "cp*ctype*lemma{\\(1+advp),/(ip*inumber*igender*iperson/advp)}", |
336 | 344 | 0., [Ctype]; (*["CTYPE",SubstVar "ctype"]*) |
... | ... | @@ -352,390 +360,281 @@ let grammar = [ |
352 | 360 | "adjp*number*case*gender{schema}{qub_inclusion}", |
353 | 361 | 0., [Negation; Aspect; Number; Case; Gender];(* FIXME: new_lemma *) |
354 | 362 | |
355 | -"pos=fin|bedzie|impt,negation=aff" | |
363 | +(* FIXME: ustalić czy negation, mood i tense są określane tu, czy w walencji *) | |
364 | +"pos=fin|bedzie,negation=aff,mood!=imperative", | |
365 | + [Number,"numbers";Gender,"all_genders";Person,"persons";Aspect,"aspects"], | |
366 | + "ip*number*gender*person{/(1+int)}{schema}{qub_inclusion}", | |
367 | + 0., [Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) | |
368 | +"pos=fin|bedzie,negation=neg,mood!=imperative", | |
369 | + [Number,"numbers";Gender,"all_genders";Person,"persons";Aspect,"aspects"], | |
370 | + "ip*number*gender*person{/(1+int)}{schema}{qub_inclusion}{\\nie}", | |
371 | + 0., [Negation; Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) | |
372 | +"pos=impt,negation=aff", | |
356 | 373 | [Number,"numbers";Gender,"all_genders";Person,"persons";Aspect,"aspects"], |
357 | 374 | "ip*number*gender*person{/(1+int)}{schema}{qub_inclusion}", |
358 | 375 | 0., [Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) |
359 | -"pos=fin|bedzie|impt,negation=neg" | |
376 | +"pos=impt,negation=neg", | |
360 | 377 | [Number,"numbers";Gender,"all_genders";Person,"persons";Aspect,"aspects"], |
361 | 378 | "ip*number*gender*person{/(1+int)}{schema}{qub_inclusion}{\\nie}", |
362 | 379 | 0., [Negation; Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) |
363 | -"pos=fin,person!=sec,negation=aff" | |
380 | +"pos=fin,person!=sec,negation=aff,mood=imperative", | |
364 | 381 | [Number,"numbers";Gender,"all_genders";Person,"persons";Aspect,"aspects"], |
365 | 382 | "ip*number*gender*person{/(1+int)}{schema,|aux-imp}{qub_inclusion}", |
366 | 383 | 0., [Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) |
367 | -"pos=fin,person!=sec,negation=neg" | |
384 | +"pos=fin,person!=sec,negation=neg,mood=imperative", | |
368 | 385 | [Number,"numbers";Gender,"all_genders";Person,"persons";Aspect,"aspects"], |
369 | 386 | "ip*number*gender*person{/(1+int)}{schema,|aux-imp}{qub_inclusion}{\\nie}", |
370 | 387 | 0., [Negation; Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) |
371 | 388 | |
372 | -"pos=imps,negation=aff" | |
373 | - [Number,"numbers";Gender,"all_genders";Person,"all_persons";Aspect,"aspects"], | |
389 | +"pos=imps,negation=aff", | |
390 | + [Number,"all_numbers";Gender,"all_genders";Person,"all_persons";Aspect,"aspects"], | |
374 | 391 | "ip*number*gender*person{/(1+int)}{schema}{qub_inclusion}", |
375 | 392 | 0., [Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) |
376 | -"pos=imps,negation=neg" | |
377 | - [Number,"numbers";Gender,"all_genders";Person,"all_persons";Aspect,"aspects"], | |
393 | +"pos=imps,negation=neg", | |
394 | + [Number,"all_numbers";Gender,"all_genders";Person,"all_persons";Aspect,"aspects"], | |
378 | 395 | "ip*number*gender*person{/(1+int)}{schema}{qub_inclusion}{\\nie}", |
379 | 396 | 0., [Negation; Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) |
380 | - | |
381 | -"pos=praet|winien,person=ter,negation=aff,mood!=conditional" | |
382 | - [Number,"numbers";Gender,"all_genders";Person,"persons";Aspect,"aspects"], | |
397 | +"pos=pred,negation=aff", | |
398 | + [Number,"sg";Gender,"n2";Person,"ter";Aspect,"imperf"], | |
383 | 399 | "ip*number*gender*person{/(1+int)}{schema}{qub_inclusion}", |
384 | 400 | 0., [Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) |
385 | -"pos=praet|winien,person=ter,negation=neg,mood!=conditional" | |
386 | - [Number,"numbers";Gender,"all_genders";Person,"persons";Aspect,"aspects"], | |
401 | +"pos=pred,negation=neg", | |
402 | + [Number,"sg";Gender,"n2";Person,"ter";Aspect,"imperf"], | |
387 | 403 | "ip*number*gender*person{/(1+int)}{schema}{qub_inclusion}{\\nie}", |
388 | 404 | 0., [Negation; Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) |
389 | -"pos=pred,negation=aff" | |
405 | +"pos=pred,negation=aff", | |
390 | 406 | [Number,"sg";Gender,"n2";Person,"ter";Aspect,"imperf"], |
391 | - "ip*number*gender*person{/(1+int)}{schema}{qub_inclusion}", | |
407 | + "ip*number*gender*person{/(1+int)}{schema,|aux-fut*number*gender*person}{qub_inclusion}", | |
392 | 408 | 0., [Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) |
393 | -"pos=pred,negation=neg" | |
409 | +"pos=pred,negation=neg", | |
410 | + [Number,"sg";Gender,"n2";Person,"ter";Aspect,"imperf"], | |
411 | + "ip*number*gender*person{/(1+int)}{schema,|aux-fut*number*gender*person}{qub_inclusion}{\\nie}", | |
412 | + 0., [Negation; Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) | |
413 | +"pos=pred,negation=aff", | |
414 | + [Number,"sg";Gender,"n2";Person,"ter";Aspect,"imperf"], | |
415 | + "ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{qub_inclusion}", | |
416 | + 0., [Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) (* FIXME: tense *) | |
417 | +"pos=pred,negation=neg", | |
394 | 418 | [Number,"sg";Gender,"n2";Person,"ter";Aspect,"imperf"], |
419 | + "ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{qub_inclusion}{\\nie}", | |
420 | + 0., [Negation; Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) (* FIXME: tense *) | |
421 | + | |
422 | +"pos=praet|winien,negation=aff,mood!=conditional", | |
423 | + [Number,"numbers";Gender,"genders";Person,"ter";Aspect,"aspects"], | |
424 | + "ip*number*gender*person{/(1+int)}{schema}{qub_inclusion}", | |
425 | + 0., [Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) | |
426 | +"pos=praet|winien,negation=neg,mood!=conditional", | |
427 | + [Number,"numbers";Gender,"genders";Person,"ter";Aspect,"aspects"], | |
395 | 428 | "ip*number*gender*person{/(1+int)}{schema}{qub_inclusion}{\\nie}", |
396 | 429 | 0., [Negation; Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) |
430 | +"pos=praet|winien,negation=aff,mood!=conditional", | |
431 | + [Number,"numbers";Gender,"genders";Person,"pri&sec";Aspect,"aspects"], | |
432 | + "ip*number*gender*person{/(1+int)}{schema,|aglt*number*person}{qub_inclusion}", | |
433 | + 0., [Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) | |
434 | +"pos=praet|winien,person!=ter,negation=neg,mood!=conditional", | |
435 | + [Number,"numbers";Gender,"genders";Person,"pri&sec";Aspect,"aspects"], | |
436 | + "ip*number*gender*person{/(1+int)}{schema,|aglt*number*person}{qub_inclusion}{\\nie}", | |
437 | + 0., [Negation; Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) | |
438 | + | |
439 | +"pos=praet|winien,negation=aff,mood=conditional", | |
440 | + [Number,"numbers";Gender,"genders";Person,"ter";Aspect,"aspects"], | |
441 | + "ip*number*gender*person{/(1+int)}{schema,|by}{qub_inclusion}", | |
442 | + 0., [Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) | |
443 | +"pos=praet|winien,negation=neg,mood=conditional", | |
444 | + [Number,"numbers";Gender,"genders";Person,"ter";Aspect,"aspects"], | |
445 | + "ip*number*gender*person{/(1+int)}{schema,|by}{qub_inclusion}{\\nie}", | |
446 | + 0., [Negation; Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) | |
447 | +"pos=praet|winien,negation=aff,mood=conditional", | |
448 | + [Number,"numbers";Gender,"genders";Person,"pri&sec";Aspect,"aspects"], | |
449 | + "ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|by},|by{qub_inclusion}", | |
450 | + 0., [Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) | |
451 | +"pos=praet|winien,negation=neg,mood=conditional", | |
452 | + [Number,"numbers";Gender,"genders";Person,"pri&sec";Aspect,"aspects"], | |
453 | + "ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|by}{qub_inclusion}{\\nie}", | |
454 | + 0., [Negation; Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) | |
397 | 455 | |
456 | +"pos=praet,negation=aff,mood!=conditional", | |
457 | + [Number,"numbers";Gender,"genders";Person,"all_persons";Aspect,"aspects"], | |
458 | + "ip*number*gender*person{/(1+int)}{schema,|aux-fut*number*gender*person}{qub_inclusion}", | |
459 | + 0., [Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) | |
460 | + | |
461 | +"pos=winien,negation=aff,mood!=conditional", | |
462 | + [Number,"numbers";Gender,"genders";Person,"ter";Aspect,"aspects"], | |
463 | + "ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{qub_inclusion}", | |
464 | + 0., [Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) | |
465 | +"pos=winien,negation=neg,mood!=conditional", | |
466 | + [Number,"numbers";Gender,"genders";Person,"ter";Aspect,"aspects"], | |
467 | + "ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{qub_inclusion}{\\nie}", | |
468 | + 0., [Negation; Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) | |
469 | +"pos=winien,negation=aff,mood!=conditional", | |
470 | + [Number,"numbers";Gender,"genders";Person,"pri&sec";Aspect,"aspects"], | |
471 | + "ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|aux-past*number*gender*person}{qub_inclusion}", | |
472 | + 0., [Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) | |
473 | +"pos=winien,person!=ter,negation=neg,mood!=conditional", | |
474 | + [Number,"numbers";Gender,"genders";Person,"pri&sec";Aspect,"aspects"], | |
475 | + "ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|aux-past*number*gender*person}{qub_inclusion}{\\nie}", | |
476 | + 0., [Negation; Mood; Tense; Aspect; Number; Case; Gender; Person];(* FIXME: new_lemma *) | |
398 | 477 | |
399 | -let make_ip numbers genders persons aspects aglt aux2 (c:ENIAMtokenizerTypes.token_record) d lemma cat = | |
400 | - fnum,Frame(PersAtrs(_,new_lemma,negation,mood,tense,aux,aspect),schema) -> | |
401 | - (try | |
402 | - if aux2 = true && aux = NoAux then raise Not_found else | |
403 | - if aux2 = false && aux <> NoAux then raise Not_found else | |
404 | - let cond_arg = match mood with "conditional" -> [nosem_schema_field Both [Phrase(Lex "by")]] | "" -> failwith "make_ip" | _ -> [] in | |
405 | - let aglt_arg = if aglt then [nosem_schema_field Both [Phrase Aglt]] else [] in | |
406 | - let aux_arg = match aux with | |
407 | - PastAux -> [nosem_schema_field Both [Phrase AuxPast]] | |
408 | - | FutAux -> [nosem_schema_field Both [Phrase AuxFut]] | |
409 | - | ImpAux -> [nosem_schema_field Both [Phrase AuxImp]] | |
410 | - | NoAux -> [] in | |
411 | - let schema_list = [if negation = Aff then [] else [nosem_schema_field Backward [Phrase(Lex "nie")]]; | |
412 | - qub_inclusion; | |
413 | - aglt_arg @ aux_arg @ cond_arg @ schema @ int_arg] in | |
414 | - (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
415 | - with Not_found -> l) | |
416 | - | fnum,frame -> failwith ("make_ip 1: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in | |
417 | - | |
418 | -"pos=bedzie" | |
478 | +"pos=bedzie", | |
419 | 479 | [Number,"numbers";Gender,"all_genders";Person,"persons"], |
420 | 480 | "aux-fut*number*gender*person", |
421 | 481 | 0., []; |
422 | -"lemma=być,pos=praet" | |
423 | - [Number,"numbers";Gender,"all_genders";Person,"persons"], | |
482 | +"lemma=być,pos=praet", | |
483 | + [Number,"numbers";Gender,"genders";Person,"all_persons"], | |
424 | 484 | "aux-past*number*gender*person", |
425 | 485 | 0., []; |
486 | +"pos=aglt", | |
487 | + [Number,"numbers";Person,"persons"], | |
488 | + "aglt*number*person", | |
489 | + 0., []; | |
426 | 490 | |
491 | +"pos=inf,negation=aff", | |
492 | + [Aspect,"aspects"], | |
493 | + "infp{schema}{qub_inclusion}", | |
494 | + 0., [Aspect]; | |
495 | +"pos=inf,negation=neg", | |
496 | + [Aspect,"aspects"], | |
497 | + "infp{schema}{qub_inclusion}{\\nie}", | |
498 | + 0., [Negation; Aspect]; | |
499 | +"pos=pcon,negation=aff", | |
500 | + [Aspect,"aspects"], | |
501 | + "padvp{schema}{qub_inclusion}", | |
502 | + 0., [Aspect]; | |
503 | +"pos=pcon,negation=neg", | |
504 | + [Aspect,"aspects"], | |
505 | + "padvp{schema}{qub_inclusion}{\\nie}", | |
506 | + 0., [Negation; Aspect]; | |
507 | +"pos=pant,negation=aff", | |
508 | + [Aspect,"aspects"], | |
509 | + "padvp{schema}{qub_inclusion}", | |
510 | + 0., [Aspect]; | |
511 | +"pos=pant,negation=neg", | |
512 | + [Aspect,"aspects"], | |
513 | + "padvp{schema}{qub_inclusion}{\\nie}", | |
514 | + 0., [Negation; Aspect]; | |
515 | + | |
516 | +"pos=comp",[], | |
517 | + "cp*sub*lemma/ip*T*T*T", | |
518 | + 0., [Sub]; | |
519 | +"pos=conj",[], | |
520 | + "cp*coord*lemma/ip*T*T*T", | |
521 | + 0., [Coord]; | |
522 | +"lemma=i|lub|czy|bądź", | |
523 | + [Number,"all_numbers";Gender,"all_genders";Person,"all_persons"], | |
524 | + "(ip*number*gender*person/ip*T*T*T)\\ip*T*T*T", | |
525 | + 0., []; (* FIXME: semantyka z make_conj_frame *) | |
526 | +"lemma=,|i|lub|czy|bądź", [], | |
527 | + "(advp/prepnp*T*T)\\prepnp*T*T", | |
528 | + 0., []; (* FIXME: semantyka z make_conj_frame *) | |
529 | +"lemma=,|i|lub|czy|bądź", [], | |
530 | + "(advp/advp)\\prepnp*T*T", | |
531 | + 0., []; (* FIXME: semantyka z make_conj_frame *) | |
532 | +"lemma=,|i|lub|czy|bądź", [], | |
533 | + "(advp/prepnp*T*T)\\advp", | |
534 | + 0., []; (* FIXME: semantyka z make_conj_frame *) | |
535 | +"lemma=,|i|lub|czy|bądź", [], | |
536 | + "(advp/advp)\\advp", | |
537 | + 0., []; (* FIXME: semantyka z make_conj_frame *) | |
538 | +"lemma=,|i|lub|czy|bądź", | |
539 | + [Lemma,"";Case,"all_cases"], | |
540 | + "(prepnp*lemma*case/prepnp*lemma*case)\\prepnp*lemma*case", | |
541 | + 0., []; (* FIXME: semantyka z make_conj_frame *) | |
542 | +"lemma=,|i|lub|czy|bądź", | |
543 | + [Number,"all_numbers";Case,"all_cases";Gender,"all_genders";Person,"all_persons"], | |
544 | + "(np*number*case*gender*person/np*T*case*T*T)\\np*T*case*T*T", | |
545 | + 0., []; (* FIXME: semantyka z make_conj_frame *) | |
546 | +"lemma=,|i|lub|czy|bądź", | |
547 | + [Number,"all_numbers";Case,"all_cases";Gender,"all_genders"], | |
548 | + "(adjp*number*case*gender/adjp*number*case*gender)\\adjp*number*case*gender", | |
549 | + 0., []; (* FIXME: semantyka z make_conj_frame *) | |
427 | 550 | |
428 | - | lemma,"praet",[numbers;genders;aspects] -> | |
429 | - (make_ip numbers genders ["pri";"sec"] aspects true false c d lemma "praet") @ | |
430 | - (make_ip numbers genders ["pri";"sec";"ter"] aspects false true c d lemma "praet") | |
431 | - | lemma,"winien",[numbers;genders;aspects] -> | |
432 | - (make_ip numbers genders ["ter"] aspects false true c d lemma "winien") @ | |
433 | - (make_ip numbers genders ["pri";"sec"] aspects true false c d lemma "winien") @ | |
434 | - (make_ip numbers genders ["pri";"sec"] aspects true true c d lemma "winien") | |
435 | - | lemma,"pred",[] -> (* FIXME: czy predykatyw zawsze jest niedokonany? *) | |
436 | - (make_ip ["sg"] ["n2"] ["ter"] ["imperf"] false true c d lemma "pred") | |
437 | - | |
438 | -let make_infp aspects (c:ENIAMtokenizerTypes.token_record) d lemma = | |
439 | - Xlist.fold d.simple_valence [] (fun l -> function | |
440 | - fnum,Frame(NonPersAtrs(_,new_lemma,role,role_attr,negation,aspect),schema) -> | |
441 | - (try | |
442 | - let aspects = check_frame_aspect aspects aspect in | |
443 | - let quant = ["aspect",d.e.aspect,aspects] in | |
444 | - let t = ["infp"; "aspect"] in | |
445 | - let batrs = make_node new_lemma "inf" c.weight fnum (["aspect"] @ if negation = Aff then [] else ["negation"]) in | |
446 | - let schema_list = [if negation = Aff then [] else [nosem_schema_field Backward [Phrase(Lex "nie")]]; | |
447 | - qub_inclusion;schema] in | |
448 | - (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
449 | - with Not_found -> l) | |
450 | - | fnum,LexFrame(lid,INF(aspect,negation,refl),NoRestr,schema) -> | |
451 | - (try | |
452 | - let aspects = check_frame_aspect aspects aspect in | |
453 | - let quant = ["aspect",d.e.aspect,aspects] in | |
454 | - let t = ["lex";lid;lemma;"inf"; "aspect"] in | |
455 | - let new_lemma,schema = if refl = ReflEmpty then lemma, schema else lemma ^ " się", nosem_refl_schema_field :: schema in | |
456 | - let batrs = make_node new_lemma "inf" (lex_weight +. c.weight) fnum (["lex";"aspect"] @ if negation = Aff then [] else ["negation"]) in | |
457 | - let schema_list = [if negation = Aff then [] else [nosem_schema_field Backward [Phrase(Lex "nie")]]; | |
458 | - [inclusion];schema] in | |
459 | - (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
460 | - with Not_found -> l) | |
461 | - | fnum,frame -> failwith ("make_infp: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in | |
462 | - | |
463 | -let make_padvp aspects (c:ENIAMtokenizerTypes.token_record) d lemma cat = | |
464 | - Xlist.fold d.simple_valence [] (fun l -> function | |
465 | - fnum,Frame(NonPersAtrs(_,new_lemma,role,role_attr,negation,aspect),schema) -> | |
466 | - (try | |
467 | - let aspects = check_frame_aspect aspects aspect in | |
468 | - let quant = ["aspect",d.e.aspect,aspects] in | |
469 | - let t = ["padvp"] in | |
470 | - let batrs = make_node new_lemma cat c.weight fnum (["aspect"] @ if negation = Aff then [] else ["negation"]) in | |
471 | - let schema_list = [if negation = Aff then [] else [nosem_schema_field Backward [Phrase(Lex "nie")]]; | |
472 | - qub_inclusion;schema] in | |
473 | - (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
474 | - with Not_found -> l) | |
475 | - | fnum,frame -> failwith ("make_padvp: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in | |
476 | - | |
477 | -let make_conjunct (c:ENIAMtokenizerTypes.token_record) d lemma cat = (* FIXME: poprawić semantykę *) | |
478 | - let ctype = if cat = "comp" then "sub" else if cat = "conj" then "coord" else failwith "make_conjunct" in | |
479 | - let quant = [] in | |
480 | - let t = ["cp"; ctype; lemma] in | |
481 | - let batrs = make_node lemma cat c.weight 0 [ctype] in | |
482 | - let schema_list = [[comp_arg_schema_field [Phrase IP]]] in | |
483 | - [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] in | |
484 | - | |
485 | -(* FIXME: uzgadniania HIPERO i SELPREFS *) | |
486 | -let make_conj f (c:ENIAMtokenizerTypes.token_record) d lemma = | |
487 | - (if f then | |
488 | - [LCGrenderer.make_conj_frame | |
489 | - ["number",d.e.number,all_numbers;"gender",d.e.gender,all_genders;"person",d.e.person,all_persons] | |
490 | - (Tensor[Atom "ip"; Top; Top; Top]) (Tensor[Atom "ip"; Top; Top; Top]) | |
491 | - ["ip";"number";"gender";"person"] d | |
492 | - (make_node lemma "conj" c.weight 0 ["number";"gender";"person"])] else []) @ | |
493 | - [LCGrenderer.make_conj_frame [] | |
494 | - (Tensor[Atom "prepnp"; Top; Top]) (Tensor[Atom "prepnp"; Top; Top]) ["advp"] d | |
495 | - (make_node lemma "conj" c.weight 0 []); | |
496 | - LCGrenderer.make_conj_frame [] | |
497 | - (Tensor[Atom "advp"]) (Tensor[Atom "prepnp"; Top; Top]) ["advp"] d | |
498 | - (make_node lemma "conj" c.weight 0 []); | |
499 | - LCGrenderer.make_conj_frame [] | |
500 | - (Tensor[Atom "prepnp"; Top; Top]) (Tensor[Atom "advp"]) ["advp"] d | |
501 | - (make_node lemma "conj" c.weight 0 []); | |
502 | - LCGrenderer.make_conj_frame [] | |
503 | - (Tensor[Atom "advp"]) (Tensor[Atom "advp"]) ["advp"] d | |
504 | - (make_node lemma "conj" c.weight 0 []); | |
505 | - LCGrenderer.make_conj_frame ["lemma",ge (),[];"case",d.e.case,all_cases] | |
506 | - (Tensor[Atom "prepnp";AVar "lemma"; AVar "case"]) (Tensor[Atom "prepnp"; AVar "lemma"; AVar "case"]) | |
507 | - ["prepnp";"lemma";"case"] d | |
508 | - (make_node lemma "conj" c.weight 0 ["case"]); | |
509 | - LCGrenderer.make_conj_frame | |
510 | - ["number",d.e.number,all_numbers;"case",d.e.case,all_cases;"gender",d.e.gender,all_genders;"person",d.e.person,all_persons] | |
511 | - (Tensor[Atom "np"; Top; AVar "case"; Top; Top]) (Tensor[Atom "np"; Top; AVar "case"; Top; Top]) | |
512 | - ["np"; "number"; "case"; "gender"; "person"] d | |
513 | - (make_node lemma "conj" c.weight 0 ["number";"case";"gender";"person"]); | |
514 | - LCGrenderer.make_conj_frame | |
515 | - ["number",d.e.number,all_numbers;"case",d.e.case,all_cases;"gender",d.e.gender,all_genders] | |
516 | - (Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]) (Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]) | |
517 | - ["adjp";"number";"case";"gender"] d | |
518 | - (make_node lemma "conj" c.weight 0 ["number";"case";"gender"]); | |
519 | - ] in | |
520 | - | |
521 | -(* FIXME: aktualnie NP nie obejmują przymiotników, trzeba albo dodać podrzędniki przymiotnikowe, albo kategorię np dla przymiotników *) | |
522 | -(* FIXME: nadmiarowe interpretacje dla num np. dodana jest opcja z pro i apozycją *) | |
523 | -(* FIXME: zrobić kontolę w znaczeniu dziedziczenia podmiotu *) | |
524 | -(* FIXME: poprawić walencję z negacją, problem z zanegowanymi ramami dla ger i ppas *) | |
525 | -(* FIXME: sprawdzić czy są ramy z NegationUndef i NegationNA *) | |
526 | -(* FIXME: obniżyć wagi przyimków i kublików pisanych z wielkiej litery podobnie przy skrótach *) | |
527 | - | |
528 | -let rec process_interp (c:ENIAMtokenizerTypes.token_record) (d:ENIAMlexSemanticsTypes.lex_sem) = function (* FIXME: rozpoznawanie lematów nie działa, gdy mają wielką literę *) | |
529 | -"pos=subst",lemma = "co" || lemma = "kto" then (* FIXME: dodać podrzędniki np. co nowego *) | |
530 | - List.flatten (Xlist.map ["int";"rel"] (fun ctype -> | |
531 | - let quant = ["inumber",ge (),[];"igender",ge (),[];"iperson",ge (),[];"plemma",ge (),[];"ctype",ge (),[ctype];"number",d.e.number,expand_numbers numbers;"case",d.e.case,expand_cases cases;"gender",d.e.gender,expand_genders genders; "person",d.e.person,["ter"]] in | |
532 | - let t = ["cp"; "ctype"; lemma] in | |
533 | - let sem_mods = ["CTYPE",SubstVar "ctype"] in (* atrybuty ip *) | |
534 | - let batrs = make_node lemma "subst" c.weight 0 [ctype;"case"] in (* atrybuty liścia *) | |
535 | - let raised_arg = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["np";"number";"case";"gender";"person"])] in | |
536 | - let raised_arg1 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["prepnp";"plemma";"case"])] in | |
537 | - let raised_arg2 = [Raised(["prepnp";"plemma";"case"],Forward,["np";"number";"case";"gender";"person"])] in | |
538 | - let raised_arg3 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["comprepnp";"plemma"])] in | |
539 | - let raised_arg4 = [Raised(["comprepnp";"plemma"],Forward,["np";"number";"case";"gender";"person"])] in | |
540 | - let schema_list = [[schema_field RAISED "" Forward raised_arg]] in | |
541 | - let frame_np = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
542 | - let schema_list = [[schema_field RAISED "" Backward raised_arg2];[schema_field RAISED "" Forward raised_arg1]] in | |
543 | - let frame_prepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
544 | - let schema_list = [[schema_field RAISED "" Backward raised_arg4];[schema_field RAISED "" Forward raised_arg3]] in | |
545 | - let frame_comprepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
546 | - [frame_np;frame_prepnp;frame_comprepnp])) else []) @ | |
547 | -"pos=subst",lemma = "to" then (* FIXME: przetestować *) | |
548 | - let quant = ["ctype",ge (),[];"lemma",ge (),[];"number",d.e.number,expand_numbers numbers;"case",d.e.case,expand_cases cases;"gender",d.e.gender,expand_genders genders; "person",d.e.person,["ter"]] in | |
549 | - let t = ["ncp"; "number"; "case"; "gender"; "person"; "ctype"; "lemma"] in | |
550 | - let batrs = make_node "to" "subst" c.weight 0 ["coreferential"; "number"; "case"; "gender"; "person"; "ctype"] in | |
551 | - let schema_list = [qub_inclusion;[prep_arg_schema_field [Phrase(CP(CompTypeAgr,Comp "lemma"))]]] in | |
552 | - [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] | |
553 | -"pos=ppron3",praep=="praep" -> | |
554 | - let quant = ["lemma",ge (),[]; "number",d.e.number,expand_numbers numbers;"case",d.e.case,expand_cases cases;"gender",d.e.gender,expand_genders genders; "person",d.e.person,persons] in | |
555 | - let t = ["prepnp"; "lemma"; "case"] in | |
556 | - Xlist.fold d.simple_valence [] (fun l -> function | |
557 | - fnum,Frame(NounAtrs(_,nsyn,nsem),schema) -> | |
558 | - let batrs = make_node lemma "ppron3" c.weight fnum (nsyn ::(ENIAMwalStringOf.nsem nsem) :: ["number";"case";"gender";"person"]) in | |
559 | - let raised_arg = [Raised(["prepnp";"lemma";"case"],Forward,["np";"number";"case";"gender";"person"])] in | |
560 | - let schema_list = [[schema_field RAISED "" Backward raised_arg];[inclusion]] in | |
561 | - (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
562 | - | _ -> failwith "process_interp: ppron3 praep") | |
563 | - | _ -> failwith "process_interp: ppron3 praep")) | |
564 | -(* | lemma,"NUM",[["comp"]] -> failwith "num:comp"*) | |
565 | - | lemma,"num",[numbers;cases;genders;acm] -> (* FIXME: liczebniki złożone *) | |
566 | - (if lemma = "ile" then (* FIXME: walencja ile *) | |
567 | - List.flatten (Xlist.map ["int";"rel"] (fun ctype -> | |
568 | - List.flatten (Xlist.map acm (fun acm -> | |
569 | - let phrase,num,gend = match acm with "congr" -> NP AllAgr,"number","gender" | "rec" -> NP GenAgr,"sg","n2" | _ -> failwith "process_interp: num acm" in | |
570 | - let quant = ["inumber",ge (),[];"igender",ge (),[];"iperson",ge (),[];"plemma",ge (),[];"ctype",ge (),[ctype];"number",d.e.number,expand_numbers numbers;"case",d.e.case,expand_cases cases;"gender",d.e.gender,expand_genders genders;"person",d.e.person,["ter"]] in | |
571 | - let t = ["cp"; "ctype"; lemma] in | |
572 | - let sem_mods = ["CTYPE",SubstVar "ctype"] in | |
573 | - let batrs = make_node lemma "num" c.weight 0 [ctype;acm;"number";"case";"gender";"person"] in | |
574 | - let raised_arg1 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["nump";num;"case";gend;"person"])] in | |
575 | - let raised_arg2a = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["prepnp";"plemma";"case"])] in | |
576 | - let raised_arg2b = [Raised(["prepnp";"plemma";"case"],Forward,["nump";num;"case";gend;"person"])] in | |
577 | - let raised_arg3a = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["comprepnp";"plemma"])] in | |
578 | - let raised_arg3b = [Raised(["comprepnp";"plemma"],Forward,["nump";num;"case";gend;"person"])] in | |
579 | - let schema_list = [[num_arg_schema_field [Phrase ProNG; Phrase phrase]];[schema_field RAISED "" Forward raised_arg1]] in | |
580 | - let frame_nump = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
581 | - let schema_list = [[num_arg_schema_field [Phrase ProNG; Phrase phrase]];[schema_field RAISED "" Backward raised_arg2b];[schema_field RAISED "" Forward raised_arg2a]] in | |
582 | - let frame_prepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
583 | - let schema_list = [[num_arg_schema_field [Phrase ProNG; Phrase phrase]];[schema_field RAISED "" Backward raised_arg3b];[schema_field RAISED "" Forward raised_arg3a]] in | |
584 | - let frame_comprepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
585 | - [frame_nump;frame_prepnp;frame_comprepnp])))) else []) | |
586 | - | lemma,"adj",[numbers;cases;genders;grads] -> | |
587 | - (if lemma = "czyj" || lemma = "jaki" || lemma = "który" then | |
588 | - List.flatten (Xlist.map ["int"] (fun ctype -> | |
589 | - let _ = match grads with ["pos"] -> () | _ -> failwith "process_interp adj: grad" in | |
590 | - let quant = ["inumber",ge (),[];"igender",ge (),[];"iperson",ge (),[];"nperson",ge (),[];"plemma",ge (),[];"ctype",ge (),[ctype];"number",d.e.number,expand_numbers numbers;"case",d.e.case,expand_cases cases;"gender",d.e.gender,expand_genders genders] in | |
591 | - let t = ["cp"; "ctype"; lemma] in | |
592 | - let sem_mods = ["CTYPE",SubstVar "ctype"] in | |
593 | - let batrs = make_node lemma "adj" c.weight 0 [ctype;"number";"case";"gender"] in | |
594 | - let raised_arg0 = [Raised(["np";"number";"case";"gender";"nperson"],Backward,["adjp";"number";"case";"gender"])] in | |
595 | - let raised_arg1 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["np";"number";"case";"gender";"nperson"])] in | |
596 | - let raised_arg2a = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["prepnp";"plemma";"case"])] in | |
597 | - let raised_arg2b = [Raised(["prepnp";"plemma";"case"],Forward,["np";"number";"case";"gender";"nperson"])] in | |
598 | - let raised_arg3a = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["comprepnp";"plemma"])] in | |
599 | - let raised_arg3b = [Raised(["comprepnp";"plemma"],Forward,["np";"number";"case";"gender";"nperson"])] in | |
600 | - let schema_list = [[schema_field RAISED "" Forward raised_arg0];[schema_field RAISED "" Forward raised_arg1]] in | |
601 | - let frame_np = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
602 | - let schema_list = [[schema_field RAISED "" Forward raised_arg0];[schema_field RAISED "" Backward raised_arg2b];[schema_field RAISED "" Forward raised_arg2a]] in | |
603 | - let frame_prepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
604 | - let schema_list = [[schema_field RAISED "" Forward raised_arg0];[schema_field RAISED "" Backward raised_arg3b];[schema_field RAISED "" Forward raised_arg3a]] in | |
605 | - let frame_comprepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
606 | - [frame_np;frame_prepnp;frame_comprepnp])) else []) @ | |
607 | - (if lemma = "jaki" || lemma = "który" then | |
608 | - List.flatten (Xlist.map ["rel"] (fun ctype -> | |
609 | - let _ = match grads with ["pos"] -> () | _ -> failwith "process_interp adj: grad" in | |
610 | - let quant = ["inumber",ge (),[];"igender",ge (),[];"iperson",ge (),[];"plemma",ge (),[];"ctype",ge (),[ctype];"number",d.e.number,expand_numbers numbers;"case",d.e.case,expand_cases cases;"gender",d.e.gender,expand_genders genders; "person",d.e.person,["ter"]] in | |
611 | - let t = ["cp"; "ctype"; lemma] in | |
612 | - let sem_mods = ["CTYPE",SubstVar "ctype"] in | |
613 | - let batrs = make_node lemma "adj" c.weight 0 [ctype;"number";"case";"gender";"person"] in | |
614 | - let raised_arg = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["np";"number";"case";"gender";"person"])] in | |
615 | - let raised_arg1 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["prepnp";"plemma";"case"])] in | |
616 | - let raised_arg2 = [Raised(["prepnp";"plemma";"case"],Forward,["np";"number";"case";"gender";"person"])] in | |
617 | - let raised_arg3 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["comprepnp";"plemma"])] in | |
618 | - let raised_arg4 = [Raised(["comprepnp";"plemma"],Forward,["np";"number";"case";"gender";"person"])] in | |
619 | - let schema_list = [[schema_field RAISED "" Forward raised_arg]] in | |
620 | - let frame_np = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
621 | - let schema_list = [[schema_field RAISED "" Backward raised_arg2];[schema_field RAISED "" Forward raised_arg1]] in | |
622 | - let frame_prepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
623 | - let schema_list = [[schema_field RAISED "" Backward raised_arg4];[schema_field RAISED "" Forward raised_arg3]] in | |
624 | - let frame_comprepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
625 | - [frame_np;frame_prepnp;frame_comprepnp])) else []) @ | |
626 | - | "być","aglt",[numbers;persons;aspects;wok] -> | |
627 | - let numbers = expand_numbers numbers in | |
628 | - let quant = ["number",d.e.number,numbers; "person", d.e.person,persons] in | |
629 | - let t = ["aglt"; "number"; "person"] in | |
630 | - [LCGrenderer.make_frame_simple quant t c ( (make_node "być" "aglt" c.weight 0 [])(*[Dot;Dot;Dot]*))] | |
631 | - | lemma,"inf",[aspects] -> (* FIXME: wielopoziomowe InfP *) | |
632 | - make_infp aspects c d lemma | |
633 | - | lemma,"pcon",[aspects] -> | |
634 | - make_padvp aspects c d lemma "pcon" | |
635 | - | lemma,"pant",[aspects] -> | |
636 | - make_padvp aspects c d lemma "pant" | |
637 | - | "się","qub",[] -> [LCGrenderer.make_frame_simple [] ["się"] {c with orth=""} ( (make_node "się" "qub" c.weight 0 [])) (* FIXME: dodać make_np *)] | |
638 | - | "nie","qub",[] -> [LCGrenderer.make_frame_simple [] ["nie"] {c with orth=""} (make_node "nie" "qub" c.weight 0 [])] | |
639 | - | "by","qub",[] -> [LCGrenderer.make_frame_simple [] ["by"] {c with orth=""} (make_node "by" "qub" c.weight 0 [])] | |
640 | - | "niech","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] c (make_node "niech" "qub" c.weight 0 [])] | |
641 | - | "niechaj","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] c (make_node "niechaj" "qub" c.weight 0 [])] | |
642 | - | "niechże","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] c (make_node "niechże" "qub" c.weight 0 [])] | |
643 | - | "niechajże","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] c (make_node "niechajże" "qub" c.weight 0 [])] | |
644 | - | "czy","qub",[] -> (* FIXME: poprawić semantykę *) | |
645 | - let quant = [] in | |
646 | - let t = ["cp"; "int"; "czy"] in | |
647 | - let batrs = make_node "czy" "qub" c.weight 0 ["int"] in | |
648 | - let schema_list = [[comp_arg_schema_field [Phrase IP]]] in | |
649 | - [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] | |
650 | - | "gdyby","qub",[] -> (* FIXME: poprawić semantykę *) (* FIXME: poprawić tryb przypuszczający *) (* FIXME: problem z interpretacją jako 'gdy' *) | |
651 | - let quant = [] in | |
652 | - let t = ["cp"; "rel"; "gdyby"] in | |
653 | - let batrs = make_node "gdyby" "qub" c.weight 0 ["rel"] in | |
654 | - let schema_list = [[comp_arg_schema_field [Phrase IP]]] in | |
655 | - [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] | |
656 | - | lemma,"qub",[] -> [LCGrenderer.make_frame_simple [] ["qub"] c ( (make_node lemma "qub" c.weight 0 []))] (* FIXME: semantyka i rodzaje kublików *) | |
657 | - | lemma,"comp",[] -> make_conjunct c d lemma "comp" | |
658 | - | "i","conj",[] -> make_conj true c d "i" @ (make_conjunct c d "i" "conj") | |
659 | - | "lub","conj",[] -> make_conj true c d "lub" @ (make_conjunct c d "lub" "conj") | |
660 | - | "czy","conj",[] -> make_conj true c d "czy" @ (make_conjunct c d "czy" "conj") | |
661 | - | "bądź","conj",[] -> make_conj true c d "bądź" @ (make_conjunct c d "bądź" "conj") | |
662 | - | lemma,"conj",[] -> make_conjunct c d lemma "conj" | |
663 | -(* | "interp",[] -> [] | |
664 | - | "brev",[pun] -> []*) | |
665 | - | lemma,"interj",[] -> [LCGrenderer.make_frame_simple [] ["interj"] c (make_node lemma "interj" c.weight 0 [])] | |
666 | - | lemma,"burk",[] -> [] (* FIXME *) | |
667 | -(* | "dig",[] -> [] | |
668 | - | "romandig",[] -> [] | |
669 | - | "ign",[] -> [] | |
670 | - | "xxx",[] -> [] (* to występuje w słowniku skrótów *)*) | |
671 | -(* | ".","interp",[] -> [] | |
672 | - | "%","interp",[] -> []*) | |
673 | - | "-","interp",[] -> [LCGrenderer.make_frame_simple [] ["hyphen"] c (make_node "-" "interp" c.weight 0 [])] | |
674 | -(* | ":","interp",[] -> [LCGrenderer.make_frame_simple [] ["colon"] ":" beg len [Dot] [Dot]]*) | |
675 | - | "?","interp",[] -> [LCGrenderer.make_frame_simple [] ["int"] c (make_node "?" "interp" c.weight 0 [])] (*FIXME: zdanie nadrzędne powinno mieć atrybut pytajności(Attr("INT",Val "+"))] *) | |
676 | - | ",","interp",[] -> make_conj false c d "," (*@ [LCGrenderer.make_frame_simple [] ["comma"] "," beg len [Dot] [Dot]]*) | |
677 | - | ";","interp",[] -> [](*[LCGrenderer.make_frame_simple [] ["comma"] ";" beg len [Dot] [Dot]]*) | |
678 | - | "„","interp",[] -> [(* FIXME: zaznaczyć niesemantyczność quotów *) | |
679 | - LCGrenderer.make_quot_frame | |
680 | - ["number",d.e.number,[];"case",d.e.case,[];"gender",d.e.gender,[];"person",d.e.person,[]] | |
681 | - (Tensor[Atom "np"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]) (Tensor[Atom "rquot"]) | |
682 | - ["np";"number";"case";"gender";"person"] d | |
683 | - (make_node "„" "interp" c.weight 0 [])] | |
684 | - | "”","interp",[] -> [LCGrenderer.make_frame_simple [] ["rquot"] c (make_node "”" "interp" c.weight 0 [])] | |
685 | - | "«","interp",[] -> [LCGrenderer.make_frame_simple [] ["rquot3"] c (make_node "«" "interp" c.weight 0 []); | |
686 | - LCGrenderer.make_quot_frame | |
687 | - ["number",d.e.number,[];"case",d.e.case,[];"gender",d.e.gender,[];"person",d.e.person,[]] | |
688 | - (Tensor[Atom "np"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]) (Tensor[Atom "rquot2"]) | |
689 | - ["np";"number";"case";"gender";"person"] d | |
690 | - (make_node "«" "interp" c.weight 0 [])] | |
691 | - | "»","interp",[] -> [LCGrenderer.make_frame_simple [] ["rquot2"] c (make_node "»" "interp" c.weight 0 []); | |
692 | - LCGrenderer.make_quot_frame | |
693 | - ["number",d.e.number,[];"case",d.e.case,[];"gender",d.e.gender,[];"person",d.e.person,[]] | |
694 | - (Tensor[Atom "np"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]) (Tensor[Atom "rquot3"]) | |
695 | - ["np";"number";"case";"gender";"person"] d | |
696 | - (make_node "»" "interp" c.weight 0 [])] | |
697 | - | "(","interp",[] -> [LCGrenderer.make_inclusion_frame (Tensor[Atom "rparen"]) d (make_node "(" "interp" c.weight 0 [])] | |
698 | - | ")","interp",[] -> [LCGrenderer.make_frame_simple [] ["rparen"] c (make_node ")" "interp" c.weight 0 [])] | |
699 | - | "[","interp",[] -> [LCGrenderer.make_inclusion_frame (Tensor[Atom "rparen"]) d (make_node "[" "interp" c.weight 0 [])] | |
700 | - | "]","interp",[] -> [LCGrenderer.make_frame_simple [] ["rparen"] c (make_node "]" "interp" c.weight 0 [])] | |
701 | - | lemma,"unk",[] -> | |
702 | - let quant = ["number",d.e.number,all_numbers;"case",d.e.case,all_cases; "gender",d.e.gender,all_genders; "person",d.e.person, ["ter"]] in | |
703 | - let t = ["np"; "number"; "case"; "gender"; "person"] in | |
704 | - let batrs = make_node lemma "unk" c.weight 0 ["number"; "case"; "gender"; "person"] in | |
705 | - [LCGrenderer.make_frame_simple quant t c ( batrs)] | |
706 | - | _,"xxx",[] -> [] (* FIXME *) | |
707 | - | ".","interp",[] -> [LCGrenderer.make_frame_simple [] ["dot"] c (make_node "." "interp" c.weight 0 [])] (* FIXME: to jest potrzebne przy CONLL *) | |
551 | +"lemma=co|kto,pos=subst", | |
552 | + [Inumber,"";Igender,"";Iperson,"";Ctype,"int&rel";Number,"numbers";Case,"cases";Gender,"genders";Person,"ter"], | |
553 | + "cp*ctype*lemma/(ip*inumber*igender*iperson/np*number*case*gender*person)", | |
554 | + 0., [Ctype]; (*["CTYPE",SubstVar "ctype"]*) | |
555 | +"lemma=co|kto,pos=subst", | |
556 | + [Inumber,"";Igender,"";Iperson,"";Plemma,"";Ctype,"int&rel";Number,"numbers";Case,"cases";Gender,"genders";Person,"ter"], | |
557 | + "cp*ctype*lemma{/(ip*inumber*igender*iperson/prepnp*plemma*case),/(prepnp*plemma*case/np*number*case*gender*person)}", | |
558 | + 0., [Ctype]; (*["CTYPE",SubstVar "ctype"]*) | |
559 | +"lemma=to,pos=subst", | |
560 | + [Ctype,"";Plemma,"";Number,"numbers";Case,"cases";Gender,"genders";Person,"ter"], | |
561 | + "ncp*number*case*gender*person*ctype*plemma{qub_inclusion}{/cp*ctype*plemma}", | |
562 | + 0., [Number;Case;Gender;Person;Ctype]; (*coreferential*) | |
563 | +"pos=ppron3,praep==praep", | |
564 | + [Plemma,"";Number,"numbers";Case,"cases";Gender,"genders";Person,"persons"], | |
565 | + "prepnp*plemma*case\\(prepnp*plemma*case/np*number*case*gender*person)", (*inclusion*) | |
566 | + 0., [Number;Case;Gender;Person]; | |
567 | +"lemma=ile,pos=num", (* FIXME: iloma ma bezpośredni podrzędnik rzeczownikowy, a ile nie *) (* FIXME: mwe "o ile, na ile" *) | |
568 | + [Inumber,"";Igender,"";Iperson,"";Ctype,"int&rel";Number,"numbers";Case,"cases";Gender,"genders";Person,"ter"], | |
569 | + "cp*ctype*lemma/ip*inumber*igender*iperson", (* FIXME: zaślepka, bo podrzędnik ile nie musi z nim sąsiadować *) | |
570 | + 0., [Number;Case;Gender;Person;Ctype;Acm]; (*["CTYPE",SubstVar "ctype"]*) (* FIXME: trzeba dodać przypadki, bezpośredniego podrzędnika rzeczownikowego i przyimka nad "ile" *) | |
571 | +"lemma=czyj|jaki|który,pos=adj", | |
572 | + [Inumber,"";Igender,"";Iperson,"";Nperson,"";Ctype,"int";Number,"numbers";Case,"cases";Gender,"genders"], | |
573 | + "cp*ctype*lemma{/(ip*inumber*igender*iperson/np*number*case*gender*nperson)}{/(np*number*case*gender*nperson/adjp*number*case*gender)}", | |
574 | + 0., [Ctype]; (*["CTYPE",SubstVar "ctype"]*) | |
575 | +"lemma=czyj|jaki|który,pos=adj", | |
576 | + [Inumber,"";Igender,"";Iperson,"";Nperson,"";Plemma,"";Ctype,"int";Number,"numbers";Case,"cases";Gender,"genders"], | |
577 | + "cp*ctype*lemma{/(ip*inumber*igender*iperson/prepnp*plemma*case)}{/(prepnp*plemma*case/np*number*case*gender*nperson)}{/(np*number*case*gender*nperson/adjp*number*case*gender)}", | |
578 | + 0., [Ctype]; (*["CTYPE",SubstVar "ctype"]*) | |
579 | +"lemma=jaki|który,pos=adj", (* relative (Ctype "rel") np-only rule; the replaced process_interp code applied it to jaki/który, not czyj *) | |
580 | + [Inumber,"";Igender,"";Iperson,"";Ctype,"rel";Number,"numbers";Case,"cases";Gender,"genders";Person,"ter"], | |
581 | + "cp*ctype*lemma/(ip*inumber*igender*iperson/np*number*case*gender*person)", | |
582 | + 0., [Ctype]; (*["CTYPE",SubstVar "ctype"]*) | |
583 | +"lemma=jaki|który,pos=adj", | |
584 | + [Inumber,"";Igender,"";Iperson,"";Plemma,"";Ctype,"rel";Number,"numbers";Case,"cases";Gender,"genders";Person,"ter"], | |
585 | + "cp*ctype*lemma{/(ip*inumber*igender*iperson/prepnp*plemma*case)}{/(prepnp*plemma*case/np*number*case*gender*person)}", | |
586 | + 0., [Ctype]; (*["CTYPE",SubstVar "ctype"]*) | |
587 | +"lemma=się,pos=qub",[],"się",0.,[]; (* FIXME: dodać make_np *) | |
588 | +"lemma=nie,pos=qub",[],"nie",0.,[]; | |
589 | +"lemma=by,pos=qub",[],"by",0.,[]; | |
590 | +"lemma=niech,pos=qub",[],"aux-imp",0.,[]; | |
591 | +"lemma=niechaj,pos=qub",[],"aux-imp",0.,[]; | |
592 | +"lemma=niechże,pos=qub",[],"aux-imp",0.,[]; | |
593 | +"lemma=niechajże,pos=qub",[],"aux-imp",0.,[]; | |
594 | +"lemma=czy,pos=qub",[],"cp*int*czy/ip*T*T*T",0.,[Int]; | |
595 | +"lemma=gdyby,pos=qub",[],"cp*rel*gdyby/ip*T*T*T",0.,[Rel]; | |
596 | +"pos=qub",[],"qub",0.,[]; | |
597 | +"pos=interj",[],"interj",0.,[]; | |
598 | +"lemma=-,pos=interp",[],"hyphen",0.,[]; | |
599 | +"lemma=?,pos=interp",[],"int",0.,[]; | |
600 | +"lemma=„,pos=interp", | |
601 | + [Number,"";Case,"";Gender,"";Person,""], | |
602 | + "(np*number*case*gender*person/rquot)/np*number*case*gender*person", | |
603 | + 0.,[]; (* make_quot_frame *) | |
604 | +"lemma=«,pos=interp", | |
605 | + [Number,"";Case,"";Gender,"";Person,""], | |
606 | + "(np*number*case*gender*person/rquot2)/np*number*case*gender*person", | |
607 | + 0.,[]; (* make_quot_frame *) | |
608 | +"lemma=»,pos=interp", | |
609 | + [Number,"";Case,"";Gender,"";Person,""], | |
610 | + "(np*number*case*gender*person/rquot3)/np*number*case*gender*person", | |
611 | + 0.,[]; (* make_quot_frame *) | |
612 | +"lemma=”,pos=interp",[],"rquot",0.,[]; | |
613 | +"lemma=»,pos=interp",[],"rquot2",0.,[]; | |
614 | +"lemma=«,pos=interp",[],"rquot3",0.,[]; | |
615 | +"lemma=(,pos=interp",[], | |
616 | + "(inclusion/rparen)/(np*T*T*T*T+ip*T*T*T+adjp*T*T*T+prepnp*T*T)", | |
617 | + 0.,[]; (* make_inclusion_frame *) | |
618 | +"lemma=[,pos=interp",[], | |
619 | + "(inclusion/rparen2)/(np*T*T*T*T+ip*T*T*T+adjp*T*T*T+prepnp*T*T)", | |
620 | + 0.,[]; (* make_inclusion_frame *) | |
621 | +"lemma=),pos=interp",[],"rparen",0.,[]; | |
622 | +"lemma=],pos=interp",[],"rparen2",0.,[]; | |
623 | +"pos=unk", | |
624 | + [Number,"all_numbers";Case,"all_cases";Gender,"all_genders";Person,"all_persons"], | |
625 | + "np*number*case*gender*person", | |
626 | + 0., [Number;Case;Gender;Person]; | |
627 | + | |
628 | +(* | ".","interp",[] -> [LCGrenderer.make_frame_simple [] ["dot"] c (make_node "." "interp" c.weight 0 [])] (* FIXME: to jest potrzebne przy CONLL *) | |
708 | 629 | | "<conll_root>","interp",[] -> |
709 | 630 | let batrs = (make_node "<conll_root>" "interp" c.weight 0 []) in |
710 | 631 | let schema_list = [[schema_field CLAUSE "Clause" Forward [Phrase IP;Phrase (CP(Int,CompUndef));Phrase (NP(Case "voc"));Phrase (Lex "interj")]]] in |
711 | 632 | [LCGrenderer.make_frame false tokens lex_sems [] schema_list ["<conll_root>"] d batrs] |
712 | - | lemma,c,l -> failwith ("process_interp: " ^ lemma ^ ":" ^ c ^ ":" ^ (String.concat ":" (Xlist.map l (String.concat ".")))) in | |
713 | - | |
714 | -let process_bracket_lemma (c:ENIAMtokenizerTypes.token_record) (d:ENIAMlexSemanticsTypes.lex_sem) = function | |
715 | - (* "<query>" -> | |
716 | - [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field NOSEM "" Forward [Phrase Null;Phrase (Lex "<dummy>")]];[arg_schema_field Forward [Phrase (Lex "</query>")]]] (["<query>"]) {d with orth=""} (make_node "<query1>" "interp" c.weight 0 []); | |
717 | - LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field SENTENCE "" Forward [Phrase (Lex "<ors>")]]] (["<query>"]) {d with orth=""} (make_node "<query2>" "interp" c.weight 0 []); | |
718 | - LCGrenderer.make_frame x_flag tokens lex_sems [] [[(*nosem*)arg_schema_field Forward [Phrase (Lex "<speaker>")]];[nosem_schema_field Forward [Phrase (Lex "<colon>")]];[(*nosem*)arg_schema_field Forward [Phrase (Lex "<ors>")]];[(*nosem*)arg_schema_field Forward [Phrase (Lex "</query>")]]] (["<query>"]) {d with orth=""} (make_node "<query3>" "interp" c.weight 0 []); | |
719 | - LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field SENTENCE "" Forward [Phrase (Lex "<colon>")]]] (["<query>"]) {d with orth=""} (make_node "<query4>" "interp" c.weight 0 []); | |
720 | - LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field SENTENCE "" Forward [Phrase (Lex "<colon>")]];[schema_field SENTENCE "" Forward [Phrase (Lex "<ors>")]]] (["<query>"]) {d with orth=""} (make_node "<query5>" "interp" c.weight 0 []); (* FIXME: zdania w odwróconej kolejności *) | |
721 | - LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field SENTENCE "Sentence" Forward [Phrase (Lex "<sentence>")]];[schema_field SENTENCE "" Forward [Phrase (Lex "<ors>")]]] (["<query>"]) {d with orth=""} (make_node "<query6>" "interp" c.weight 0 [])] (* FIXME: zdania w odwróconej kolejności *) | |
722 | - | "</query>" -> | |
723 | - let t = (["</query>"]) in | |
724 | - let batrs = (make_node "</query>" "interp" c.weight 0 []) in | |
725 | - let schema_list = [[schema_field NOSEM "" Backward [Phrase Null;Phrase (Lex "<dummy>")]];[schema_field SENTENCE "Sentence" Backward [Multi[Lex "<sentence>"](*Phrase(Lex "s")*)]]] in | |
726 | - [LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list t d batrs]*) | |
727 | - | "„s" -> [] | |
728 | - (*let batrs = make_node "pro-komunikować" "pro" c.weight 0 [] in | |
729 | - [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field OBJ "Theme" Forward [Phrase (Lex "</or1>")]]] (["<sentence>"(*"or"*)]) {d with orth=""} batrs; | |
730 | - LCGrenderer.make_frame_simple [] ["<dummy>"] d ( (make_node "„s" "interp" c.weight 0 []))]*) | |
731 | - | "”s" -> [] | |
732 | - (*let t = (["</or1>"]) in | |
733 | - let batrs = (make_node "”s" "interp" c.weight 0 []) in | |
734 | - let schema_list = [[schema_field SENTENCE "Sentence" Backward [Multi[Lex "<sentence>"](*Phrase(Lex "s")*)]]] in | |
735 | - [LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list t d batrs; | |
736 | - LCGrenderer.make_frame_simple [] ["<dummy>"] d ( (make_node "”s" "interp" c.weight 0 []))]*) | |
737 | - | "«s" -> [] (* FIXME *) | |
738 | - | "»s" -> [] (* FIXME *) | |
633 | + | lemma,c,l -> failwith ("process_interp: " ^ lemma ^ ":" ^ c ^ ":" ^ (String.concat ":" (Xlist.map l (String.concat ".")))) in*) | |
634 | +] | |
635 | +(* | |
636 | +let bracket_rules = [ | |
637 | + | |
739 | 638 | | ":" -> |
740 | 639 | [LCGrenderer.make_frame_simple [] ["or"] c (LCGrenderer.make_pro_komunikat tokens lex_sems)] |
741 | 640 | | ":s" -> |
... | ... | @@ -768,82 +667,4 @@ let process_bracket_lemma (c:ENIAMtokenizerTypes.token_record) (d:ENIAMlexSemant |
768 | 667 | [LCGrenderer.make_frame_simple [] ["</speaker>"] c ( (make_node "</speaker>" "interp" c.weight 0 [])); |
769 | 668 | LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list t d batrs] |
770 | 669 | | lemma -> raise Not_found in |
771 | - | |
772 | -let get_labels () = { | |
773 | - number=ge (); | |
774 | - case=ge (); | |
775 | - gender=ge (); | |
776 | - person=ge (); | |
777 | - aspect=ge (); | |
778 | - } in | |
779 | - | |
780 | -(* create_entries *) | |
781 | - match c with | |
782 | - {token = Interp "<clause>"} -> [BracketSet(Forward),Dot] | |
783 | - | {token = Interp "</clause>"} -> [BracketSet(Backward),Dot] | |
784 | - | {token = Interp lemma} -> | |
785 | - (try | |
786 | - Xlist.fold (process_bracket_lemma c d lemma) [] (fun l (symbol,sem) -> (Bracket(true,true,symbol),sem) :: l) | |
787 | - with Not_found -> | |
788 | - (* print_endline ("x"^lemma^"x"); *) | |
789 | - let entries = process_interp c d (lemma,"interp",[]) in | |
790 | - Xlist.map entries (fun (symbol,sem) -> Bracket(false,false,symbol),sem)) | |
791 | - | {token = Lemma(lemma,"sinterj",[[]])} -> | |
792 | - let t = ["interj"] in | |
793 | - let batrs = make_node lemma "sinterj" c.weight 0 [] in | |
794 | - let symbol,sem = LCGrenderer.make_frame_simple [] t c ( batrs) in | |
795 | - [Bracket(true,true,symbol),sem] | |
796 | - | {token = Lemma(lemma,pos,interp)} -> | |
797 | - (* print_endline (lemma ^ " " ^ pos); *) | |
798 | - Xlist.fold interp [] (fun l tags -> | |
799 | - let d = {d with e=get_labels (); valence=LCGrenderer.make_controll d.valence} in | |
800 | - let entries = process_interp c d (lemma,pos,tags) in | |
801 | - Xlist.map entries (fun (symbol,sem) -> Bracket(false,false,symbol),sem) @ l) | |
802 | - | _ -> [] | |
803 | - | |
804 | -module OrderedIntInt = struct | |
805 | - | |
806 | - type t = int * int | |
807 | - | |
808 | - let compare = compare | |
809 | - | |
810 | -end | |
811 | - | |
812 | -module IntIntSet = Xset.Make(OrderedIntInt) | |
813 | - | |
814 | - | |
815 | -let create (paths,last) tokens lex_sems = | |
816 | - uni_weight := 0.; | |
817 | - let chart = LCGchart.make last in | |
818 | - let chart = Xlist.fold paths chart (fun chart (id,lnode,rnode) -> | |
819 | - let c = ExtArray.get tokens id in | |
820 | - let d = ExtArray.get lex_sems id in | |
821 | -(* if t.weight < -0.9 || Xlist.mem t.attrs "notvalidated proper" || Xlist.mem t.attrs "lemmatized as lowercase" then chart else *) | |
822 | - let chart = LCGchart.add_inc chart lnode rnode (Tensor[Atom ("[" ^ c.orth ^ "]")], Dot) 0 in | |
823 | - LCGchart.add_inc_list chart lnode rnode (create_entries tokens lex_sems id (c:ENIAMtokenizerTypes.token_record) d false) 0) in | |
824 | - let set = Xlist.fold paths IntIntSet.empty (fun set (_,lnode,rnode) -> IntIntSet.add set (lnode,rnode)) in | |
825 | - let chart = IntIntSet.fold set chart (fun chart (i,j) -> LCGchart.make_unique chart i j) in | |
826 | - chart | |
827 | - | |
828 | -let rec split_sons left id right = function | |
829 | - [] -> List.rev (List.sort compare left), List.sort compare right | |
830 | - | x :: l -> if x < id then split_sons (x :: left) id right l else split_sons left id (x :: right) l | |
831 | - | |
832 | -let rec dep_create_rec nodes sons conll_id = | |
833 | - let node = IntMap.find nodes conll_id in | |
834 | - let l = try IntMap.find sons conll_id with Not_found -> [] in | |
835 | - let left,right = split_sons [] conll_id [] l in | |
836 | - (* Printf.printf "dep_create_rec [%s] %d [%s]\n" (String.concat ";" (Xlist.map left string_of_int)) conll_id (String.concat ";" (Xlist.map right string_of_int)); *) | |
837 | - DepNode(conll_id, Xlist.map left (dep_create_rec nodes sons), node, Xlist.map right (dep_create_rec nodes sons)) | |
838 | - | |
839 | -let dep_create paths tokens lex_sems = | |
840 | - uni_weight := 0.; | |
841 | - let sons = Int.fold 1 (Array.length paths - 1) IntMap.empty (fun sons i -> | |
842 | - let _,super,_ = paths.(i) in | |
843 | - IntMap.add_inc sons super [i] (fun l -> i :: l)) in | |
844 | - let nodes = Int.fold 0 (Array.length paths - 1) IntMap.empty (fun nodes i -> | |
845 | - let id,_,_ = paths.(i) in | |
846 | - let c = ExtArray.get tokens id in | |
847 | - let d = ExtArray.get lex_sems id in | |
848 | - IntMap.add nodes i (create_entries tokens lex_sems id c d true)) in | |
849 | - dep_create_rec nodes sons 0 | |
670 | +]*) | |
... | ... |
parser/makefile
... | ... | @@ -8,7 +8,7 @@ OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa b |
8 | 8 | #OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa xlib.cmxa |
9 | 9 | |
10 | 10 | PRE= ../pre/paths.ml ../tokenizer/ENIAMtokenizerTypes.ml ../subsyntax/ENIAMsubsyntaxTypes.ml ../walenty/ENIAMwalTypes.ml ../lexSemantics/ENIAMlexSemanticsTypes.ml ../walenty/ENIAMwalStringOf.ml ../integration/ENIAM_CONLL.ml |
11 | -LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml | |
11 | +LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml LCGreductions.ml LCGlexicon2.ml LCGlexicon.ml LCGvalence.ml | |
12 | 12 | #LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml |
13 | 13 | DISAMB= disambSelPref.ml disambLemma.ml |
14 | 14 | SEM= semGraph.ml semTypes.ml semStringOf.ml semLatexOf.ml semMmlOf.ml semMrl.ml |
... | ... | @@ -17,8 +17,8 @@ EXEC= execTypes.ml visualization.ml exec.ml |
17 | 17 | |
18 | 18 | all: |
19 | 19 | $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml |
20 | - # $(OCAMLOPT) -o server2 $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml | |
21 | - # $(OCAMLOPT) -o parser2.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml | |
20 | +# $(OCAMLOPT) -o server2 $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml | |
21 | +# $(OCAMLOPT) -o parser2.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml | |
22 | 22 | # $(OCAMLOPT) -o eniam.distr $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) overseer.ml |
23 | 23 | # $(OCAMLOPT) -o eniam.worker $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) worker.ml |
24 | 24 | # $(OCAMLOPT) -o parser.api $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) apiInterface.ml |
... | ... |