Commit ae919dede16428180f3382efc8597e8b7c352cf1
1 parent 2128db1f
przetwarzanie leksykalizacji w Walentym
Showing 6 changed files with 190 additions and 321 deletions
walenty/ENIAMwalFrames.ml
... | ... | @@ -374,31 +374,6 @@ lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],nat |
374 | 374 | lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}]) |
375 | 375 | *) |
376 | 376 | |
377 | -let rec split_elexeme = function | |
378 | - Lexeme s -> [],[Lexeme s] | |
379 | - | XOR l -> | |
380 | - let genders,l = Xlist.fold l ([],[]) (fun (genders,lexs) lex -> | |
381 | - let gender,lex = split_elexeme lex in | |
382 | - gender @ genders, lex @ lexs) in | |
383 | - genders,[XOR(List.rev l)] | |
384 | - | ORconcat l -> | |
385 | - let genders,l = Xlist.fold l ([],[]) (fun (genders,lexs) lex -> | |
386 | - let gender,lex = split_elexeme lex in | |
387 | - gender @ genders, lex @ lexs) in | |
388 | - genders,[ORconcat(List.rev l)] | |
389 | - | ORcoord l -> | |
390 | - let genders,l = Xlist.fold l ([],[]) (fun (genders,lexs) lex -> | |
391 | - let gender,lex = split_elexeme lex in | |
392 | - gender @ genders, lex @ lexs) in | |
393 | - genders,[ORcoord(List.rev l)] | |
394 | - | Elexeme gender -> [gender],[] | |
395 | - | |
396 | -let prep_arg_schema_field morfs = | |
397 | - {gf=CORE; role="Ref"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Forward; morfs=morfs} (* FIXME: uporządkować sensy *) | |
398 | - | |
399 | -let prep_arg_schema_field2 morfs = | |
400 | - {gf=CORE; role="Ref"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Forward; morfs=morfs} (* FIXME: uporządkować sensy *) | |
401 | - | |
402 | 377 | let num_arg_schema_field morfs = |
403 | 378 | {gf=CORE; role="QUANT-ARG"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Forward; morfs=morfs} |
404 | 379 | |
... | ... | @@ -412,174 +387,11 @@ let nosem_refl_schema_field = |
412 | 387 | {gf=NOSEM; role=""; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[Phrase(Lex "się")]} |
413 | 388 | |
414 | 389 | |
415 | -let rec expand_lexicalizations_schema schema = | |
416 | - Xlist.map schema (fun s -> | |
417 | - {s with morfs=expand_lexicalizations_morfs s.morfs}) | |
418 | - | |
419 | -and expand_lexicalizations_morfs morfs = (* uproszczenie polegające na zezwoleniu na koordynację przy zwiększaniu ilości LexPhrase *) | |
420 | - List.flatten (Xlist.map morfs (fun morf -> | |
421 | - let morf = match morf with | |
422 | - LexPhrase(pos_lex,(restr,schema)) -> LexPhrase(pos_lex,(restr,expand_lexicalizations_schema schema)) | |
423 | - | morf -> morf in | |
424 | - match morf with | |
425 | -(* LexPhrase([ADV _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf] *) | |
426 | -(* | LexPhrase([PREP _,_;SUBST _,_],(_,schema)) -> if remove_trivial_args schema <> [] then print_endline (ENIAMwalStringOf.morf morf); [morf] *) | |
427 | -(* | LexPhrase([PREP _,_;GER _,_],(_,schema)) -> if remove_trivial_args schema <> [] then print_endline (ENIAMwalStringOf.morf morf); [morf] *) | |
428 | -(* | LexPhrase([NUM _,_;_],(_,schema)) -> if remove_trivial_args schema <> [] then print_endline (ENIAMwalStringOf.morf morf); [morf] *) | |
429 | -(* | LexPhrase([PREP _,_;NUM _,_;_],(_,schema)) -> if remove_trivial_args schema <> [] then print_endline (ENIAMwalStringOf.morf morf); [morf] *) | |
430 | -(* | LexPhrase([PREP _,_;ADJ _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf] | |
431 | - | LexPhrase([PREP _,_;PPAS _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf] | |
432 | - | LexPhrase([PREP _,_;PACT _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf] *) | |
433 | - | Phrase(PrepNumP(_,prep,case)) -> [LexPhrase([PREP case,Lexeme prep],(Ratrs,[prep_arg_schema_field2 [Phrase(NumP(case))]]))] | |
434 | - | LexPhrase([PREP pcase,plex;SUBST(n,c),slex],(Atr1,[{morfs=[LexPhrase([QUB,_],_)]} as s])) -> | |
435 | -(* print_endline (ENIAMwalStringOf.morf morf); *) | |
436 | - [LexPhrase([PREP pcase,plex],(Ratrs,[prep_arg_schema_field [LexPhrase([SUBST(n,c),slex],(Natr,[]))]])); | |
437 | - LexPhrase([PREP pcase,plex],(Ratrs,[prep_arg_schema_field [LexPhrase([SUBST(n,c),slex],(Natr,[]))];{s with dir=Backward}]))] | |
438 | - | LexPhrase([PREP(pcase),plex;SUBST(n,c),slex],(Atr1,[{morfs=[LexPhrase([ADV _,_],_)]} as s])) -> | |
439 | -(* print_endline (ENIAMwalStringOf.morf morf); *) | |
440 | - [LexPhrase([PREP pcase,plex],(Ratrs,[prep_arg_schema_field [LexPhrase([SUBST(n,c),slex],(Natr,[]))]])); | |
441 | - LexPhrase([PREP pcase,plex],(Ratrs,[prep_arg_schema_field [LexPhrase([SUBST(n,c),slex],(Natr,[]))];{s with dir=Backward}]))] | |
442 | - | LexPhrase([PREP pcase,plex;SUBST(n,c),slex],(Ratr1,[{morfs=[LexPhrase([ADV _,_],_)]} as s])) -> | |
443 | -(* print_endline (ENIAMwalStringOf.morf morf); *) | |
444 | - [LexPhrase([PREP pcase,plex],(Ratrs,[prep_arg_schema_field [LexPhrase([SUBST(n,c),slex],(Natr,[]))];{s with dir=Backward}]))] | |
445 | - | LexPhrase([PREP pcase,plex;pos,lex],restr) -> | |
446 | - [LexPhrase([PREP pcase,plex],(Ratrs,[prep_arg_schema_field [LexPhrase([pos,lex],restr)]]))] | |
447 | - | LexPhrase([PREP pcase,plex;NUM(c,g,a),nlex;pos,lex],restr) -> | |
448 | - let genders,lexs = split_elexeme lex in | |
449 | - Xlist.map genders (fun gender -> | |
450 | - LexPhrase([PREP pcase,plex],(Ratrs,[prep_arg_schema_field [LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[num_arg_schema_field [Phrase Pro]]))]]))) @ | |
451 | - Xlist.map lexs (fun lex -> | |
452 | - LexPhrase([PREP pcase,plex],(Ratrs,[prep_arg_schema_field [LexPhrase([NUM(c,g,a),nlex],(Ratrs,[num_arg_schema_field [LexPhrase([pos,lex],restr)]]))]]))) | |
453 | - | LexPhrase([NUM(c,g,a),nlex;pos,lex],restr) -> | |
454 | - let genders,lexs = split_elexeme lex in | |
455 | - Xlist.map genders (fun gender -> | |
456 | - LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[num_arg_schema_field [Phrase Pro]]))) @ | |
457 | - Xlist.map lexs (fun lex -> | |
458 | - LexPhrase([NUM(c,g,a),nlex],(Ratrs,[num_arg_schema_field [LexPhrase([pos,lex],restr)]]))) | |
459 | - | LexPhrase([COMP ctype,clex;pos,lex],restr) -> | |
460 | - [LexPhrase([COMP ctype,clex],(Ratrs,[std_arg_schema_field Forward [LexPhrase([pos,lex],restr)]]))] | |
461 | - | LexPhrase([SUBST(n,c),slex;COMP ctype,clex;pos,lex],restr) -> | |
462 | - [LexPhrase([SUBST(n,c),slex],(Ratrs,[std_arg_schema_field Forward [LexPhrase([COMP ctype,clex],(Ratrs,[std_arg_schema_field Forward [LexPhrase([pos,lex],restr)]]))]]))] (* FIXME: poprawić po zrobieniu NCP *) | |
463 | - | LexPhrase(_::_::_,_) -> failwith ("expand_lexicalizations_morfs: " ^ ENIAMwalStringOf.morf morf) | |
464 | -(* | LexPhrase([PREP pcase,plex;SUBST(n,c),slex],(Atr1,[gf,cr,ce,[LexPhrase([QUB,lex],arestr)]])) -> | |
465 | -(* print_endline (ENIAMwalStringOf.morf morf); *) | |
466 | - [LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))]])); | |
467 | - LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))];gf,cr,ce,[LexPhrase([QUB,lex],arestr)]]))] | |
468 | - | LexPhrase([PREP(pcase),plex;SUBST(n,c),slex],(Atr1,[gf,cr,ce,[LexPhrase([ADV gr,lex],arestr)]])) -> | |
469 | -(* print_endline (ENIAMwalStringOf.morf morf); *) | |
470 | - [LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))]])); | |
471 | - LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))];gf,cr,ce,[LexPhrase([ADV gr,lex],arestr)]]))] | |
472 | - | LexPhrase([PREP pcase,plex;SUBST(n,c),slex],(Ratr1,[gf,cr,ce,[LexPhrase([ADV gr,lex],arestr)]])) -> | |
473 | -(* print_endline (ENIAMwalStringOf.morf morf); *) | |
474 | - [LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([SUBST(n,c),slex],(Natr,[]))];gf,cr,ce,[LexPhrase([ADV gr,lex],arestr)]]))] | |
475 | - | LexPhrase([PREP pcase,plex;pos,lex],restr) -> | |
476 | - [LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))] | |
477 | - | LexPhrase([PREP pcase,plex;NUM(c,g,a),nlex;pos,lex],restr) -> | |
478 | - let genders,lexs = split_elexeme lex in | |
479 | - Xlist.map genders (fun gender -> | |
480 | - LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[("OBJ","QUANT-ARG",["T"]),[],[],[Phrase Pro]]))]]))) @ | |
481 | - Xlist.map lexs (fun lex -> | |
482 | - LexPhrase([PREP pcase,plex],(Ratrs,[("OBJ","Ref",["T"]),[],[],[LexPhrase([NUM(c,g,a),nlex],(Ratrs,[("OBJ","QUANT-ARG",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))]]))) | |
483 | - | LexPhrase([NUM(c,g,a),nlex;pos,lex],restr) -> | |
484 | - let genders,lexs = split_elexeme lex in | |
485 | - Xlist.map genders (fun gender -> | |
486 | - LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[("OBJ","QUANT-ARG",["T"]),[],[],[Phrase Pro]]))) @ | |
487 | - Xlist.map lexs (fun lex -> | |
488 | - LexPhrase([NUM(c,g,a),nlex],(Ratrs,[("OBJ","QUANT-ARG",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))) | |
489 | - | LexPhrase([COMP ctype,clex;pos,lex],restr) -> | |
490 | - [LexPhrase([COMP ctype,clex],(Ratrs,[("C","",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))] | |
491 | - | LexPhrase([SUBST(n,c),slex;COMP ctype,clex;pos,lex],restr) -> | |
492 | - [LexPhrase([SUBST(n,c),slex],(Ratrs,[("OBJ","",["T"]),[],[],[LexPhrase([COMP ctype,clex],(Ratrs,[("C","",["T"]),[],[],[LexPhrase([pos,lex],restr)]]))]]))] | |
493 | - | LexPhrase(_::_::_,_) -> failwith ("expand_lexicalizations_morfs: " ^ ENIAMwalStringOf.morf morf)*) | |
494 | - | morf -> [morf])) | |
495 | - | |
496 | 390 | let expand_lexicalizations = function |
497 | 391 | Frame(atrs,schema) -> Frame(atrs,expand_lexicalizations_schema schema) |
498 | 392 | (* ComprepFrame(s,morfs) -> ComprepFrame(atrs,expand_lexicalizations_morfs morfs) *) |
499 | 393 | | _ -> failwith "expand_lexicalizations" |
500 | 394 | |
501 | -let lex_id_counter = ref 0 | |
502 | - | |
503 | -let get_lex_id () = | |
504 | - incr lex_id_counter; | |
505 | - string_of_int (!lex_id_counter) | |
506 | - | |
507 | -let get_pos lex = function | |
508 | - SUBST _ -> | |
509 | - (match lex with | |
510 | - "ja" -> ["ppron12"] | |
511 | - | "my" -> ["ppron12"] | |
512 | - | "ty" -> ["ppron12"] | |
513 | - | "wy" -> ["ppron12"] | |
514 | - | "on" -> ["ppron3"] | |
515 | - | "siebie" -> ["siebie"] | |
516 | - | "się" -> ["qub"] | |
517 | - | _ -> ["subst"]) | |
518 | - | PREP _ -> ["prep"] | |
519 | - | NUM _ -> ["num"] | |
520 | - | ADV _ -> ["adv"] | |
521 | - | ADJ _ -> ["adj"] | |
522 | - | GER _ -> ["ger"] | |
523 | - | PPAS _ -> ["ppas"] | |
524 | - | PACT _ -> ["pact"] | |
525 | - | PERS _ -> ["fin";"praet";"winien"(*;"impt";"imps"*);"pred"] | |
526 | - | INF _ -> ["inf"] | |
527 | - | QUB -> ["qub"] | |
528 | - | COMPAR -> ["compar"] | |
529 | - | COMP _ -> ["comp"] | |
530 | - | |
531 | -let rec extract_lex_frames lexeme p frames = function | |
532 | - Frame(atrs,schema) -> | |
533 | - let schema,frames = Xlist.fold schema ([],frames) (fun (schema,frames) s -> | |
534 | - let morfs,frames = Xlist.fold s.morfs ([],frames) extract_lex_morf in | |
535 | - {s with morfs=List.rev morfs} :: schema, frames) in | |
536 | - (lexeme,p,Frame(atrs,List.rev schema)) :: frames | |
537 | - | LexFrame(id,pos,restr,schema) -> | |
538 | - let schema,frames = Xlist.fold schema ([],frames) (fun (schema,frames) s -> | |
539 | - let morfs,frames = Xlist.fold s.morfs ([],frames) extract_lex_morf in | |
540 | - {s with morfs=List.rev morfs} :: schema, frames) in | |
541 | - (lexeme,p,LexFrame(id,pos,restr,List.rev schema)) :: frames | |
542 | - | ComprepFrame(s,pos,restr,schema) -> | |
543 | - let schema,frames = Xlist.fold schema ([],frames) (fun (schema,frames) s -> | |
544 | - let morfs,frames = Xlist.fold s.morfs ([],frames) extract_lex_morf in | |
545 | - {s with morfs=List.rev morfs} :: schema, frames) in | |
546 | - (lexeme,p,ComprepFrame(s,pos,restr,List.rev schema)) :: frames | |
547 | -(* | _ -> failwith "extract_lex_frames" *) | |
548 | - | |
549 | -and extract_lex_morf (morfs,frames) = function | |
550 | - LexPhrase([pos,lex],(restr,schema)) -> | |
551 | - let id = get_lex_id () in | |
552 | - let lexemes = ENIAMwalParser.get_lexemes lex in | |
553 | - let frames = Xlist.fold lexemes frames (fun frames lexeme -> | |
554 | - let poss = get_pos lexeme pos in | |
555 | - Xlist.fold poss frames (fun frames p -> | |
556 | - extract_lex_frames lexeme p frames (LexFrame(id,pos,restr,schema)))) in | |
557 | - LexPhraseId(id,pos,lex) :: morfs, frames | |
558 | - | LexPhrase _ -> failwith "extract_lex_morf" | |
559 | - | morf -> morf :: morfs, frames | |
560 | - | |
561 | -let split_xor schema = | |
562 | - Xlist.multiply_list (Xlist.map schema (fun s -> | |
563 | - Xlist.map (Xlist.multiply_list (Xlist.map s.morfs (function | |
564 | - LexPhraseId(id,pos,XOR l) -> Xlist.map l (fun lex -> LexPhraseId(id,pos,lex)) | |
565 | - | LexPhraseId(id,pos,lex) -> [LexPhraseId(id,pos,lex)] | |
566 | - | morf -> [morf]))) (fun morfs -> {s with morfs=morfs}))) | |
567 | - | |
568 | -let split_or_coord schema = | |
569 | - Xlist.map schema (fun s -> | |
570 | - {s with morfs=List.flatten (Xlist.map s.morfs (function | |
571 | - LexPhraseId(id,pos,ORcoord l) -> Xlist.map l (fun lex -> LexPhraseId(id,pos,lex)) | |
572 | - | LexPhraseId(id,pos,ORconcat l) -> Xlist.map l (fun lex -> LexPhraseId(id,pos,lex)) (* FIXME: koordynacja zamiast konkatenacji *) | |
573 | - | LexPhraseId(id,pos,lex) -> [LexPhraseId(id,pos,lex)] | |
574 | - | morf -> [morf]))}) | |
575 | - | |
576 | -let simplify_lex schemas = | |
577 | - Xlist.map schemas (fun schema -> | |
578 | - Xlist.map schema (fun s -> | |
579 | - {s with morfs=Xlist.map s.morfs (function | |
580 | - LexPhraseId(id,pos,Lexeme lex) -> LexArg(id,pos,lex) | |
581 | - | LexPhraseId _ as morf -> failwith ("simplify_lex: " ^ ENIAMwalStringOf.morf morf) | |
582 | - | morf -> morf)})) | |
583 | 395 | |
584 | 396 | let prepare_schema_comprep expands subtypes equivs schema = |
585 | 397 | assign_pro_args (assign_role_and_sense (ENIAMwalParser.expand_equivs_schema equivs (ENIAMwalParser.expand_subtypes subtypes (ENIAMwalParser.expand_schema expands schema)))) |
... | ... | @@ -827,51 +639,6 @@ let remove_pro_args schema = (* FIXME: sprawdzić czy Pro i Null są zawsze na p |
827 | 639 | | {morfs=(Phrase Null) :: morfs} as s -> {s with morfs=morfs} :: schema |
828 | 640 | | s -> s :: schema)) |
829 | 641 | |
830 | -let rec expand_restr valence lexeme pos = function | |
831 | - LexFrame(id,pos2,Natr,[]) -> [LexFrame(id,pos2,NoRestr,[])] | |
832 | - | LexFrame(id,pos2,Natr,_) -> failwith "expand_restr" | |
833 | - | LexFrame(id,pos2,restr,[]) -> | |
834 | -(* print_endline "expand_restr"; *) | |
835 | - let frames = try StringMap.find (StringMap.find valence lexeme) pos with Not_found -> failwith ("expand_restr:" ^ lexeme ^ " " ^ pos) in | |
836 | -(* Printf.printf "%s %s %d\n" lexeme pos (Xlist.size frames); | |
837 | - Xlist.iter frames (fun frame -> print_endline (ENIAMwalStringOf.frame lexeme frame)); | |
838 | - print_endline "";*) | |
839 | - (if restr = Atr || restr = Atr1 then [LexFrame(id,pos2,NoRestr,[])] else []) @ | |
840 | - (Xlist.fold frames [] (fun frames -> function | |
841 | - Frame(_,schema) -> | |
842 | - let schema = remove_pro_args schema in | |
843 | - if schema = [] then frames else | |
844 | - (expand_restr valence lexeme pos (LexFrame(id,pos2,restr,schema))) @ frames | |
845 | - | _ -> frames)) | |
846 | - | LexFrame(id,pos2,Atr,schema) -> | |
847 | - let schemas = Xlist.map (Xlist.multiply_list (Xlist.map schema (fun x -> [[x];[]]))) List.flatten in | |
848 | - Xlist.map schemas (fun schema -> LexFrame(id,pos2,NoRestr,schema)) | |
849 | - | LexFrame(id,pos2,Atr1,schema) -> | |
850 | - LexFrame(id,pos2,NoRestr,[]) :: (Xlist.map schema (fun x -> LexFrame(id,pos2,NoRestr,[x]))) | |
851 | - | LexFrame(id,pos2,Ratr,schema) -> | |
852 | - let schemas = Xlist.map (Xlist.multiply_list (Xlist.map schema (fun x -> [[x];[]]))) List.flatten in | |
853 | - Xlist.fold schemas [] (fun schemas schema -> if schema = [] then schemas else LexFrame(id,pos2,NoRestr,schema) :: schemas) | |
854 | - | LexFrame(id,pos2,Ratr1,schema) -> | |
855 | - Xlist.map schema (fun x -> LexFrame(id,pos2,NoRestr,[x])) | |
856 | - | LexFrame(id,pos2,Ratrs,schema) -> [LexFrame(id,pos2,NoRestr,schema)] | |
857 | - | LexFrame(id,pos2,NoRestr,_) -> failwith "expand_restr" | |
858 | - | ComprepFrame(s,pos2,Natr,[]) -> [ComprepFrame(s,pos2,NoRestr,[])] | |
859 | - | ComprepFrame(s,pos2,Natr,_) -> failwith "expand_restr" | |
860 | - | ComprepFrame(s,pos2,restr,[]) as frame -> failwith ("expand_restr: " ^ ENIAMwalStringOf.frame lexeme frame) | |
861 | - | ComprepFrame(s,pos2,Atr,schema) -> | |
862 | - let schemas = Xlist.map (Xlist.multiply_list (Xlist.map schema (fun x -> [[x];[]]))) List.flatten in | |
863 | - Xlist.map schemas (fun schema -> ComprepFrame(s,pos2,NoRestr,schema)) | |
864 | - | ComprepFrame(s,pos2,Atr1,schema) -> | |
865 | - ComprepFrame(s,pos2,NoRestr,[]) :: (Xlist.map schema (fun x -> ComprepFrame(s,pos2,NoRestr,[x]))) | |
866 | - | ComprepFrame(s,pos2,Ratr,schema) -> | |
867 | - let schemas = Xlist.map (Xlist.multiply_list (Xlist.map schema (fun x -> [[x];[]]))) List.flatten in | |
868 | - Xlist.fold schemas [] (fun schemas schema -> if schema = [] then schemas else ComprepFrame(s,pos2,NoRestr,schema) :: schemas) | |
869 | - | ComprepFrame(s,pos2,Ratr1,schema) -> | |
870 | - Xlist.map schema (fun x -> ComprepFrame(s,pos2,NoRestr,[x])) | |
871 | - | ComprepFrame(s,pos2,Ratrs,schema) -> [ComprepFrame(s,pos2,NoRestr,schema)] | |
872 | - | ComprepFrame(s,pos2,NoRestr,_) -> failwith "expand_restr" | |
873 | - | Frame _ as frame -> [frame] | |
874 | -(* | _ -> failwith "expand_restr" *) | |
875 | 642 | |
876 | 643 | let simplify_pos = function |
877 | 644 | "subst" -> "noun" |
... | ... |
walenty/ENIAMwalLex.ml
... | ... | @@ -24,7 +24,7 @@ let prep_arg_schema_field morfs = |
24 | 24 | (* {gf=CORE; role="Ref"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Forward; morfs=morfs} (* FIXME: uporządkować sensy *) |
25 | 25 | |
26 | 26 | let prep_arg_schema_field2 morfs = *) |
27 | - {psn_id=(-1); gf=ARG(*CORE*); role=""(*"Ref"*); role_attr=""; sel_prefs=[(*"ALL"*)]; cr=[]; ce=[]; (*dir=Forward;*) morfs=morfs} (* FIXME: uporządkować sensy *) | |
27 | + {psn_id=(-1); gf=ARG(*CORE*); role=""(*"Ref"*); role_attr=""; mode=[]; sel_prefs=[(*"ALL"*)]; cr=[]; ce=[]; (*dir=Forward;*) morfs=morfs} (* FIXME: uporządkować sensy *) | |
28 | 28 | |
29 | 29 | let rec split_elexeme = function |
30 | 30 | Lexeme s -> [],[Lexeme s] |
... | ... | @@ -49,7 +49,6 @@ let rec expand_lexicalizations_schema schema = |
49 | 49 | Xlist.map schema (fun s -> |
50 | 50 | {s with morfs=expand_lexicalizations_morfs s.morfs}) |
51 | 51 | |
52 | -(* FIXME: LexPhraseMode *) | |
53 | 52 | and expand_lexicalizations_morfs morfs = (* uproszczenie polegające na zezwoleniu na koordynację przy zwiększaniu ilości LexPhrase *) |
54 | 53 | List.flatten (Xlist.map morfs (fun morf -> |
55 | 54 | let morf = match morf with |
... | ... | @@ -127,11 +126,12 @@ and expand_lexicalizations_morfs morfs = (* uproszczenie polegające na zezwolen |
127 | 126 | | LexPhrase(_::_::_,_) -> failwith ("expand_lexicalizations_morfs: " ^ ENIAMwalStringOf.morf morf)*) |
128 | 127 | | morf -> [morf])) |
129 | 128 | |
130 | -let lex_id_counter = ref 0 | |
131 | - | |
132 | -let get_lex_id () = | |
133 | - incr lex_id_counter; | |
134 | - string_of_int (!lex_id_counter) | |
129 | +let rec get_lexemes = function | |
130 | + Lexeme s -> [s] | |
131 | + | ORconcat l -> List.flatten (Xlist.map l get_lexemes) | |
132 | + | ORcoord l -> List.flatten (Xlist.map l get_lexemes) | |
133 | + | XOR l -> List.flatten (Xlist.map l get_lexemes) | |
134 | + | Elexeme gender -> failwith "get_lexemes" | |
135 | 135 | |
136 | 136 | let get_pos lex = function |
137 | 137 | SUBST _ -> |
... | ... | @@ -158,63 +158,142 @@ let get_pos lex = function |
158 | 158 | | COMP _ -> ["comp"] |
159 | 159 | | FIXED -> ["fixed"] |
160 | 160 | |
161 | -let rec extract_lex_frames lexeme p frames = function | |
162 | - Frame(atrs,schema) -> | |
163 | - let schema,frames = Xlist.fold schema ([],frames) (fun (schema,frames) s -> | |
164 | - let morfs,frames = Xlist.fold s.morfs ([],frames) extract_lex_morf in | |
165 | - {s with morfs=List.rev morfs} :: schema, frames) in | |
166 | - (lexeme,p,Frame(atrs,List.rev schema)) :: frames | |
167 | - | LexFrame(id,pos,restr,schema) -> | |
168 | - let schema,frames = Xlist.fold schema ([],frames) (fun (schema,frames) s -> | |
169 | - let morfs,frames = Xlist.fold s.morfs ([],frames) extract_lex_morf in | |
170 | - {s with morfs=List.rev morfs} :: schema, frames) in | |
171 | - (lexeme,p,LexFrame(id,pos,restr,List.rev schema)) :: frames | |
172 | - | ComprepFrame(s,pos,restr,schema) -> | |
173 | - let schema,frames = Xlist.fold schema ([],frames) (fun (schema,frames) s -> | |
174 | - let morfs,frames = Xlist.fold s.morfs ([],frames) extract_lex_morf in | |
175 | - {s with morfs=List.rev morfs} :: schema, frames) in | |
176 | - (lexeme,p,ComprepFrame(s,pos,restr,List.rev schema)) :: frames | |
177 | -(* | _ -> failwith "extract_lex_frames" *) | |
178 | - | |
179 | -and extract_lex_morf (morfs,frames) = function | |
180 | - LexPhrase([pos,lex],(restr,schema)) -> | |
161 | +let lex_id_counter = ref 0 | |
162 | + | |
163 | +let get_lex_id () = | |
164 | + incr lex_id_counter; | |
165 | + !lex_id_counter | |
166 | + | |
167 | +(* FIXME: to trzeba będzie poprawić przy unlike coordination *) | |
168 | +(* FIXME: słownik pos wywołuje redundancję *) | |
169 | +let rec extract_lex_entries (morfs,entries) = function | |
170 | + LexPhrase([pos,lex],(Natr,[])) -> | |
171 | + let lexemes = get_lexemes lex in | |
172 | + let entries = Xlist.fold lexemes entries (fun entries lemma -> | |
173 | + Xlist.fold (get_pos lemma pos) entries (fun entries pos2 -> | |
174 | + let entries2 = try StringMap.find entries pos2 with Not_found -> StringMap.empty in | |
175 | + let entry = SimpleLexEntry(lemma,pos2) in | |
176 | + let entries2 = StringMap.add_inc entries2 lemma (EntrySet.singleton entry) (fun set -> EntrySet.add set entry) in | |
177 | + StringMap.add entries pos2 entries2)) in | |
178 | + let morfs = Xlist.fold lexemes morfs (fun morfs lemma -> SimpleLexArg(lemma,pos) :: morfs) in | |
179 | + morfs,entries | |
180 | + | LexPhrase([pos,lex],(restr,schema)) -> | |
181 | 181 | let id = get_lex_id () in |
182 | - let lexemes = ENIAMwalRealizations.get_lexemes lex in | |
183 | - let frames = Xlist.fold lexemes frames (fun frames lexeme -> | |
184 | - let poss = get_pos lexeme pos in | |
185 | - Xlist.fold poss frames (fun frames p -> | |
186 | - extract_lex_frames lexeme p frames (LexFrame(id,pos,restr,schema)))) in | |
187 | - LexPhraseId(id,pos,lex) :: morfs, frames (* FIXME: Czy potrzebne jest tworzenie LexFrame, gdy schema=[] ? *) | |
188 | - | LexPhrase _ -> failwith "extract_lex_morf" | |
189 | - | morf -> morf :: morfs, frames | |
190 | - | |
191 | -let split_xor schema = | |
192 | - Xlist.multiply_list (Xlist.map schema (fun s -> | |
193 | - Xlist.map (Xlist.multiply_list (Xlist.map s.morfs (function | |
194 | - LexPhraseId(id,pos,XOR l) -> Xlist.map l (fun lex -> LexPhraseId(id,pos,lex)) | |
195 | - | LexPhraseId(id,pos,lex) -> [LexPhraseId(id,pos,lex)] | |
196 | - | morf -> [morf]))) (fun morfs -> {s with morfs=morfs}))) | |
197 | - | |
198 | -let split_or_coord schema = | |
199 | - Xlist.map schema (fun s -> | |
200 | - {s with morfs=List.flatten (Xlist.map s.morfs (function | |
201 | - LexPhraseId(id,pos,ORcoord l) -> Xlist.map l (fun lex -> LexPhraseId(id,pos,lex)) | |
202 | - | LexPhraseId(id,pos,ORconcat l) -> Xlist.map l (fun lex -> LexPhraseId(id,pos,lex)) (* FIXME: koordynacja zamiast konkatenacji *) | |
203 | - | LexPhraseId(id,pos,lex) -> [LexPhraseId(id,pos,lex)] | |
204 | - | morf -> [morf]))}) | |
205 | - | |
206 | -let simplify_lex schemas = | |
207 | - Xlist.map schemas (fun schema -> | |
208 | - Xlist.map schema (fun s -> | |
209 | - {s with morfs=Xlist.map s.morfs (function | |
210 | - LexPhraseId(id,pos,Lexeme lex) -> LexArg(id,pos,lex) | |
211 | - | LexPhraseId _ as morf -> failwith ("simplify_lex: " ^ ENIAMwalStringOf.morf morf) | |
212 | - | morf -> morf)})) | |
182 | + let lexemes = get_lexemes lex in | |
183 | + let schema,entries = extract_lex_entries_schema entries schema in | |
184 | + let entries = Xlist.fold lexemes entries (fun entries lemma -> | |
185 | + Xlist.fold (get_pos lemma pos) entries (fun entries pos2 -> | |
186 | + let entries2 = try StringMap.find entries pos2 with Not_found -> StringMap.empty in | |
187 | + let entry = LexEntry(id,lemma,pos2,restr,schema) in | |
188 | + let entries2 = StringMap.add_inc entries2 lemma (EntrySet.singleton entry) (fun set -> EntrySet.add set entry) in | |
189 | + StringMap.add entries pos2 entries2)) in | |
190 | + let morfs = Xlist.fold lexemes morfs (fun morfs lemma -> LexArg(id,lemma,pos) :: morfs) in | |
191 | + morfs,entries | |
192 | + | LexPhrase _ as morf -> failwith ("extract_lex_entries: " ^ ENIAMwalStringOf.morf morf) | |
193 | + | morf -> morf :: morfs, entries | |
213 | 194 | |
195 | +and extract_lex_entries_schema entries schema = | |
196 | + let schema,entries = Xlist.fold schema ([],entries) (fun (schema,entries) p -> | |
197 | + let morfs,entries = Xlist.fold p.morfs ([],entries) extract_lex_entries in | |
198 | + {p with morfs=List.rev morfs} :: schema, entries) in | |
199 | + List.rev schema, entries | |
200 | + | |
201 | +let extract_lex_entries_comprepnp entries compreps = | |
202 | + Xlist.fold compreps entries (fun entries (clemma,morfs) -> | |
203 | + Xlist.fold morfs entries (fun entries -> function | |
204 | + LexPhrase([pos,lex],(Natr,[])) -> failwith "extract_lex_entries_comprepnp" | |
205 | + | LexPhrase([pos,lex],(restr,schema)) -> | |
206 | + let lexemes = get_lexemes lex in | |
207 | + let schema,entries = extract_lex_entries_schema entries schema in | |
208 | + Xlist.fold lexemes entries (fun entries lemma -> | |
209 | + Xlist.fold (get_pos lemma pos) entries (fun entries pos2 -> | |
210 | + let entries2 = try StringMap.find entries pos2 with Not_found -> StringMap.empty in | |
211 | + let entry = ComprepNPEntry(clemma,restr,schema) in | |
212 | + let entries2 = StringMap.add_inc entries2 lemma (EntrySet.singleton entry) (fun set -> EntrySet.add set entry) in | |
213 | + StringMap.add entries pos2 entries2)) | |
214 | + | _ -> failwith "extract_lex_entries_comprepnp")) | |
215 | + | |
216 | +let phrases,entries = | |
217 | + let compreps = Xlist.map ENIAMwalRealizations.compreps (fun (lemma,morfs) -> | |
218 | + lemma, expand_lexicalizations_morfs morfs) in | |
219 | + let entries = extract_lex_entries_comprepnp StringMap.empty compreps in | |
220 | + IntMap.fold ENIAMwalRealizations.phrases (IntMap.empty,entries) (fun (phrases,entries) id morfs -> | |
221 | + let morfs = expand_lexicalizations_morfs morfs in | |
222 | + let morfs,entries = Xlist.fold morfs ([],entries) extract_lex_entries in | |
223 | + IntMap.add phrases id morfs, entries) | |
224 | + | |
225 | +let print_entries entries = | |
226 | + StringMap.iter entries (fun pos entries2 -> | |
227 | + StringMap.iter entries2 (fun lemma entries3 -> | |
228 | + EntrySet.iter entries3 (fun entry -> | |
229 | + Printf.printf "%s: %s: %s\n" pos lemma (ENIAMwalStringOf.entry entry)))) | |
230 | + | |
231 | +(* let _ = print_entries entries *) | |
232 | + | |
233 | +let rec expand_restr valence lexeme pos = function | |
234 | + SimpleLexEntry(lemma,pos2) -> [SimpleLexEntry(lemma,pos2)] | |
235 | + (* | LexEntry(id,lemma,pos2,Natr,[]) -> [LexEntry(id,lemma,pos2,NoRestr,[])] *) | |
236 | + | LexEntry(id,lemma,pos2,Natr,_) -> failwith "expand_restr" | |
237 | + | LexEntry(id,lemma,pos2,restr,[]) -> | |
238 | + print_endline (lexeme ^ " " ^ pos); | |
239 | + [LexEntry(id,lemma,pos2,restr,[])] (* FIXME *) | |
240 | +(* (* print_endline "expand_restr"; *) | |
241 | + let frames = try StringMap.find (StringMap.find valence lexeme) pos | |
242 | + with Not_found -> failwith ("expand_restr:" ^ lexeme ^ " " ^ pos) in | |
243 | + (* Printf.printf "%s %s %d\n" lexeme pos (Xlist.size frames); | |
244 | + Xlist.iter frames (fun frame -> print_endline (ENIAMwalStringOf.frame lexeme frame)); | |
245 | + print_endline "";*) | |
246 | + (if restr = Atr || restr = Atr1 then [LexEntry(id,lemma,pos2,NoRestr,[])] else []) @ | |
247 | + (Xlist.fold frames [] (fun frames -> function | |
248 | + Frame(_,schema) -> | |
249 | + let schema = remove_pro_args schema in | |
250 | + if schema = [] then frames else | |
251 | + (expand_restr valence lexeme pos (LexEntry(id,lemma,pos2,restr,schema))) @ frames | |
252 | + | _ -> frames))*) | |
253 | + | LexEntry(id,lemma,pos2,Atr,schema) -> | |
254 | + let schema = Xlist.map schema (fun p -> {p with morfs=Phrase Null :: p.morfs}) in | |
255 | + [LexEntry(id,lemma,pos2,NoRestr,schema)] | |
256 | + | LexEntry(id,lemma,pos2,Atr1,schema) -> | |
257 | + LexEntry(id,lemma,pos2,NoRestr,[]) :: (Xlist.map schema (fun x -> LexEntry(id,lemma,pos2,NoRestr,[x]))) | |
258 | + | LexEntry(id,lemma,pos2,Ratr,schema) -> | |
259 | + let schemas = Xlist.map (Xlist.multiply_list (Xlist.map schema (fun x -> [[x];[]]))) List.flatten in | |
260 | + Xlist.fold schemas [] (fun schemas schema -> | |
261 | + if schema = [] then schemas else LexEntry(id,lemma,pos2,NoRestr,schema) :: schemas) | |
262 | + | LexEntry(id,lemma,pos2,Ratr1,schema) -> | |
263 | + Xlist.map schema (fun x -> LexEntry(id,lemma,pos2,NoRestr,[x])) | |
264 | + | LexEntry(id,lemma,pos2,Ratrs,schema) -> [LexEntry(id,lemma,pos2,NoRestr,schema)] | |
265 | + | LexEntry(id,lemma,pos2,NoRestr,_) -> failwith "expand_restr" | |
266 | + (* | ComprepNPEntry(lemma,Natr,[]) -> [ComprepNPEntry(lemma,NoRestr,[])] *) | |
267 | + | ComprepNPEntry(lemma,Natr,_) -> failwith "expand_restr" | |
268 | + | ComprepNPEntry(lemma,restr,[]) as entry -> failwith ("expand_restr: " ^ ENIAMwalStringOf.entry entry) | |
269 | + | ComprepNPEntry(lemma,Atr,schema) -> | |
270 | + let schema = Xlist.map schema (fun p -> {p with morfs=Phrase Null :: p.morfs}) in | |
271 | + [ComprepNPEntry(lemma,NoRestr,schema)] | |
272 | + | ComprepNPEntry(lemma,Atr1,schema) -> | |
273 | + ComprepNPEntry(lemma,NoRestr,[]) :: (Xlist.map schema (fun x -> ComprepNPEntry(lemma,NoRestr,[x]))) | |
274 | + | ComprepNPEntry(lemma,Ratr,schema) -> | |
275 | + let schemas = Xlist.map (Xlist.multiply_list (Xlist.map schema (fun x -> [[x];[]]))) List.flatten in | |
276 | + Xlist.fold schemas [] (fun schemas schema -> | |
277 | + if schema = [] then schemas else ComprepNPEntry(lemma,NoRestr,schema) :: schemas) | |
278 | + | ComprepNPEntry(lemma,Ratr1,schema) -> | |
279 | + Xlist.map schema (fun x -> ComprepNPEntry(lemma,NoRestr,[x])) | |
280 | + | ComprepNPEntry(lemma,Ratrs,schema) -> [ComprepNPEntry(lemma,NoRestr,schema)] | |
281 | + | ComprepNPEntry(lemma,NoRestr,_) -> failwith "expand_restr" | |
282 | + (* | Frame _ as frame -> [frame] *) | |
283 | + | _ -> failwith "expand_restr" | |
284 | + | |
285 | +let entries = | |
286 | + StringMap.mapi entries (fun pos entries2 -> | |
287 | + StringMap.mapi entries2 (fun lemma entries3 -> | |
288 | + EntrySet.fold entries3 [] (fun entries3 entry -> | |
289 | + (expand_restr [] lemma pos entry) @ entries3))) | |
290 | + | |
291 | + | |
292 | +(* | |
214 | 293 | let convert morfs = |
215 | 294 | let morfs = expand_lexicalizations_morfs morfs in |
216 | 295 | let morfs,frames = Xlist.fold morfs ([],[]) extract_lex_morf in |
217 | - Xlist.fold frames(*extract_lex_frames lexeme pos [] frame*) valence (fun valence -> function | |
296 | + (*Xlist.fold frames(*extract_lex_frames lexeme pos [] frame*) valence (fun valence -> function | |
218 | 297 | lexeme,pos,Frame(atrs,schema) -> |
219 | 298 | let schemas = simplify_lex (split_xor (split_or_coord schema)) in |
220 | 299 | Xlist.fold schemas valence (fun valence schema -> |
... | ... | @@ -231,3 +310,4 @@ let convert morfs = |
231 | 310 | |
232 | 311 | |
233 | 312 | let phrases = IntMap.map ENIAMwalRealizations.phrases convert |
313 | +*) | |
... | ... |
walenty/ENIAMwalRealizations.ml
... | ... | @@ -121,14 +121,7 @@ let rec load_realizations_rec (expands,subtypes,equivs) found rev = function |
121 | 121 | | [] :: l -> load_realizations_rec (expands,subtypes,equivs) found rev l |
122 | 122 | | _ -> failwith "load_realizations_rec" |
123 | 123 | |
124 | -let rec get_lexemes = function | |
125 | - Lexeme s -> [s] | |
126 | - | ORconcat l -> List.flatten (Xlist.map l get_lexemes) | |
127 | - | ORcoord l -> List.flatten (Xlist.map l get_lexemes) | |
128 | - | XOR l -> List.flatten (Xlist.map l get_lexemes) | |
129 | - | Elexeme gender -> failwith "get_lexemes" | |
130 | - | |
131 | -let find_comprep_reqs compreps = | |
124 | +(* let find_comprep_reqs compreps = | |
132 | 125 | Xlist.fold compreps StringMap.empty (fun comprep_reqs (s,l) -> |
133 | 126 | let l = Xlist.map l (function |
134 | 127 | LexPhrase(pos_lex,_) -> Xlist.fold pos_lex StringSet.empty (fun set -> function |
... | ... | @@ -139,9 +132,9 @@ let find_comprep_reqs compreps = |
139 | 132 | | _ -> set) *) |
140 | 133 | | morf -> failwith ("find_compreps_reqs: " ^ ENIAMwalStringOf.morf morf)) in |
141 | 134 | if l = [] then failwith "find_compreps_reqs"; |
142 | - StringMap.add comprep_reqs s (StringSet.to_list (Xlist.fold (List.tl l) (List.hd l) StringSet.union))) | |
135 | + StringMap.add comprep_reqs s (StringSet.to_list (Xlist.fold (List.tl l) (List.hd l) StringSet.union))) *) | |
143 | 136 | |
144 | -let create_comprep_dict compreps = | |
137 | +(* let create_comprep_dict compreps = | |
145 | 138 | Xlist.fold compreps StringMap.empty (fun compreps (s,l) -> |
146 | 139 | Xlist.fold l compreps (fun compreps -> function |
147 | 140 | LexPhrase([PREP _,_;SUBST _,lex],_) as morf -> |
... | ... | @@ -157,7 +150,7 @@ let create_comprep_dict compreps = |
157 | 150 | let lexemes = get_lexemes lex in |
158 | 151 | Xlist.fold lexemes compreps (fun compreps lexeme -> |
159 | 152 | StringMap.add_inc compreps lexeme ["subst",(s,morf)] (fun l -> ("subst",(s,morf)) :: l)) |
160 | - | morf -> failwith ("create_comprep_dict: " ^ ENIAMwalStringOf.morf morf))) | |
153 | + | morf -> failwith ("create_comprep_dict: " ^ ENIAMwalStringOf.morf morf))) *) | |
161 | 154 | |
162 | 155 | let load_realizations () = |
163 | 156 | (* let lines = Str.split (Str.regexp "\n") (File.load_file realizations_filename) in |
... | ... | @@ -181,11 +174,11 @@ let load_realizations () = |
181 | 174 | | _ -> failwith "load_realizations 2") in |
182 | 175 | let compreps = Xlist.map compreps (fun (s,morfs) -> |
183 | 176 | s, List.flatten (List.flatten (Xlist.map morfs (fun morf -> Xlist.map (expand_subtypes_morf subtypes (expand_schema_morf expands morf)) (expand_equivs_morf equivs))))) in |
184 | - let comprep_reqs = find_comprep_reqs compreps in | |
177 | + (* let comprep_reqs = find_comprep_reqs compreps in *) | |
185 | 178 | (* let compreps = create_comprep_dict compreps in *) |
186 | - expands(*,compreps*),comprep_reqs,subtypes,equivs | |
179 | + expands,compreps,(*comprep_reqs,*)subtypes,equivs | |
187 | 180 | |
188 | -let expands(*,compreps*),comprep_reqs,subtypes,equivs = load_realizations () | |
181 | +let expands,compreps,(*comprep_reqs,*)subtypes,equivs = load_realizations () | |
189 | 182 | |
190 | 183 | |
191 | 184 | (* Wypisanie realizacji *) |
... | ... |
walenty/ENIAMwalStringOf.ml
... | ... | @@ -284,9 +284,12 @@ and morf = function |
284 | 284 | (* | LexRPhraseMode(m,pos_lex,(r,s)) -> "lex([" ^ m ^ "," ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])" *) |
285 | 285 | | PhraseAbbr(p,ml) -> phrase_abbr p ^ "[" ^ String.concat ";" (Xlist.map ml morf) ^ "]" |
286 | 286 | | PhraseComp(p,(ct,l)) -> phrase_comp p ^ "," ^ comp_type ct ^ "[" ^ String.concat ";" (Xlist.map l comp) ^ "]" |
287 | - | LexPhraseId(id,p,le) -> "lex(" ^ id ^ "," ^ pos p ^ "," ^ lex le ^ ")" | |
288 | - | LexArg(id,p,le) -> "lex(" ^ id ^ "," ^ pos p ^ "," ^ le ^ ")" | |
287 | + (* | LexPhraseId(id,p,le) -> "lex(" ^ id ^ "," ^ pos p ^ "," ^ lex le ^ ")" | |
288 | + | LexArg(id,p,le) -> "lex(" ^ id ^ "," ^ pos p ^ "," ^ le ^ ")" *) | |
289 | + (* | LexPhraseId(id,p,le) -> "lex(" ^ id ^ "," ^ pos p ^ "," ^ lex le ^ ")" *) | |
289 | 290 | | MorfId id -> Printf.sprintf "id(%d)" id |
291 | + | SimpleLexArg(le,p) -> "lex(" ^ le ^ "," ^ pos p ^ ")" | |
292 | + | LexArg(id,le,p) -> "lex(" ^ string_of_int id ^ "," ^ le ^ "," ^ pos p ^ ")" | |
290 | 293 | (* | LexRealization(mrf,le) -> "lex(" ^ morf mrf ^ "," ^ le ^ ")"*) |
291 | 294 | (* | Raised(mrf1,dir,mrf2) -> "raised([" ^ String.concat ";" mrf1 ^ "]," ^ direction dir ^ "[" ^ String.concat ";" mrf2 ^ "])" |
292 | 295 | | Multi l -> "multi(" ^ String.concat ";" (Xlist.map l phrase) ^ ")" *) |
... | ... | @@ -315,10 +318,16 @@ let frame_atrs = function |
315 | 318 | let frame lexeme = function |
316 | 319 | Frame(atrs,s) -> |
317 | 320 | Printf.sprintf "%s: %s: %s" lexeme (frame_atrs atrs) (schema s) |
318 | - | LexFrame(id,p,r,s) -> | |
319 | - Printf.sprintf "%s: %s: %s: %s: %s" lexeme id (pos p) (restr r) (schema s) | |
321 | + | SimpleLexEntry(le,p) -> | |
322 | + Printf.sprintf "%s: %s" le p | |
323 | + | LexEntry(id,le,p,r,s) -> | |
324 | + Printf.sprintf "%d: %s: %s: %s: %s" id le p (restr r) (schema s) | |
325 | + | ComprepNPEntry(le,r,s) -> | |
326 | + Printf.sprintf "%s: %s: %s" le (restr r) (schema s) | |
327 | + (* | LexFrame(id,p,r,s) -> | |
328 | + Printf.sprintf "%s: %s: %s: %s: %s" lexeme id (pos p) (restr r) (schema s) | |
320 | 329 | | ComprepFrame(le,p,r,s) -> |
321 | - Printf.sprintf "%s: %s: %s: %s: %s" lexeme le (pos p) (restr r) (schema s) | |
330 | + Printf.sprintf "%s: %s: %s: %s: %s" lexeme le (pos p) (restr r) (schema s) *) | |
322 | 331 | (* | FrameR(atrs,s) -> |
323 | 332 | Printf.sprintf "%s: %s: %s" lexeme (frame_atrs atrs) (schema_role s) |
324 | 333 | | LexFrameR(id,p,r,s) -> |
... | ... | @@ -327,12 +336,22 @@ let frame lexeme = function |
327 | 336 | Printf.sprintf "%s: %s: %s: %s: %s" lexeme le (pos p) (restr r) (schema_role s)*) |
328 | 337 | (* | _ -> failwith "WalStringOf.frame" *) |
329 | 338 | |
330 | -let fnum_frame lexeme = function | |
339 | +let entry = function | |
340 | + Frame(atrs,s) -> | |
341 | + Printf.sprintf "%s: %s: %s" "lexeme" (frame_atrs atrs) (schema s) | |
342 | + | SimpleLexEntry(le,p) -> | |
343 | + Printf.sprintf "%s: %s" le p | |
344 | + | LexEntry(id,le,p,r,s) -> | |
345 | + Printf.sprintf "%d: %s: %s: %s: %s" id le p (restr r) (schema s) | |
346 | + | ComprepNPEntry(le,r,s) -> | |
347 | + Printf.sprintf "%s: %s: %s" le (restr r) (schema s) | |
348 | + | |
349 | +(* let fnum_frame lexeme = function | |
331 | 350 | fnum,Frame(atrs,s) -> |
332 | 351 | Printf.sprintf "%d: %s: %s: %s" fnum lexeme (frame_atrs atrs) (schema s) |
333 | 352 | | fnum,LexFrame(id,p,r,s) -> |
334 | 353 | Printf.sprintf "%d: %s: %s: %s: %s: %s" fnum lexeme id (pos p) (restr r) (schema s) |
335 | 354 | | fnum,ComprepFrame(le,p,r,s) -> |
336 | - Printf.sprintf "%d: %s: %s: %s: %s: %s" fnum lexeme le (pos p) (restr r) (schema s) | |
355 | + Printf.sprintf "%d: %s: %s: %s: %s: %s" fnum lexeme le (pos p) (restr r) (schema s) *) | |
337 | 356 | |
338 | 357 | let unparsed_frame lexeme (r,o,neg,p,a,s) = lexeme ^ " " ^ String.concat ": " [r;o;neg;p;a;s] |
... | ... |
walenty/ENIAMwalTypes.ml
... | ... | @@ -136,12 +136,14 @@ and morf = |
136 | 136 | (* | LexRPhraseMode of string * (pos * lex) list * (restr * position list) *) |
137 | 137 | | PhraseAbbr of phrase_abbr * morf list |
138 | 138 | | PhraseComp of phrase_comp * (comp_type * comp list) |
139 | - | LexPhraseId of string * pos * lex | |
140 | - | LexArg of string * pos * string | |
139 | + (* | LexPhraseId of string * pos * lex | |
140 | + | LexArg of string * pos * string *) | |
141 | 141 | | MorfId of int |
142 | 142 | (* | LexRealization of morf * string*) |
143 | 143 | (* | Raised of string list * direction * string list |
144 | 144 | | Multi of phrase list*) |
145 | + | SimpleLexArg of string * pos | |
146 | + | LexArg of int * string * pos | |
145 | 147 | |
146 | 148 | let empty_position = |
147 | 149 | {psn_id=(-1); gf=ARG; role=""; role_attr=""; mode=[]; sel_prefs=[]; cr=[]; ce=[]; morfs=[]} |
... | ... | @@ -190,22 +192,30 @@ type frame_atrs = |
190 | 192 | type schema = {sch_id: int; opinion: opinion; reflexiveMark: refl; aspect: aspect; |
191 | 193 | negativity: negation; predicativity: pred; positions: position list; text_rep: string} |
192 | 194 | |
193 | -type schema2 = | |
195 | +type entry2 = | |
194 | 196 | Frame of frame_atrs * position list |
195 | - | LexFrame of string * pos * restr * position list | |
196 | - | ComprepFrame of string * pos * restr * position list | |
197 | + (* | LexFrame of string * pos * restr * position list | |
198 | + | ComprepFrame of string * pos * restr * position list *) | |
199 | + | SimpleLexEntry of string * string | |
200 | + | LexEntry of int * string * string * restr * position list | |
201 | + | ComprepNPEntry of string * restr * position list | |
197 | 202 | (* | FrameR of frame_atrs * (string * string * string list * string list * morf list) list |
198 | 203 | | LexFrameR of string * pos * restr * (string * string * string list * string list * morf list) list |
199 | 204 | | ComprepFrameR of string * pos * restr * (string * string * string list * string list * morf list) list *) |
200 | 205 | |
201 | 206 | |
207 | +module OrderedEntry = struct | |
208 | + type t = entry2 | |
209 | + let compare = compare | |
210 | +end | |
211 | + | |
212 | +module EntrySet = Xset.Make(OrderedEntry) | |
202 | 213 | |
203 | 214 | module OrderedAbbr = struct |
204 | 215 | type t = phrase_abbr |
205 | 216 | let compare = compare |
206 | 217 | end |
207 | 218 | |
208 | -(* module MorfSet = Xset.Make(OrderedMorf) *) | |
209 | 219 | module AbbrMap = Xmap.Make(OrderedAbbr) |
210 | 220 | |
211 | 221 | module OrderedComp = struct |
... | ... |
walenty/makefile
... | ... | @@ -28,8 +28,8 @@ eniam-walenty.cmxa: $(SOURCES) |
28 | 28 | test: test.ml |
29 | 29 | $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml |
30 | 30 | |
31 | -loader: ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml #ENIAMwalLex.ml | |
32 | - $(OCAMLOPT) -o loader $(OCAMLOPTFLAGS) ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml #ENIAMwalLex.ml | |
31 | +loader: ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml ENIAMwalLex.ml | |
32 | + $(OCAMLOPT) -o loader $(OCAMLOPTFLAGS) ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml ENIAMwalLex.ml | |
33 | 33 | |
34 | 34 | .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx |
35 | 35 | |
... | ... |