Commit 25c2221c352fa384a1578c9dc6d155e5391ba4b2

Authored by Wojciech Jaworski
1 parent cf09ae51

obsługa concrafta

pre/.gitignore
1 1 pre
  2 +concraft_test
... ...
pre/concraft_test.ml 0 → 100644
  1 +
  2 +(* let concraft_in, concraft_out, concraft_err = Unix.open_process_full "../../../.local/bin/concraft-pl tag ../concraft/nkjp-model-0.2.gz" [| |] *)
  3 +let concraft_in, concraft_out, concraft_err =
  4 + Unix.open_process_full "concraft-pl tag ../concraft/nkjp-model-0.2.gz"
  5 + [|"PATH=" ^ Sys.getenv "PATH"|]
  6 +
  7 +let _ =
  8 + print_endline "out";
  9 + Printf.fprintf concraft_out "Ala ma kota.\n\n%!";
  10 + print_endline "in";
  11 + print_endline ("concraft error message: " ^ input_line concraft_err);
  12 + ()
... ...
pre/makefile
1 1 OCAMLC=ocamlc
2 2 OCAMLOPT=ocamlopt
3 3 OCAMLDEP=ocamldep
4   -INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../morphology -I ../parser
  4 +INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../morphology -I ../parser -I ../corpora
5 5 OCAMLFLAGS=$(INCLUDES) -g
6 6 OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa inflexion.cmxa
7 7 INSTALLDIR=`ocamlc -where`
8 8  
9 9 WAL= paths.ml walTypes.ml walStringOf.ml preTypes.ml preWordnet.ml walParser.ml walTEI.ml walFrames.ml
10   -PRE= preTokenizer.ml preAcronyms.ml prePatterns.ml prePaths.ml preMWE.ml preSemantics.ml preSentences.ml preProcessing.ml
  10 +PRE= preTokenizer.ml preAcronyms.ml prePatterns.ml prePaths.ml preMWE.ml preSemantics.ml preSentences.ml ../corpora/CONLL.ml preProcessing.ml
11 11  
12 12 all:
13 13 $(OCAMLOPT) -o pre $(OCAMLOPTFLAGS) $(WAL) $(PRE)
14 14  
  15 +concraft_test: concraft_test.ml
  16 + $(OCAMLOPT) -o concraft_test $(OCAMLOPTFLAGS) concraft_test.ml
  17 +
15 18 .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx
16 19  
17 20 .mll.ml:
... ... @@ -33,4 +36,4 @@ all:
33 36 $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $<
34 37  
35 38 clean:
36   - rm -f *~ *.cm[oix] *.o pre
  39 + rm -f *~ *.cm[oix] *.o pre concraft_test
... ...
pre/preProcessing.ml
... ... @@ -604,13 +604,68 @@ let parse_conll tokens dep_paths =
604 604 conll_id + 1) in
605 605 ()
606 606  
  607 +(* let concraft_in, concraft_out, concraft_err = Unix.open_process_full "../../../.local/bin/concraft-pl tag ../concraft/nkjp-model-0.2.gz" [| |] *)
  608 +(*let concraft_in, concraft_out, concraft_err =
  609 + Unix.open_process_full "concraft-pl tag ../concraft/nkjp-model-0.2.gz"
  610 + [|"PATH=" ^ Sys.getenv "PATH"|]
  611 +
  612 +let rec load_concraft_sentence white orth rev ic =
  613 + (* print_endline "load_concraft_sentence 1"; *)
  614 + (* print_endline ("concraft error message: " ^ input_line concraft_err); *)
  615 + let s = input_line ic in
  616 + (* print_endline ("load_concraft_sentence: " ^ s); *)
  617 + if s = "" then List.rev rev else
  618 + match Xstring.split_delim "\t" s with
  619 + [""; lemma; interp; "disamb"] -> load_concraft_sentence "" "" ((white,orth,lemma,interp) :: rev) ic
  620 + | [""; lemma; interp] -> load_concraft_sentence white orth rev ic
  621 + | [orth; white] -> load_concraft_sentence white orth rev ic
  622 + | _ -> failwith ("load_concraft_sentence: " ^ s)
  623 +
  624 +let make_token (white,orth,lemma,interp) =
  625 + let cat,interp = match Xstring.split ":" interp with
  626 + cat :: l -> cat, [Xlist.map l (fun tag -> [tag])]
  627 + | _ -> failwith "make_token" in
  628 + {empty_token with orth = orth; token = Lemma(lemma,cat,interp)}
  629 +
  630 +let parse_mate tokens pbeg s =
  631 + (* print_endline ("parse_mate: " ^ s); *)
  632 + Printf.fprintf concraft_out "%s\n\n%!" s;
  633 + let l = load_concraft_sentence "" "" [] concraft_in in
  634 + let l = Xlist.map l make_token in
  635 + let l = {empty_token with token = Interp "<conll_root>"} :: l in
  636 + let l = Xlist.map l (fun t -> ExtArray.add tokens t,-1,"") in
  637 + let _ = CONLL.establish_for_token pbeg s tokens (List.tl l) in
  638 + let dep_paths = Array.of_list l in
  639 + parse_conll tokens dep_paths;
  640 + dep_paths
  641 +
  642 +let rec parse_mate_sentence tokens mode pbeg = function
  643 + RawSentence s -> if mode <> Mate then RawSentence s else DepSentence (parse_mate tokens pbeg s)
  644 + | StructSentence(paths,last) -> StructSentence(paths,last)
  645 + | DepSentence(paths) -> DepSentence(paths)
  646 + | QuotedSentences sentences ->
  647 + QuotedSentences(Xlist.map sentences (fun p ->
  648 + {pid=p.PreTypes.pid; pbeg=p.PreTypes.pbeg; plen=p.PreTypes.plen; pnext=p.PreTypes.pnext; pfile_prefix=p.PreTypes.pfile_prefix;
  649 + psentence=parse_mate_sentence tokens mode pbeg p.PreTypes.psentence}))
  650 + | AltSentence l -> AltSentence(Xlist.map l (fun (mode,sentence) ->
  651 + mode, parse_mate_sentence tokens mode pbeg sentence))
  652 +
  653 +let parse_mate_sentences tokens sentences =
  654 + Xlist.map sentences (fun p ->
  655 + {p with psentence=parse_mate_sentence tokens Struct p.pbeg p.psentence})*)
  656 +
607 657 let parse_text = function
608 658 RawText query ->
  659 + (* print_endline ("parse_text: " ^ query); *)
609 660 let tokens = ExtArray.make 100 empty_token in
610 661 let _ = ExtArray.add tokens empty_token in (* id=0 jest zarezerwowane dla pro; FIXME: czy to jest jeszcze aktualne? *)
611 662 let paragraphs = Xlist.map (Xstring.split "\n" query) (fun paragraph ->
612 663 let paths = parse paragraph in
  664 + (* print_endline "parse_text 1"; *)
613 665 let sentences = PreSentences.split_into_sentences paragraph tokens paths in
  666 + (* print_endline "parse_text 2"; *)
  667 + (* let sentences = parse_mate_sentences tokens sentences in *)
  668 + (* print_endline "parse_text 3"; *)
614 669 AltParagraph[Raw,RawParagraph paragraph; Struct,StructParagraph sentences]) in
615 670 AltText[Raw,RawText query; Struct,StructText(List.rev paragraphs, tokens)]
616 671 | AltText[Raw,RawText query;CONLL,StructText([
... ... @@ -633,9 +688,9 @@ let rec main_loop in_chan out_chan =
633 688 (try
634 689 (* let time0 = Sys.time () in *)
635 690 let utime0 = Unix.gettimeofday () in
636   - (* print_endline "main_loop 3a"; *)
  691 + (* print_endline "main_loop 3a"; *)
637 692 let text = parse_text query in
638   - (* print_endline "main_loop 4a"; *)
  693 + (* print_endline "main_loop 4a"; *)
639 694 (* let time2 = Sys.time () in *)
640 695 let utime2 = Unix.gettimeofday () in
641 696 (* Printf.printf "time=%f utime=%f\n%!" (time2 -. time0) (utime2 -. utime0); *)
... ...
swigra/parser/morfeusz2-swi.so 0 → 100755
No preview for this file type