Commit ea092bb13b7b5769863c2f5a9158dfba5b64375b
1 parent 7dc3851b
poprawiony błąd w komunikacji między parserem a pre
Showing 3 changed files with 41 additions and 11 deletions
parser/exec.ml
... | ... | @@ -111,6 +111,9 @@ let process_text = function |
111 | 111 | Xlist.iter paragraphs process_paragraph; |
112 | 112 | [],0,0 |
113 | 113 | | PreTypes.RawText "" -> [],0,0 |
114 | + | AltText[Raw,RawText query; Struct, struct_par; CONLL, conll] -> | |
115 | + (* tu trzeba dodac wypisywanie na ekran *) | |
116 | + [],0,0 | |
114 | 117 | | _ -> failwith "process_text" |
115 | 118 | |
116 | 119 | let rec extract_query_text = function |
... | ... | @@ -126,10 +129,14 @@ let process_query ic oc timeout test_only_flag id full_query max_n = |
126 | 129 | (* Printf.fprintf oc "%s\n%!" query; *) |
127 | 130 | print_endline "process_query 1"; |
128 | 131 | (* Marshal.to_channel oc (PreTypes.RawText query) []; *) |
129 | - Marshal.to_channel oc query []; | |
132 | + Marshal.to_channel oc full_query []; | |
130 | 133 | flush oc; |
131 | 134 | print_endline "process_query 2"; |
132 | 135 | let text,msg,pre_time1 = (Marshal.from_channel ic : PreTypes.text * string * float) in |
136 | + (* let text,msg,pre_time1 = PreProcessing.mail_loop2 query in *) | |
137 | + (* let text = PreTypes.RawText "" in | |
138 | + let msg = "" in | |
139 | + let pre_time1 = 0. in *) | |
133 | 140 | print_endline "process_query 3"; |
134 | 141 | let paths = process_text text in |
135 | 142 | (* let paths = match paths with |
... | ... |
pre/preProcessing.ml
... | ... | @@ -586,8 +586,9 @@ let split_into_sentences par paths last next_id = |
586 | 586 | (* [{pid="";pbeg=(-1); plen=(-1); psentence=StructSentence(paths,last,next_id)}] *) |
587 | 587 | |
588 | 588 | let parse_conll paths = |
589 | + (* print_endline "a11"; *) | |
589 | 590 | let paths = PreMWE.process paths in |
590 | - (* print_endline "a12"; *) | |
591 | + (* print_endline "a12"; *) | |
591 | 592 | let paths = find_proper_names paths in |
592 | 593 | (* print_endline "a13"; *) |
593 | 594 | let paths = modify_weights paths in |
... | ... | @@ -607,23 +608,43 @@ let split_into_sentences par paths last next_id = |
607 | 608 | let paths = if !single_frame_flag then single_frame paths else paths in*) |
608 | 609 | (* let paths, next_id = add_ids paths in *) (* FIXME: jak powiązać id z connl z tymi z pre *) |
609 | 610 | let paths = prepare_indexes paths in |
610 | - (* print_endline "a18"; *) | |
611 | + (* print_endline "a18"; *) | |
611 | 612 | paths, (*next_id*) -1 |
612 | 613 | |
613 | 614 | let parse_text = function |
614 | 615 | RawText query -> |
615 | - print_endline query; | |
616 | + (* print_endline "parse_text 0"; *) | |
617 | + (* print_endline query; *) | |
616 | 618 | AltText[Raw,RawText query; Struct,StructText (Xlist.map (Xstring.split "\n" query) (fun par -> |
617 | 619 | let (paths,last : PreTypes.token_record list * int), next_id = parse par in |
618 | 620 | let sentences, next_id = split_into_sentences par paths last next_id in |
619 | 621 | AltParagraph[Raw,RawParagraph par; Struct,StructParagraph(sentences,next_id)]))] |
620 | 622 | | AltText[Raw,RawText query;CONLL,StructText[ |
621 | - StructParagraph([{psentence = AltSentence[Raw, RawSentence text; CONLL, StructSentence(paths,last)]} as p],_)]] -> | |
623 | + StructParagraph([{psentence = AltSentence[Raw, RawSentence text; CONLL, StructSentence(paths,last)]} as p],_)]] -> | |
624 | + (* | AltText[Raw,RawText query;CONLL,StructText[StructParagraph([p],_)]] -> *) | |
625 | + (* | AltText[Raw,RawText query;CONLL,x] -> *) | |
626 | + (* | t -> print_endline "parse_text 7"; | |
627 | + let query,x = match t with | |
628 | + AltText l -> | |
629 | + print_endline ("parse_text 8: " ^ string_of_int (Xlist.size l)); | |
630 | + (match l with [a;b] -> print_endline "parse_text 9"; | |
631 | + (match a with Raw,RawText query -> print_endline "parse_text 10";query | _ -> failwith "parse_text: not implemented5"), | |
632 | + (match b with CONLL,x -> print_endline "parse_text 11";x | _ -> failwith "parse_text: not implemented6") | |
633 | + | _ -> failwith "parse_text: not implemented4") | |
634 | + | _ -> failwith "parse_text: not implemented" in | |
635 | + print_endline "parse_text 1a"; | |
636 | + let p = match x with StructText[StructParagraph([p],_)] -> p | _ -> failwith "parse_text: not implemented3" in *) | |
637 | + (* print_endline "parse_text 1"; *) | |
638 | + let text,paths,last = match p with | |
639 | + {psentence = AltSentence[Raw, RawSentence text; CONLL, StructSentence(paths,last)]} -> text,paths,last | |
640 | + | _ -> failwith "parse_text: not implemented2" in | |
641 | + (* print_endline "parse_text 2"; *) | |
622 | 642 | let (paths,last), next_id = parse_conll (paths,last) in |
623 | 643 | let conll = StructText[StructParagraph([{p with psentence = AltSentence[Raw, RawSentence text; CONLL, StructSentence(paths,last)]}],next_id)] in |
624 | 644 | let (paths,last), next_id = parse query in |
625 | 645 | let sentences, next_id = split_into_sentences query paths last next_id in |
626 | 646 | let struct_par = StructText[StructParagraph(sentences,next_id)] in |
647 | + (* print_endline "parse_text 6"; *) | |
627 | 648 | AltText[Raw,RawText query; Struct, struct_par; CONLL, conll] |
628 | 649 | | _ -> failwith "parse_text: not implemented" |
629 | 650 | |
... | ... | @@ -636,9 +657,9 @@ let rec main_loop in_chan out_chan = |
636 | 657 | (try |
637 | 658 | (* let time0 = Sys.time () in *) |
638 | 659 | let utime0 = Unix.gettimeofday () in |
639 | - (* print_endline "main_loop 3"; *) | |
660 | + (* print_endline "main_loop 3a"; *) | |
640 | 661 | let text = parse_text query in |
641 | - (* print_endline "main_loop 4"; *) | |
662 | + (* print_endline "main_loop 4a"; *) | |
642 | 663 | (* let (paths,last : (int * int * PreTypes.token_record) list * int), next_id = parse query in *) |
643 | 664 | (* let time2 = Sys.time () in *) |
644 | 665 | let utime2 = Unix.gettimeofday () in |
... | ... | @@ -647,10 +668,12 @@ let rec main_loop in_chan out_chan = |
647 | 668 | (* print_endline "main_loop 5"; *) |
648 | 669 | () |
649 | 670 | (* output_string out_chan (Xml.to_string_fmt (PrePaths.to_xml paths) ^ "\n") *) |
650 | - with e -> | |
651 | - Marshal.to_channel out_chan (RawText ""(*[],0*),Printexc.to_string e,0.) []); | |
671 | + with e -> ( | |
672 | + (* print_endline "main_loop 7"; *) | |
673 | + Marshal.to_channel out_chan (RawText ""(*[],0*),Printexc.to_string e,0.) [])); | |
652 | 674 | (* output_string out_chan (Xml.to_string_fmt (Xml.Element("error",[], |
653 | 675 | [Xml.PCData (Printexc.to_string e)])) ^ "\n"));*) |
676 | + (* print_endline "main_loop 6"; *) | |
654 | 677 | flush out_chan; |
655 | 678 | main_loop in_chan out_chan) |
656 | 679 | |
... | ... |
pre/preSentences.ml
... | ... | @@ -43,7 +43,7 @@ let rec find_sentence beg found = function |
43 | 43 | if t.beg > beg then found else find_sentence beg found l |
44 | 44 | |
45 | 45 | let find_sentences (paths,last) next_id = |
46 | - print_endline (PrePaths.to_string (PrePaths.sort (paths,last))); | |
46 | + (* print_endline (PrePaths.to_string (PrePaths.sort (paths,last))); *) | |
47 | 47 | let begs = find_sentence_begs paths in |
48 | 48 | Xlist.fold begs (paths,last,next_id) (fun (paths,last,next_id) beg -> |
49 | 49 | (* Printf.printf "BEG=%d\n%!" beg; *) |
... | ... | @@ -88,7 +88,7 @@ let get_raw_sentence a beg len = |
88 | 88 | if len mod factor <> 0 then failwith ("get_raw_sentence: len " ^ string_of_int len) else |
89 | 89 | let buf = Buffer.create 512 in |
90 | 90 | Int.iter (beg / factor - 1) (beg / factor + len / factor - 2) (fun i -> |
91 | - printf "%d" i; printf " %s\n%!" a.(i); | |
91 | + (* printf "%d" i; printf " %s\n%!" a.(i); *) | |
92 | 92 | Buffer.add_string buf a.(i)); |
93 | 93 | Buffer.contents buf |
94 | 94 | |
... | ... |