Commit 5569a92ab7bd4ed4faf6c83c00049be7ebe4552b

Authored by Wojciech Jaworski
1 parent 5bff1aae

dopasowywanie parsera do nowego podziału na moduły

parser/LCGlexicon.ml
... ... @@ -1276,7 +1276,7 @@ end
1276 1276 module IntIntSet = Xset.Make(OrderedIntInt)
1277 1277  
1278 1278  
1279   -let create (paths,last) tokens =
  1279 +let create (paths,last) tokens lex_sems =
1280 1280 uni_weight := 0.;
1281 1281 let chart = LCGchart.make last in
1282 1282 let chart = Xlist.fold paths chart (fun chart (id,lnode,rnode) ->
... ...
parser/exec.ml
... ... @@ -104,46 +104,46 @@ let empty_sum_result = {
104 104 open Printf
105 105  
(* Maps a mode constructor coming from the pre-processing types module onto the
   unqualified constructor of the same name that is in scope in this file.
   The mapping is one-to-one: Raw, Struct, CONLL, ENIAM, Mate, Swigra, POLFIE.
   This commit only changes the source module from PreTypes to
   ENIAMsubsyntaxTypes. *)
106 106 let translate_mode = function
107   - PreTypes.Raw -> Raw
108   - | PreTypes.Struct -> Struct
109   - | PreTypes.CONLL -> CONLL
110   - | PreTypes.ENIAM -> ENIAM
111   - | PreTypes.Mate -> Mate
112   - | PreTypes.Swigra -> Swigra
113   - | PreTypes.POLFIE -> POLFIE
  107 + ENIAMsubsyntaxTypes.Raw -> Raw
  108 + | ENIAMsubsyntaxTypes.Struct -> Struct
  109 + | ENIAMsubsyntaxTypes.CONLL -> CONLL
  110 + | ENIAMsubsyntaxTypes.ENIAM -> ENIAM
  111 + | ENIAMsubsyntaxTypes.Mate -> Mate
  112 + | ENIAMsubsyntaxTypes.Swigra -> Swigra
  113 + | ENIAMsubsyntaxTypes.POLFIE -> POLFIE
114 114  
(* Converts a sentence value from the pre-processing types module into the
   same-named constructors in scope in this file.
   - RawSentence, StructSentence and DepSentence carry their payload over
     unchanged.
   - QuotedSentences rebuilds every record field by field (pid, pbeg, plen,
     pnext, pfile_prefix) and translates the nested psentence recursively.
   - AltSentence translates both the mode and the sentence of every pair.
   This commit only changes the source module from PreTypes to
   ENIAMsubsyntaxTypes. *)
115 115 let rec translate_sentence = function
116   - PreTypes.RawSentence s -> RawSentence s
117   - | PreTypes.StructSentence(paths,last) -> StructSentence(paths,last)
118   - | PreTypes.DepSentence(paths) -> DepSentence(paths)
119   - | PreTypes.QuotedSentences sentences ->
  116 + ENIAMsubsyntaxTypes.RawSentence s -> RawSentence s
  117 + | ENIAMsubsyntaxTypes.StructSentence(paths,last) -> StructSentence(paths,last)
  118 + | ENIAMsubsyntaxTypes.DepSentence(paths) -> DepSentence(paths)
  119 + | ENIAMsubsyntaxTypes.QuotedSentences sentences ->
120 120 QuotedSentences(Xlist.map sentences (fun p ->
121   - {pid=p.PreTypes.pid; pbeg=p.PreTypes.pbeg; plen=p.PreTypes.plen; pnext=p.PreTypes.pnext; pfile_prefix=p.PreTypes.pfile_prefix;
122   - psentence=translate_sentence p.PreTypes.psentence}))
123   - | PreTypes.AltSentence l -> AltSentence(Xlist.map l (fun (mode,sentence) ->
  121 + {pid=p.ENIAMsubsyntaxTypes.pid; pbeg=p.ENIAMsubsyntaxTypes.pbeg; plen=p.ENIAMsubsyntaxTypes.plen; pnext=p.ENIAMsubsyntaxTypes.pnext; pfile_prefix=p.ENIAMsubsyntaxTypes.pfile_prefix;
  122 + psentence=translate_sentence p.ENIAMsubsyntaxTypes.psentence}))
  123 + | ENIAMsubsyntaxTypes.AltSentence l -> AltSentence(Xlist.map l (fun (mode,sentence) ->
124 124 translate_mode mode, translate_sentence sentence))
125 125  
(* Converts a paragraph value from the pre-processing types module into the
   same-named constructors in scope in this file.
   - RawParagraph keeps its string.
   - StructParagraph rebuilds every sentence record field by field and
     translates its psentence with translate_sentence.
   - AltParagraph translates the mode and, recursively, the paragraph of
     every alternative.
   This commit only changes the source module from PreTypes to
   ENIAMsubsyntaxTypes. *)
126 126 let rec translate_paragraph = function
127   - PreTypes.RawParagraph s -> RawParagraph s
128   - | PreTypes.StructParagraph sentences ->
  127 + ENIAMsubsyntaxTypes.RawParagraph s -> RawParagraph s
  128 + | ENIAMsubsyntaxTypes.StructParagraph sentences ->
129 129 StructParagraph(Xlist.map sentences (fun p ->
130   - {pid=p.PreTypes.pid; pbeg=p.PreTypes.pbeg; plen=p.PreTypes.plen; pnext=p.PreTypes.pnext; pfile_prefix=p.PreTypes.pfile_prefix;
131   - psentence=translate_sentence p.PreTypes.psentence}))
132   - | PreTypes.AltParagraph l -> AltParagraph(Xlist.map l (fun (mode,paragraph) ->
  130 + {pid=p.ENIAMsubsyntaxTypes.pid; pbeg=p.ENIAMsubsyntaxTypes.pbeg; plen=p.ENIAMsubsyntaxTypes.plen; pnext=p.ENIAMsubsyntaxTypes.pnext; pfile_prefix=p.ENIAMsubsyntaxTypes.pfile_prefix;
  131 + psentence=translate_sentence p.ENIAMsubsyntaxTypes.psentence}))
  132 + | ENIAMsubsyntaxTypes.AltParagraph l -> AltParagraph(Xlist.map l (fun (mode,paragraph) ->
133 133 translate_mode mode, translate_paragraph paragraph))
134 134  
(* Converts a text value from the pre-processing types module into the
   same-named constructors in scope in this file.
   - RawText keeps its string.
   - StructText translates every paragraph with translate_paragraph. Before
     this commit the constructor also carried a token array that was passed
     through unchanged; afterwards it carries the paragraph list only.
   - AltText translates the mode and, recursively, the text of every
     alternative. *)
135 135 let rec translate_text = function
136   - PreTypes.RawText s -> RawText s
137   - | PreTypes.StructText(paragraphs,tokens) ->
138   - StructText(Xlist.map paragraphs translate_paragraph,tokens)
139   - | PreTypes.AltText l -> AltText(Xlist.map l (fun (mode,text) ->
  136 + ENIAMsubsyntaxTypes.RawText s -> RawText s
  137 + | ENIAMsubsyntaxTypes.StructText paragraphs ->
  138 + StructText(Xlist.map paragraphs translate_paragraph)
  139 + | ENIAMsubsyntaxTypes.AltText l -> AltText(Xlist.map l (fun (mode,text) ->
140 140 translate_mode mode, translate_text text))
141 141  
142   -let eniam_parse_sentence timeout test_only_flag paths last tokens =
  142 +let eniam_parse_sentence timeout test_only_flag paths last tokens lex_sems =
143 143 let result = empty_eniam_parse_result in
144 144 let time2 = time_fun () in
145 145 try
146   - let chart = LCGlexicon.create (paths,last) tokens in
  146 + let chart = LCGlexicon.create (paths,last) tokens lex_sems in
147 147 let chart,references = LCGchart.lazify chart in
148 148 let result = if test_only_flag then result else {result with chart=chart} in
149 149 let time3 = time_fun () in
... ... @@ -196,11 +196,11 @@ let eniam_parse_sentence timeout test_only_flag paths last tokens =
196 196 let time3 = time_fun () in
197 197 {result with status=LexiconError; msg=Printexc.to_string e; lex_time=time3 -. time2}
198 198  
199   -let conll_parse_sentence timeout test_only_flag paths tokens =
  199 +let conll_parse_sentence timeout test_only_flag paths tokens lex_sems =
200 200 let result = empty_conll_parse_result in
201 201 let time2 = time_fun () in
202 202 try
203   - let dep_chart = LCGlexicon.dep_create paths tokens in
  203 + let dep_chart = LCGlexicon.dep_create paths tokens lex_sems in
204 204 let dep_chart,references = LCGchart.dep_lazify dep_chart in
205 205 let result = if test_only_flag then result else {result with dep_chart=dep_chart} in
206 206 let time3 = time_fun () in
... ... @@ -281,7 +281,7 @@ let file_prefix_of_mode = function
281 281 | POLFIE -> "P"
282 282  
283 283 let get_paths old_paths = function
284   - {PreTypes.psentence=PreTypes.DepSentence(paths)},_ ->
  284 + {ENIAMsubsyntaxTypes.psentence=ENIAMsubsyntaxTypes.DepSentence(paths)},_ ->
285 285 Int.iter 0 (Array.length paths - 1) (fun i ->
286 286 let id,_,_ = old_paths.(i) in
287 287 let _,super,label = paths.(i) in
... ... @@ -289,7 +289,7 @@ let get_paths old_paths = function
289 289 paths
290 290 | _ -> failwith "get_paths"
291 291  
292   -let rec parse_sentence timeout test_only_flag mode file_prefix tokens = function
  292 +let rec parse_sentence timeout test_only_flag mode file_prefix tokens lex_sems = function
293 293 RawSentence s ->
294 294 (match mode with
295 295 Swigra ->
... ... @@ -301,14 +301,14 @@ let rec parse_sentence timeout test_only_flag mode file_prefix tokens = function
301 301 | StructSentence(paths,last) ->
302 302 (match mode with
303 303 ENIAM ->
304   - let result = eniam_parse_sentence timeout test_only_flag paths last tokens in
  304 + let result = eniam_parse_sentence timeout test_only_flag paths last tokens lex_sems in
305 305 let result = {result with file_prefix = file_prefix_of_mode mode ^ file_prefix} in
306 306 ENIAMSentence result
307 307 | _ -> failwith "parse_sentence")
308 308 | DepSentence(paths) ->
309 309 (match mode with
310 310 CONLL ->
311   - let result = conll_parse_sentence timeout test_only_flag paths tokens in
  311 + let result = conll_parse_sentence timeout test_only_flag paths tokens lex_sems in
312 312 let result = {result with
313 313 file_prefix = file_prefix_of_mode mode ^ file_prefix;
314 314 paths = paths} in
... ... @@ -322,7 +322,7 @@ let rec parse_sentence timeout test_only_flag mode file_prefix tokens = function
322 322 if not Paths.config.Paths.mate_parser_enabled then DepSentence paths else (
323 323 print_endline "parse_sentence 1";
324 324 (* print_endline (Visualization.html_of_dep_sentence tokens paths); *)
325   - let conll = CONLL.string_of_paths PreTypes.Mate tokens paths in
  325 + let conll = CONLL.string_of_paths ENIAMsubsyntaxTypes.Mate tokens paths in
326 326 print_endline "parse_sentence 2";
327 327 (* printf "|%s|\n" conll; *)
328 328 Printf.fprintf mate_out "%s%!" conll;
... ... @@ -330,7 +330,7 @@ let rec parse_sentence timeout test_only_flag mode file_prefix tokens = function
330 330 let new_paths = get_paths paths (CONLL.load_sentence mate_in) in
331 331 print_endline "parse_sentence 4";
332 332 (* print_endline (Visualization.html_of_dep_sentence tokens new_paths); *)
333   - let result = conll_parse_sentence timeout test_only_flag new_paths tokens in
  333 + let result = conll_parse_sentence timeout test_only_flag new_paths tokens lex_sems in
334 334 let result = {result with
335 335 file_prefix = file_prefix_of_mode mode ^ file_prefix;
336 336 paths=new_paths} in
... ... @@ -338,35 +338,35 @@ let rec parse_sentence timeout test_only_flag mode file_prefix tokens = function
338 338 | _ -> failwith "parse_sentence")
339 339 | QuotedSentences sentences ->
340 340 let sentences = Xlist.rev_map sentences (fun p ->
341   - let sentence = parse_sentence timeout test_only_flag mode p.pfile_prefix tokens p.psentence in
  341 + let sentence = parse_sentence timeout test_only_flag mode p.pfile_prefix tokens lex_sems p.psentence in
342 342 {p with psentence=sentence}) in
343 343 QuotedSentences(List.rev sentences)
344 344 | AltSentence l ->
345 345 let l = Xlist.rev_map l (fun (mode,sentence) ->
346   - mode, parse_sentence timeout test_only_flag mode file_prefix tokens sentence) in
  346 + mode, parse_sentence timeout test_only_flag mode file_prefix tokens lex_sems sentence) in
347 347 AltSentence(List.rev l)
348 348 | _ -> failwith "parse_sentence"
349 349  
(* Runs the sentence parser over every sentence of a paragraph.
   - RawParagraph is returned unchanged.
   - StructParagraph: each sentence record keeps its fields except psentence,
     which is replaced by the result of parse_sentence called with the
     record's own pfile_prefix. Xlist.rev_map followed by List.rev yields the
     sentences in their original order.
   - AltParagraph: each alternative is parsed recursively; the mode bound in
     the lambda shadows the outer mode argument, so every alternative is
     parsed under its own mode.
   This commit adds the lex_sems argument and forwards it, together with
   tokens, to parse_sentence and to the recursive call. *)
350   -let rec parse_paragraph timeout test_only_flag mode tokens = function
  350 +let rec parse_paragraph timeout test_only_flag mode tokens lex_sems = function
351 351 RawParagraph s -> RawParagraph s
352 352 | StructParagraph sentences ->
353 353 let sentences = Xlist.rev_map sentences (fun p ->
354   - let sentence = parse_sentence timeout test_only_flag mode p.pfile_prefix tokens p.psentence in
  354 + let sentence = parse_sentence timeout test_only_flag mode p.pfile_prefix tokens lex_sems p.psentence in
355 355 {p with psentence=sentence}) in
356 356 StructParagraph(List.rev sentences)
357 357 | AltParagraph l ->
358 358 let l = Xlist.rev_map l (fun (mode,paragraph) ->
359   - mode, parse_paragraph timeout test_only_flag mode tokens paragraph) in
  359 + mode, parse_paragraph timeout test_only_flag mode tokens lex_sems paragraph) in
360 360 AltParagraph(List.rev l)
361 361  
(* Runs the paragraph parser over every paragraph of a text.
   - RawText is returned unchanged.
   - StructText: every paragraph is parsed with parse_paragraph; Xlist.rev_map
     followed by List.rev yields the paragraphs in their original order.
   - AltText: each alternative is parsed recursively under its own mode (the
     mode bound in the lambda shadows the outer argument).
   Since this commit tokens and lex_sems arrive as explicit arguments instead
   of being unpacked from StructText, whose declaration now takes a paragraph
   list only. The result must therefore be rebuilt as
   StructText(List.rev paragraphs); the previous two-component form
   StructText(List.rev paragraphs, tokens) no longer type-checks. *)
362   -let rec parse_text timeout test_only_flag mode = function
  362 +let rec parse_text timeout test_only_flag mode tokens lex_sems = function
363 363 RawText s -> RawText s
364   - | StructText(paragraphs,tokens) ->
  364 + | StructText paragraphs ->
365 365 let paragraphs = Xlist.rev_map paragraphs (fun paragraph ->
366   - parse_paragraph timeout test_only_flag mode tokens paragraph) in
  366 + parse_paragraph timeout test_only_flag mode tokens lex_sems paragraph) in
367   - StructText(List.rev paragraphs, tokens)
  367 + StructText(List.rev paragraphs)
368 368 | AltText l -> AltText(Xlist.map l (fun (mode,text) ->
369   - mode, parse_text timeout test_only_flag mode text))
  369 + mode, parse_text timeout test_only_flag mode tokens lex_sems text))
370 370  
371 371 let select_mode = function
372 372 (Raw,_),_ -> failwith "select_mode"
... ... @@ -518,7 +518,10 @@ let process_query pre_in pre_out timeout test_only_flag id full_query max_n =
518 518 Marshal.to_channel pre_out full_query [];
519 519 flush pre_out;
520 520 (* print_endline "process_query 2"; *)
521   - let pre_text,msg,pre_time1 = (Marshal.from_channel pre_in : PreTypes.text * string * float) in
  521 + let pre_text,tokens,lex_sems,msg,pre_time1 = (Marshal.from_channel pre_in :
  522 + ENIAMsubsyntaxTypes.text *
  523 + ENIAMtokenizerTypes.token_record ExtArray.t *
  524 + ENIAMlexSemanticsTypes.lex_sem ExtArray.t * string * float) in
522 525 let time2 = time_fun () in
523 526 let result = if test_only_flag then result else {result with pre_text=translate_text pre_text} in
524 527 let result = {result with pre_time1=pre_time1; pre_time2=time2 -. time1} in
... ...
parser/execTypes.ml
... ... @@ -92,7 +92,7 @@ and paragraph =
92 92  
(* A text is either raw (an unprocessed string), structured (a list of
   paragraphs), or a list of alternative renderings tagged with their mode.
   Before this commit StructText also carried the token array; afterwards it
   carries the paragraph list only. *)
93 93 type text =
94 94 RawText of string
95   - | StructText of paragraph list * PreTypes.token_record ExtArray.t (* paragraphs * tokens *)
  95 + | StructText of paragraph list
96 96 | AltText of (mode * text) list
97 97  
98 98  
... ...
parser/makefile
1 1 OCAMLC=ocamlc
2 2 OCAMLOPT=ocamlopt
3 3 OCAMLDEP=ocamldep
4   -INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../../../Dropbox/lib/latexvis -I ../../installed/latexvis -I ../lib/xt -I ../../../Dropbox/Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre -I ../corpora
  4 +INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../../../Dropbox/lib/latexvis -I ../../installed/latexvis -I ../lib/xt -I ../../../Dropbox/Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre -I ../tokenizer -I ../subsyntax -I ../walenty -I ../lexSemantics -I ../corpora
5 5 #INCLUDES=-I +xml-light -I +xlib -I ../pre
6 6 OCAMLFLAGS=$(INCLUDES) -g
7 7 OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa latexvis.cmxa #nkjp.cmxa
8 8 #OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa xlib.cmxa
9 9  
10   -PRE= ../pre/paths.ml ../pre/walTypes.ml ../pre/preTypes.ml ../pre/walStringOf.ml ../corpora/CONLL.ml
  10 +PRE= ../pre/paths.ml ../walenty/ENIAMwalTypes.ml ../tokenizer/ENIAMtokenizerTypes.ml ../subsyntax/ENIAMsubsyntaxTypes.ml ../lexSemantics/ENIAMlexSemanticsTypes.ml ../walenty/ENIAMwalStringOf.ml ../corpora/CONLL.ml
11 11 LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml
12 12 #LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml
13 13 DISAMB= disambSelPref.ml disambLemma.ml
... ...
parser/pipe.ml
... ... @@ -45,7 +45,7 @@ let simple_disambiguate (paths,last) =
45 45  
46 46 let lcg_process query =
47 47 let ic,oc = Unix.open_connection (get_sock_addr Paths.pre_host Paths.pre_port) in
48   - let result = Exec.process_query ic oc 30. false "x" (PreTypes.RawText query) 10 in
  48 + let result = Exec.process_query ic oc 30. false "x" (PreTypes.RawText query,ExtArray.make 1 ENIAMtokenizerTypes.empty_token) 10 in
49 49 let path = "results/" in
50 50 Visualization.print_html_text path "input_text" result.input_text;
51 51 Visualization.print_html_text path "pre_text" result.pre_text;
... ... @@ -117,7 +117,7 @@ let lcg_process query =
117 117 LatexMain.latex_compile_and_clean "results/" "chart"*)
118 118 | _ -> ());*)
119 119 (* Printf.fprintf oc "\n%!"; *)
120   - Marshal.to_channel oc (PreTypes.RawText "") [];
  120 + Marshal.to_channel oc (PreTypes.RawText "",ExtArray.make 1 ENIAMtokenizerTypes.empty_token) [];
121 121 flush oc;
122 122 let _ = Unix.shutdown_connection ic in
123 123 ()
... ... @@ -188,6 +188,7 @@ let process_id s =
188 188 Xstring.cut_prefix "NKJP_1M_" a ^ "." ^ Xstring.cut_sufix "-s" (Xstring.cut_prefix "morph_" c)
189 189 else failwith ("process_id: " ^ s)
190 190  
  191 +(* FIXME
191 192 let process_conll_corpus filename =
192 193 let corpus = File.file_in filename (fun file -> CONLL.match_corpus (CONLL.load_corpus file)) in
193 194 print_endline "process_conll_corpus";
... ... @@ -206,11 +207,11 @@ let process_conll_corpus filename =
206 207 (* Exec.print_result stdout result; *)
207 208 (* Visualization.print_paths "results/" "paths" result.paths; *)
208 209 ());
209   - Marshal.to_channel oc (PreTypes.RawText "") [];
  210 + Marshal.to_channel oc (PreTypes.RawText "",ExtArray.make 1 ENIAMtokenizerTypes.empty_token) [];
210 211 flush oc;
211 212 let _ = Unix.shutdown_connection ic in
212 213 ()
213   -
  214 +*)
214 215 let _ =
215 216 (* process_conll_corpus "../../NLP resources/Skladnica-zaleznosciowa-mod_130121.conll"; *)
216 217 (* process_conll_corpus "../../NLP resources/skladnica_zaleznosciowa.conll"; *)
... ...
parser/visualization.ml
... ... @@ -902,22 +902,23 @@ let rec html_of_paragraph path tokens = function
902 902 sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_paragraph path tokens paragraph))) ^
903 903 "</table>"
904 904  
(* Renders a text as an HTML fragment.
   - RawText: the string is returned as is.
   - StructText: every paragraph is rendered with html_of_paragraph path tokens
     and the results are joined with "<BR>\n". Before this commit this branch
     also appended the three token tables (html_of_tokens,
     html_of_tokens_simple_valence, html_of_tokens_valence); afterwards it
     renders the paragraphs only, and tokens is an explicit argument.
   - AltText: a bordered table with one row per alternative, showing the mode
     name and the recursively rendered text. *)
905   -let rec html_of_text path = function
  905 +let rec html_of_text path tokens = function
906 906 RawText s -> s
907   - | StructText(paragraphs,tokens) ->
908   - sprintf "%s<BR>\n%s\n%s\n%s"
909   - (String.concat "<BR>\n" (Xlist.map paragraphs (html_of_paragraph path tokens)))
910   - (html_of_tokens tokens) (html_of_tokens_simple_valence tokens) (html_of_tokens_valence tokens)
  907 + | StructText paragraphs ->
  908 + String.concat "<BR>\n" (Xlist.map paragraphs (html_of_paragraph path tokens))
911 909 | AltText l ->
912 910 "<table border=3>" ^
913 911 String.concat "\n" (Xlist.map l (fun (mode,text) ->
914   - sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_text path text))) ^
  912 + sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_text path tokens text))) ^
915 913 "</table>"
916 914  
(* Writes the file path ^ name ^ ".html" containing, in order: html_header,
   the rendered text (html_of_text), the token table (html_of_tokens) and
   html_trailer.
   After this commit the function takes tokens and lex_sems explicitly. The
   two valence tables are commented out, and lex_sems is not used in the
   visible body. *)
917   -let print_html_text path name text =
  915 +let print_html_text path name text tokens lex_sems =
918 916 File.file_out (path ^ name ^ ".html") (fun file ->
919 917 fprintf file "%s\n" html_header;
920   - fprintf file "%s\n" (html_of_text path text);
  918 + fprintf file "%s<BR>\n" (html_of_text path tokens text);
  919 + fprintf file "%s<BR>\n" (html_of_tokens tokens);
  920 +(* fprintf file "%s<BR>\n" (html_of_tokens_simple_valence tokens);
  921 + fprintf file "%s<BR>\n" (html_of_tokens_valence tokens);*)
921 922 fprintf file "%s\n" html_trailer)
922 923  
923 924 let rec find_prev_next_sentence pid rev = function
... ... @@ -989,13 +990,13 @@ let rec print_main_result_paragraph cg_bin_path path id tokens prev_next_map = f
989 990 Xlist.iter sentences (fun p -> print_main_result_sentence cg_bin_path path id tokens p.pid prev_next_map p.psentence)
990 991 | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> print_main_result_paragraph cg_bin_path path id tokens prev_next_map paragraph)
991 992  
(* Prints the main result pages for every paragraph of a text.
   - RawText: nothing is printed.
   - StructText: builds prev_next_map from the entries collected by folding
     find_prev_next_paragraph over the paragraphs (the accumulated list is
     reversed before make_prev_next_map), then calls
     print_main_result_paragraph on each paragraph.
   - AltText: recurses into every alternative; the mode tag is ignored.
   Since this commit tokens is an explicit argument instead of being unpacked
   from StructText. *)
992   -let rec print_main_result_text cg_bin_path path id = function
  993 +let rec print_main_result_text cg_bin_path path id tokens = function
993 994 RawText s -> ()
994   - | StructText(paragraphs,tokens) ->
  995 + | StructText paragraphs ->
995 996 let prev_next_map = make_prev_next_map StringMap.empty ""
996 997 (List.rev (Xlist.fold paragraphs [] find_prev_next_paragraph)) in
997 998 Xlist.iter paragraphs (print_main_result_paragraph cg_bin_path path id tokens prev_next_map)
998   - | AltText l -> Xlist.iter l (fun (mode,text) -> print_main_result_text cg_bin_path path id text)
  999 + | AltText l -> Xlist.iter l (fun (mode,text) -> print_main_result_text cg_bin_path path id tokens text)
999 1000  
1000 1001 let print_main_result_first_page cg_bin_path mode path id tokens query result prev_next_map =
1001 1002 let prev,next = try StringMap.find prev_next_map result.file_prefix with Not_found -> failwith "print_main_result" in
... ... @@ -1045,10 +1046,10 @@ let rec print_main_result_first_page_paragraph cg_bin_path path id tokens prev_n
1045 1046 print_main_result_first_page_sentence cg_bin_path path id tokens p.pid prev_next_map p.psentence
1046 1047 | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> print_main_result_first_page_paragraph cg_bin_path path id tokens prev_next_map paragraph)
1047 1048  
(* Prints the first result page of a text.
   - RawText: nothing is printed.
   - StructText: builds prev_next_map over all paragraphs, as
     print_main_result_text does, but prints only the first paragraph
     (List.hd paragraphs).
     NOTE(review): List.hd raises Failure on an empty list, so a StructText
     with no paragraphs would abort here; confirm callers never produce one.
   - AltText: recurses into every alternative; the mode tag is ignored.
   Since this commit tokens is an explicit argument instead of being unpacked
   from StructText. *)
1048   -let rec print_main_result_first_page_text cg_bin_path path id = function
  1049 +let rec print_main_result_first_page_text cg_bin_path path id tokens = function
1049 1050 RawText s -> ()
1050   - | StructText(paragraphs,tokens) ->
  1051 + | StructText paragraphs ->
1051 1052 let prev_next_map = make_prev_next_map StringMap.empty ""
1052 1053 (List.rev (Xlist.fold paragraphs [] find_prev_next_paragraph)) in
1053 1054 print_main_result_first_page_paragraph cg_bin_path path id tokens prev_next_map (List.hd paragraphs)
1054   - | AltText l -> Xlist.iter l (fun (mode,text) -> print_main_result_first_page_text cg_bin_path path id text)
  1055 + | AltText l -> Xlist.iter l (fun (mode,text) -> print_main_result_first_page_text cg_bin_path path id tokens text)
... ...