Commit 1bd8753116db8f229a44d8d7db061625d45d92a6

Authored by Wojciech Jaworski
1 parent cb8aa514

Interfejs do semparsera

exec/ENIAMvisualization.ml
... ... @@ -799,16 +799,18 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
799 799 sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix) ^
800 800 ""
801 801 | NotParsed ->
  802 + if verbosity = 0 then () else (
  803 + ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.chart1);
802 804 if verbosity < 2 then () else (
803   - ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.chart1;
804 805 ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.chart2;
805 806 ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2;
806 807 ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3);
807 808 if verbosity = 0 then () else (
808 809 ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.chart3);
809 810 sprintf "not_parsed: paths_size=%d chart_size=%d\n" result.paths_size result.chart_size ^
  811 + (if verbosity = 0 then "" else
  812 + sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix) ^
810 813 (if verbosity < 2 then "" else
811   - sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix ^
812 814 sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix ^
813 815 sprintf "<BR><A HREF=\"%s_2_references.pdf\">References 2</A>\n" file_prefix ^
814 816 sprintf "<BR><A HREF=\"%s_3_references.pdf\">References 3</A>\n" file_prefix) ^
... ...
exec/TODO
1 1 poprawienie text-paragraph-sentence na fold w pozostałych modułach
2   -interfejs z linii poleceń dla parsera i semparsera
  2 +interfejs z linii poleceń dla parsera
3 3 przetwarzanie biogramu do końca
4 4 przetwarzanie dialogów
5 5 przechwytywanie błędów subsyntax itp w parserze i semparserze
6 6 interfejs dla clarin
  7 +drukowanym w „Dialogu”
... ...
exec/semparser.ml
... ... @@ -26,25 +26,10 @@ let rules = ENIAM_LCGlexicon.make_rules false ENIAM_LCGlexiconTypes.user_lexicon
26 26 let load_cats_map filename =
27 27 File.fold_tab filename StringMap.empty (fun map -> function
28 28 [lemma;cat] -> StringMap.add_inc map lemma [cat] (fun l -> cat :: l)
29   - | l -> failwith ("load_senses_map: " ^ String.concat "\t" l))
  29 + | l -> failwith ("load_cats_map: " ^ String.concat "\t" l))
30 30  
31 31 let cats_map = load_cats_map ENIAM_LCGlexiconTypes.user_cats_filename
32 32  
33   -
34   -let examples = [
35   - (* "liceum","W 1984-89 uczęszczał do VII Liceum Ogólnokształcącego im. K.K. Baczyńskiego w Szczecinie."; *)
36   - "studia","Następnie studiował architekturę na Politechnice Szczecińskiej, dyplom uzyskał w 1994.";
37   -]
38   -
39   -
40   -(*
41   -type output = Text | Xml | Html | Marsh | Graphviz
42   -
43   -let output = ref Text
44   -let comm_stdio = ref true
45   -let sentence_split = ref true
46   -let port = ref 0
47   -*)
48 33 let subsyntax_built_in = ref true
49 34 let subsyntax_host = ref "localhost"
50 35 let subsyntax_port = ref 5739
... ... @@ -72,12 +57,20 @@ let spec_list = [
72 57 "-b", Arg.Unit (fun () -> subsyntax_built_in:=true), "Use built in version of ENIAMsubsyntax (default)";
73 58 "--port", Arg.Int (fun p -> subsyntax_built_in:=false; subsyntax_port:=p), "<port> Connect to ENIAMsubsyntax on a given port";
74 59 "--host", Arg.String (fun s -> subsyntax_built_in:=false; subsyntax_host:=s), "<hostname> Connect to ENIAMsubsyntax on a given host (by default localhost)";
  60 + "--timeout", Arg.Float (fun x -> timeout:=x), "<seconds> Sets timeout value for parser (default 30 seconds)";
  61 + "-v", Arg.Int (fun v -> verbosity:=v), "<val> Sets verbosity level of parser\n 0 - print only status information\n 1 - print data relevant to the status of a given sentence (default)\n 2 - print all data structures";
  62 + "--img", Arg.Int (fun v -> img:=v), "<val> Selects which images are included in output html page \n 0 - no images included\n 1 - simple dependency trees included (default)\n 2 - dependency trees included";
  63 + "--output", Arg.String (fun s -> output_dir:=s), "<dir> Sets output directory (by default results/)";
  64 + "--sel_modes", Arg.Unit (fun () -> select_sentence_modes_flag:=true), "Select sencence modes";
  65 + "--no_sel_modes", Arg.Unit (fun () -> select_sentence_modes_flag:=false), "Do not select sencence modes (default)";
  66 + "--sel_sent", Arg.Unit (fun () -> select_sentences_flag:=true), "Select parsed sentences (default)";
  67 + "--no_sel_sent", Arg.Unit (fun () -> select_sentences_flag:=false), "Do not select parsed sentences";
75 68 ]
76 69  
77 70 let usage_msg =
78 71 "Usage: semparser <options>\nInput is a sequence of lines. Empty line ends the sequence and invoke parsing. Double empty line shutdown parser.\nOptions are:"
79 72  
80   -let message = "ENIAM_LCGsemparser, a parser for Logical Categorial Grammar formalism\n\
  73 +let message = "ENIAM_LCGsemparser, semantic parser for Logical Categorial Grammar formalism\n\
81 74 Copyright (C) 2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>\n\
82 75 Copyright (C) 2017 Institute of Computer Science Polish Academy of Sciences"
83 76  
... ... @@ -95,7 +88,7 @@ let input_text channel =
95 88 let get_cats cats_map = function
96 89 Interp orth -> (try StringMap.find cats_map orth with Not_found -> ["X"])
97 90 | Lemma(lemma,_,_) -> (try StringMap.find cats_map lemma with Not_found -> ["X"])
98   - | Proper(_,_,_,cats) -> cats
  91 + | Proper(_,_,_,cats) -> if cats = [] then ["X"] else cats
99 92 | _ -> ["X"]
100 93  
101 94  
... ... @@ -109,8 +102,8 @@ let assign_lex_sems cats_map tokens =
109 102 ());
110 103 lex_sems
111 104  
112   -let rec main_loop sub_in sub_out in_chan out_chan =
113   - let text = input_text in_chan in
  105 +let rec main_loop sub_in sub_out =
  106 + let text = input_text stdin in
114 107 if text = "" then () else (
115 108 let text,tokens,msg =
116 109 if !subsyntax_built_in then ENIAMsubsyntax.catch_parse_text text else (
... ... @@ -122,28 +115,9 @@ let rec main_loop sub_in sub_out in_chan out_chan =
122 115 let text = ENIAMexec.parse !timeout !verbosity rules tokens lex_sems text in
123 116 let text = if !select_sentence_modes_flag then ENIAMselectSent.select_sentence_modes_text text else text in
124 117 let text = if !select_sentences_flag then ENIAMselectSent.select_sentences_text ENIAMexecTypes.Struct text else text in
125   - ENIAMvisualization.print_html_text !output_dir "parsed_text" text !img !verbosity tokens)
126   - (* print_endline "input text begin";
127   - print_endline text;
128   - print_endline "input text end"; *)
129   - (*if !sentence_split then
130   - let text,tokens = ENIAMsubsyntax.parse_text text in
131   - (match !output with
132   - Text -> output_string out_chan (ENIAMsubsyntaxStringOf.text "" tokens text ^ "\n" ^ ENIAMsubsyntaxStringOf.token_extarray tokens ^ "\n\n")
133   - | Xml -> output_string out_chan (Xml.to_string (ENIAMsubsyntaxXMLof.text_and_tokens text tokens) ^ "\n\n")
134   - | Html -> output_string out_chan (ENIAMsubsyntaxHTMLof.text_and_tokens text tokens ^ "\n\n")
135   - | Marsh -> Marshal.to_channel out_chan (text,tokens) []
136   - | Graphviz -> failwith "main_loop: ni")
137   - else
138   - let tokens = ENIAMsubsyntax.parse text in
139   - (match !output with
140   - Text -> output_string out_chan (ENIAMsubsyntaxStringOf.token_list tokens ^ "\n\n")
141   - | Xml -> output_string out_chan (Xml.to_string (ENIAMsubsyntaxXMLof.token_list tokens) ^ "\n\n")
142   - | Html -> output_string out_chan (ENIAMsubsyntaxHTMLof.token_list tokens ^ "\n\n")
143   - | Marsh -> Marshal.to_channel out_chan tokens []
144   - | Graphviz -> output_string out_chan (ENIAMsubsyntaxGraphOf.token_list tokens ^ "\n\n"))*);
145   - flush out_chan;
146   - main_loop sub_in sub_out in_chan out_chan)
  118 + ENIAMvisualization.print_html_text !output_dir "parsed_text" text !img !verbosity tokens);
  119 + prerr_endline "Done!";
  120 + main_loop sub_in sub_out)
147 121  
148 122 let get_sock_addr host_name port =
149 123 let he = Unix.gethostbyname host_name in
... ... @@ -160,7 +134,4 @@ let _ =
160 134 if !subsyntax_built_in then stdin,stdout
161 135 else Unix.open_connection (get_sock_addr !subsyntax_host !subsyntax_port) in
162 136 prerr_endline "Ready!";
163   - (*if !comm_stdio then*) main_loop sub_in sub_out stdin stdout
164   - (*else
165   - let sockaddr = Unix.ADDR_INET(Unix.inet_addr_any,!port) in
166   - Unix.establish_server main_loop sockaddr*)
  137 + main_loop sub_in sub_out
... ...