Commit 1bd8753116db8f229a44d8d7db061625d45d92a6
1 parent
cb8aa514
Interfejs do semparsera
Showing
3 changed files
with
23 additions
and
49 deletions
exec/ENIAMvisualization.ml
... | ... | @@ -799,16 +799,18 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam |
799 | 799 | sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix) ^ |
800 | 800 | "" |
801 | 801 | | NotParsed -> |
802 | + if verbosity = 0 then () else ( | |
803 | + ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.chart1); | |
802 | 804 | if verbosity < 2 then () else ( |
803 | - ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.chart1; | |
804 | 805 | ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.chart2; |
805 | 806 | ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2; |
806 | 807 | ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3); |
807 | 808 | if verbosity = 0 then () else ( |
808 | 809 | ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.chart3); |
809 | 810 | sprintf "not_parsed: paths_size=%d chart_size=%d\n" result.paths_size result.chart_size ^ |
811 | + (if verbosity = 0 then "" else | |
812 | + sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix) ^ | |
810 | 813 | (if verbosity < 2 then "" else |
811 | - sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix ^ | |
812 | 814 | sprintf "<BR><A HREF=\"%s_2_chart.pdf\">Chart 2</A>\n" file_prefix ^ |
813 | 815 | sprintf "<BR><A HREF=\"%s_2_references.pdf\">References 2</A>\n" file_prefix ^ |
814 | 816 | sprintf "<BR><A HREF=\"%s_3_references.pdf\">References 3</A>\n" file_prefix) ^ |
... | ... |
exec/TODO
1 | 1 | poprawienie text-paragraph-sentence na fold w pozostałych modułach |
2 | -interferjs z linii poleceń dla parsera i semparsera | |
2 | +interfejs z linii poleceń dla parsera | |
3 | 3 | przetwarzanie biogramu do końca |
4 | 4 | przetwarzanie dialogów |
5 | 5 | przechwytywanie błędów subsyntax itp w parserze i semparserze |
6 | 6 | interfejs dla clarin |
7 | +drukowanym w „Dialogu” | |
... | ... |
exec/semparser.ml
... | ... | @@ -26,25 +26,10 @@ let rules = ENIAM_LCGlexicon.make_rules false ENIAM_LCGlexiconTypes.user_lexicon |
26 | 26 | let load_cats_map filename = |
27 | 27 | File.fold_tab filename StringMap.empty (fun map -> function |
28 | 28 | [lemma;cat] -> StringMap.add_inc map lemma [cat] (fun l -> cat :: l) |
29 | - | l -> failwith ("load_senses_map: " ^ String.concat "\t" l)) | |
29 | + | l -> failwith ("load_cats_map: " ^ String.concat "\t" l)) | |
30 | 30 | |
31 | 31 | let cats_map = load_cats_map ENIAM_LCGlexiconTypes.user_cats_filename |
32 | 32 | |
33 | - | |
34 | -let examples = [ | |
35 | - (* "liceum","W 1984-89 uczęszczał do VII Liceum Ogólnokształcącego im. K.K. Baczyńskiego w Szczecinie."; *) | |
36 | - "studia","Następnie studiował architekturę na Politechnice Szczecińskiej, dyplom uzyskał w 1994."; | |
37 | -] | |
38 | - | |
39 | - | |
40 | -(* | |
41 | -type output = Text | Xml | Html | Marsh | Graphviz | |
42 | - | |
43 | -let output = ref Text | |
44 | -let comm_stdio = ref true | |
45 | -let sentence_split = ref true | |
46 | -let port = ref 0 | |
47 | -*) | |
48 | 33 | let subsyntax_built_in = ref true |
49 | 34 | let subsyntax_host = ref "localhost" |
50 | 35 | let subsyntax_port = ref 5739 |
... | ... | @@ -72,12 +57,20 @@ let spec_list = [ |
72 | 57 | "-b", Arg.Unit (fun () -> subsyntax_built_in:=true), "Use built in version of ENIAMsubsyntax (default)"; |
73 | 58 | "--port", Arg.Int (fun p -> subsyntax_built_in:=false; subsyntax_port:=p), "<port> Connect to ENIAMsubsyntax on a given port"; |
74 | 59 | "--host", Arg.String (fun s -> subsyntax_built_in:=false; subsyntax_host:=s), "<hostname> Connect to ENIAMsubsyntax on a given host (by default localhost)"; |
60 | + "--timeout", Arg.Float (fun x -> timeout:=x), "<seconds> Sets timeout value for parser (default 30 seconds)"; | |
61 | + "-v", Arg.Int (fun v -> verbosity:=v), "<val> Sets verbosity level of parser\n 0 - print only status information\n 1 - print data relevant to the status of a given sentence (default)\n 2 - print all data structures"; | |
62 | + "--img", Arg.Int (fun v -> img:=v), "<val> Selects which images are included in output html page \n 0 - no images included\n 1 - simple dependency trees included (default)\n 2 - dependency trees included"; | |
63 | + "--output", Arg.String (fun s -> output_dir:=s), "<dir> Sets output directory (by default results/)"; | |
64 | + "--sel_modes", Arg.Unit (fun () -> select_sentence_modes_flag:=true), "Select sentence modes"; | |
65 | + "--no_sel_modes", Arg.Unit (fun () -> select_sentence_modes_flag:=false), "Do not select sentence modes (default)"; | |
66 | + "--sel_sent", Arg.Unit (fun () -> select_sentences_flag:=true), "Select parsed sentences (default)"; | |
67 | + "--no_sel_sent", Arg.Unit (fun () -> select_sentences_flag:=false), "Do not select parsed sentences"; | |
75 | 68 | ] |
76 | 69 | |
77 | 70 | let usage_msg = |
78 | 71 | "Usage: semparser <options>\nInput is a sequence of lines. Empty line ends the sequence and invoke parsing. Double empty line shutdown parser.\nOptions are:" |
79 | 72 | |
80 | -let message = "ENIAM_LCGsemparser, a parser for Logical Categorial Grammar formalism\n\ | |
73 | +let message = "ENIAM_LCGsemparser, semantic parser for Logical Categorial Grammar formalism\n\ | |
81 | 74 | Copyright (C) 2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>\n\ |
82 | 75 | Copyright (C) 2017 Institute of Computer Science Polish Academy of Sciences" |
83 | 76 | |
... | ... | @@ -95,7 +88,7 @@ let input_text channel = |
95 | 88 | let get_cats cats_map = function |
96 | 89 | Interp orth -> (try StringMap.find cats_map orth with Not_found -> ["X"]) |
97 | 90 | | Lemma(lemma,_,_) -> (try StringMap.find cats_map lemma with Not_found -> ["X"]) |
98 | - | Proper(_,_,_,cats) -> cats | |
91 | + | Proper(_,_,_,cats) -> if cats = [] then ["X"] else cats | |
99 | 92 | | _ -> ["X"] |
100 | 93 | |
101 | 94 | |
... | ... | @@ -109,8 +102,8 @@ let assign_lex_sems cats_map tokens = |
109 | 102 | ()); |
110 | 103 | lex_sems |
111 | 104 | |
112 | -let rec main_loop sub_in sub_out in_chan out_chan = | |
113 | - let text = input_text in_chan in | |
105 | +let rec main_loop sub_in sub_out = | |
106 | + let text = input_text stdin in | |
114 | 107 | if text = "" then () else ( |
115 | 108 | let text,tokens,msg = |
116 | 109 | if !subsyntax_built_in then ENIAMsubsyntax.catch_parse_text text else ( |
... | ... | @@ -122,28 +115,9 @@ let rec main_loop sub_in sub_out in_chan out_chan = |
122 | 115 | let text = ENIAMexec.parse !timeout !verbosity rules tokens lex_sems text in |
123 | 116 | let text = if !select_sentence_modes_flag then ENIAMselectSent.select_sentence_modes_text text else text in |
124 | 117 | let text = if !select_sentences_flag then ENIAMselectSent.select_sentences_text ENIAMexecTypes.Struct text else text in |
125 | - ENIAMvisualization.print_html_text !output_dir "parsed_text" text !img !verbosity tokens) | |
126 | - (* print_endline "input text begin"; | |
127 | - print_endline text; | |
128 | - print_endline "input text end"; *) | |
129 | - (*if !sentence_split then | |
130 | - let text,tokens = ENIAMsubsyntax.parse_text text in | |
131 | - (match !output with | |
132 | - Text -> output_string out_chan (ENIAMsubsyntaxStringOf.text "" tokens text ^ "\n" ^ ENIAMsubsyntaxStringOf.token_extarray tokens ^ "\n\n") | |
133 | - | Xml -> output_string out_chan (Xml.to_string (ENIAMsubsyntaxXMLof.text_and_tokens text tokens) ^ "\n\n") | |
134 | - | Html -> output_string out_chan (ENIAMsubsyntaxHTMLof.text_and_tokens text tokens ^ "\n\n") | |
135 | - | Marsh -> Marshal.to_channel out_chan (text,tokens) [] | |
136 | - | Graphviz -> failwith "main_loop: ni") | |
137 | - else | |
138 | - let tokens = ENIAMsubsyntax.parse text in | |
139 | - (match !output with | |
140 | - Text -> output_string out_chan (ENIAMsubsyntaxStringOf.token_list tokens ^ "\n\n") | |
141 | - | Xml -> output_string out_chan (Xml.to_string (ENIAMsubsyntaxXMLof.token_list tokens) ^ "\n\n") | |
142 | - | Html -> output_string out_chan (ENIAMsubsyntaxHTMLof.token_list tokens ^ "\n\n") | |
143 | - | Marsh -> Marshal.to_channel out_chan tokens [] | |
144 | - | Graphviz -> output_string out_chan (ENIAMsubsyntaxGraphOf.token_list tokens ^ "\n\n"))*); | |
145 | - flush out_chan; | |
146 | - main_loop sub_in sub_out in_chan out_chan) | |
118 | + ENIAMvisualization.print_html_text !output_dir "parsed_text" text !img !verbosity tokens); | |
119 | + prerr_endline "Done!"; | |
120 | + main_loop sub_in sub_out) | |
147 | 121 | |
148 | 122 | let get_sock_addr host_name port = |
149 | 123 | let he = Unix.gethostbyname host_name in |
... | ... | @@ -160,7 +134,4 @@ let _ = |
160 | 134 | if !subsyntax_built_in then stdin,stdout |
161 | 135 | else Unix.open_connection (get_sock_addr !subsyntax_host !subsyntax_port) in |
162 | 136 | prerr_endline "Ready!"; |
163 | - (*if !comm_stdio then*) main_loop sub_in sub_out stdin stdout | |
164 | - (*else | |
165 | - let sockaddr = Unix.ADDR_INET(Unix.inet_addr_any,!port) in | |
166 | - Unix.establish_server main_loop sockaddr*) | |
137 | + main_loop sub_in sub_out | |
... | ... |