Commit 57d87407db4229d6ad5ff7907ff0b5ff89dd86b6

Authored by Wojciech Jaworski
1 parent bc4868ee

Poprawki w lexSemantics

exec/domparser.ml deleted
1 -(*  
2 - * ENIAMexec implements ENIAM processing stream  
3 - * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>  
4 - * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences  
5 - *  
6 - * This library is free software: you can redistribute it and/or modify  
7 - * it under the terms of the GNU Lesser General Public License as published by  
8 - * the Free Software Foundation, either version 3 of the License, or  
9 - * (at your option) any later version.  
10 - *  
11 - * This library is distributed in the hope that it will be useful,  
12 - * but WITHOUT ANY WARRANTY; without even the implied warranty of  
13 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  
14 - * GNU Lesser General Public License for more details.  
15 - *  
16 - * You should have received a copy of the GNU Lesser General Public License  
17 - * along with this program. If not, see <http://www.gnu.org/licenses/>.  
18 - *)  
19 -  
20 -open ENIAMsubsyntaxTypes  
21 -open Xstd  
22 -  
23 -let rules = ENIAM_LCGlexicon.make_rules false ENIAM_LCGlexiconTypes.user_lexicon_filename  
24 -let dep_rules = ENIAM_LCGlexicon.make_rules true ENIAM_LCGlexiconTypes.user_lexicon_filename  
25 -  
26 -type output = (*Text | Xml |*) Html | Marsh (*| Graphviz*)  
27 -  
28 -let output = ref Html  
29 -let comm_stdio = ref true  
30 -let port = ref 5439  
31 -let lexSemantics_built_in = ref true  
32 -let lexSemantics_host = ref "localhost"  
33 -let lexSemantics_port = ref 5739  
34 -let verbosity = ref 1  
35 -let img = ref 1  
36 -let timeout = ref 30.  
37 -let select_sentence_modes_flag = ref false  
38 -let select_sentences_flag = ref true  
39 -let semantic_processing_flag = ref true  
40 -let output_dir = ref "results/"  
41 -let spec_list = [  
42 - "-i", Arg.Unit (fun () -> comm_stdio:=true), "Communication using stdio (default)";  
43 - "-p", Arg.Int (fun p -> comm_stdio:=false; port:=p), "<port> Communication using sockets on given port number";  
44 - (*"-t", Arg.Unit (fun () -> output:=Text), "Output as plain text (default)";  
45 - "-x", Arg.Unit (fun () -> output:=Xml), "Output as XML";*)  
46 - "-m", Arg.Unit (fun () -> output:=Marsh), "Output as marshalled Ocaml data structure";  
47 - "-h", Arg.Unit (fun () -> output:=Html), "Output as HTML (default)";  
48 - (*"-g", Arg.Unit (fun () -> output:=Graphviz; sentence_split:=false), "Output as graphviz dot file; turns sentence split off";*)  
49 - (* "-r", Arg.String (fun p ->  
50 - ENIAMtokenizerTypes.set_resource_path p;  
51 - ENIAMmorphologyTypes.set_resource_path p;  
52 - ENIAMsubsyntaxTypes.set_resource_path p), "<path> Set resource path"; *)  
53 - "-b", Arg.Unit (fun () -> lexSemantics_built_in:=true), "Use built in version of ENIAMlexSemantics (default)";  
54 - "--port", Arg.Int (fun p -> lexSemantics_built_in:=false; lexSemantics_port:=p), "<port> Connect to ENIAMlexSemantics on a given port";  
55 - "--host", Arg.String (fun s -> lexSemantics_built_in:=false; lexSemantics_host:=s), "<hostname> Connect to ENIAMlexSemantics on a given host (by default localhost)";  
56 - "--timeout", Arg.Float (fun x -> timeout:=x), "<seconds> Sets timeout value for parser (default 30 seconds)";  
57 - "-v", Arg.Int (fun v -> verbosity:=v), "<val> Sets verbosity level of parser\n 0 - print only status information\n 1 - print data relevant to the status of a given sentence (default)\n 2 - print all data structures";  
58 - "--img", Arg.Int (fun v -> img:=v), "<val> Selects which images are included in output html page \n 0 - no images included\n 1 - simple dependency trees included (default)\n 2 - dependency trees included";  
59 - "--output", Arg.String (fun s -> output_dir:=s), "<dir> Sets output directory (by default results/)";  
60 - "--sel_modes", Arg.Unit (fun () -> select_sentence_modes_flag:=true), "Select sencence modes";  
61 - "--no_sel_modes", Arg.Unit (fun () -> select_sentence_modes_flag:=false), "Do not select sencence modes (default)";  
62 - "--sel_sent", Arg.Unit (fun () -> select_sentences_flag:=true), "Select parsed sentences (default)";  
63 - "--no_sel_sent", Arg.Unit (fun () -> select_sentences_flag:=false), "Do not select parsed sentences";  
64 - "--sem", Arg.Unit (fun () -> semantic_processing_flag:=true), "Perform semantic processing (default)";  
65 - "--no_sem", Arg.Unit (fun () -> semantic_processing_flag:=false), "Do not perforf semantic processing";  
66 - ]  
67 -  
68 -let usage_msg =  
69 - "Usage: domparser <options>\nInput is a sequence of lines. Empty line ends the sequence and invoke parsing. Double empty line shutdown parser.\nOptions are:"  
70 -  
71 -let message = "ENIAM_LCGparser, semantic parser for Logical Categorial Grammar formalism\n\  
72 -Copyright (C) 2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>\n\  
73 -Copyright (C) 2017 Institute of Computer Science Polish Academy of Sciences"  
74 -  
75 -let anon_fun s = raise (Arg.Bad ("invalid argument: " ^ s))  
76 -  
77 -let input_text channel =  
78 - let s = ref (try input_line channel with End_of_file -> "") in  
79 - let lines = ref [] in  
80 - while !s <> "" do  
81 - lines := !s :: !lines;  
82 - s := try input_line channel with End_of_file -> ""  
83 - done;  
84 - String.concat "\n" (List.rev !lines)  
85 -  
86 -let rec main_loop sub_in sub_out in_chan out_chan =  
87 - let text = input_text in_chan in  
88 - if text = "" then () else (  
89 - let text,tokens,lex_sems,msg =  
90 - if !lexSemantics_built_in then  
91 - let text,tokens,msg = ENIAMsubsyntax.catch_parse_text text in  
92 - let lex_sems,msg =  
93 - if msg <> "" then ExtArray.make 0 ENIAMlexSemanticsTypes.empty_lex_sem, msg  
94 - else ENIAMdomainLexSemantics.catch_assign2 tokens text in  
95 - text,tokens,lex_sems,msg else (  
96 - Printf.fprintf sub_out "%s\n\n%!" text;  
97 - (Marshal.from_channel sub_in : ENIAMsubsyntaxTypes.text * ENIAMtokenizerTypes.token_env ExtArray.t * ENIAMlexSemanticsTypes.lex_sem ExtArray.t * string)) in  
98 - if msg <> "" then  
99 - (match !output with  
100 - | Html -> Printf.fprintf out_chan "%s\n%!" msg  
101 - | Marsh -> Marshal.to_channel out_chan (text,tokens,lex_sems,msg) []; flush out_chan) else (  
102 - let text = ENIAMexec.translate_text text in  
103 - let text = ENIAMexec.parse !timeout !verbosity rules dep_rules tokens lex_sems text in  
104 - let text = if !select_sentence_modes_flag then ENIAMselectSent.select_sentence_modes_text text else text in  
105 - let text = if !select_sentences_flag then ENIAMselectSent.select_sentences_text ENIAMexecTypes.Struct text else text in  
106 - let text = if !semantic_processing_flag then ENIAMexec.semantic_processing !verbosity tokens lex_sems text else text in  
107 - (match !output with  
108 - | Html -> ENIAMvisualization.print_html_text !output_dir "parsed_text" text !img !verbosity tokens  
109 - | Marsh -> Marshal.to_channel out_chan (text,tokens,lex_sems,msg) []; flush out_chan));  
110 - prerr_endline "Done!";  
111 - main_loop sub_in sub_out in_chan out_chan)  
112 -  
113 -let get_sock_addr host_name port =  
114 - let he = Unix.gethostbyname host_name in  
115 - let addr = he.Unix.h_addr_list in  
116 - Unix.ADDR_INET(addr.(0),port)  
117 -  
118 -let _ =  
119 - prerr_endline message;  
120 - ENIAMcategoriesPL.initialize ();  
121 - ENIAMsemLexicon.initialize ();  
122 - Arg.parse spec_list anon_fun usage_msg;  
123 - if !lexSemantics_built_in then ENIAMlexSemantics.initialize ();  
124 - Gc.compact ();  
125 - let sub_in,sub_out =  
126 - if !lexSemantics_built_in then stdin,stdout  
127 - else Unix.open_connection (get_sock_addr !lexSemantics_host !lexSemantics_port) in  
128 - prerr_endline "Ready!";  
129 - if !comm_stdio then main_loop sub_in sub_out stdin stdout  
130 - else  
131 - let sockaddr = Unix.ADDR_INET(Unix.inet_addr_any,!port) in  
132 - Unix.establish_server (main_loop sub_in sub_out) sockaddr  
133 -  
134 -let examples = [  
135 - (* "Szpak","Szpak śpiewa."; *)  
136 - (* "miał","Miałem miał."; *)  
137 -(* "Ala","Ala ma kota.";  
138 - "Ale","Ale mają kota:"; *)  
139 - (* "zima","Szpak frunie zimą.";*)  
140 - (* "październik","Kot miauczy w październiku."; *)  
141 -(* "Szpak-Kot","Szpak frunie. Kot miauczy.";  
142 - "powiedział","Szpak powiedział: „Frunę. Kiszę.”";*)  
143 - (* "teraz","Teraz frunie jakiś szpak.";  
144 - "chłopcy","Chłopcy mają ulicę kwiatami."; *)  
145 - (* "arabia","Arabia Saudyjska biegnie.";*)  
146 -(* "Tom","Tom idzie."; *)  
147 - (* "liceum","W 1984-89 uczęszczał do VII Liceum Ogólnokształcącego im. K.K. Baczyńskiego w Szczecinie.";  
148 - "studia","Następnie studiował architekturę na Politechnice Szczecińskiej, dyplom uzyskał w 1994."; *)  
149 - (* "przez_nią","Frunę przez nią."; *)  
150 - (* "o_nie","Witold frasuje się o nie."; *)  
151 - (* "or1","- Frunę."; *)  
152 - (* "or2","- Frunę - powiedział szpak."; *)  
153 - (*"or3","- Frunę! - powiedział szpak.";*)  
154 -]  
155 -(*  
156 -let _ =  
157 - ENIAMsubsyntax.initialize ();  
158 - ENIAMcategoriesPL.initialize ();  
159 - ENIAMwalParser.initialize ();  
160 - ENIAMwalReduce.initialize ();  
161 - Xlist.iter examples (fun (name,example) ->  
162 - let text,tokens,msg = ENIAMsubsyntax.catch_parse_text example in  
163 - if msg <> "" then print_endline msg else (  
164 - let lex_sems = ENIAMlexSemantics.assign tokens text in  
165 - let text = ENIAMexec.translate_text text in  
166 - let text = ENIAMexec.parse 30. !verbosity rules tokens lex_sems text in  
167 - (* let text = ENIAMselectSent.select_sentence_modes_text text in *)  
168 - let text = ENIAMselectSent.select_sentences_text ENIAMexecTypes.Struct text in  
169 - ENIAMvisualization.print_html_text "results/" "parsed_text" text !img !verbosity tokens))  
170 - *)  
exec/makefile
@@ -6,7 +6,7 @@ OCAMLFLAGS=$(INCLUDES) -g
6 6 OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa \
7 7 eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-integration.cmxa \
8 8 eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa \
9 - eniam-lexSemantics.cmxa eniam-semantics.cmxa #eniam-exec.cmxa
9 + eniam-lexSemantics.cmxa eniam-semantics.cmxa eniam-exec.cmxa
10 10 INSTALLDIR=`ocamlc -where`/eniam
11 11
12 12 SOURCES= ENIAMexecTypes.ml ENIAMexec.ml ENIAMselectSent.ml ENIAMvisualization.ml
@@ -31,13 +31,10 @@ eniam-exec.cma: $(SOURCES)
31 31 eniam-exec.cmxa: $(SOURCES)
32 32 ocamlopt -linkall -a -o eniam-exec.cmxa $(INCLUDES) $^
33 33
34 -parser: $(SOURCES) parser.ml
34 +parser: parser.ml
35 35 $(OCAMLOPT) -o parser $(OCAMLOPTFLAGS) $^
36 36
37 -domparser: $(SOURCES) domparser.ml
38 - $(OCAMLOPT) -o domparser $(OCAMLOPTFLAGS) eniam-domainLexSemantics.cmxa $^
39 -
40 -semparser: $(SOURCES) semparser.ml
37 +semparser: semparser.ml
41 38 mkdir -p results
42 39 $(OCAMLOPT) -o semparser $(OCAMLOPTFLAGS) $^
43 40
lexSemantics/ENIAMwalRenderer.ml
@@ -257,7 +257,7 @@ let render_morf_cat cats = function
257 257
258 258 let extract_sel_prefs sel_prefs =
259 259 Xlist.map sel_prefs (function
260 - Predef s -> s
260 + SynsetName s -> s
261 261 | _ -> failwith "extract_sel_prefs")
262 262
263 263 let render_schema schema =