Commit 57d87407db4229d6ad5ff7907ff0b5ff89dd86b6
1 parent
bc4868ee
Poprawki w lexSemantics
Showing
3 changed files
with
4 additions
and
177 deletions
exec/domparser.ml deleted
1 | -(* | ||
2 | - * ENIAMexec implements ENIAM processing stream | ||
3 | - * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | ||
4 | - * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | ||
5 | - * | ||
6 | - * This library is free software: you can redistribute it and/or modify | ||
7 | - * it under the terms of the GNU Lesser General Public License as published by | ||
8 | - * the Free Software Foundation, either version 3 of the License, or | ||
9 | - * (at your option) any later version. | ||
10 | - * | ||
11 | - * This library is distributed in the hope that it will be useful, | ||
12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | - * GNU Lesser General Public License for more details. | ||
15 | - * | ||
16 | - * You should have received a copy of the GNU Lesser General Public License | ||
17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
18 | - *) | ||
19 | - | ||
20 | -open ENIAMsubsyntaxTypes | ||
21 | -open Xstd | ||
22 | - | ||
23 | -let rules = ENIAM_LCGlexicon.make_rules false ENIAM_LCGlexiconTypes.user_lexicon_filename | ||
24 | -let dep_rules = ENIAM_LCGlexicon.make_rules true ENIAM_LCGlexiconTypes.user_lexicon_filename | ||
25 | - | ||
26 | -type output = (*Text | Xml |*) Html | Marsh (*| Graphviz*) | ||
27 | - | ||
28 | -let output = ref Html | ||
29 | -let comm_stdio = ref true | ||
30 | -let port = ref 5439 | ||
31 | -let lexSemantics_built_in = ref true | ||
32 | -let lexSemantics_host = ref "localhost" | ||
33 | -let lexSemantics_port = ref 5739 | ||
34 | -let verbosity = ref 1 | ||
35 | -let img = ref 1 | ||
36 | -let timeout = ref 30. | ||
37 | -let select_sentence_modes_flag = ref false | ||
38 | -let select_sentences_flag = ref true | ||
39 | -let semantic_processing_flag = ref true | ||
40 | -let output_dir = ref "results/" | ||
41 | -let spec_list = [ | ||
42 | - "-i", Arg.Unit (fun () -> comm_stdio:=true), "Communication using stdio (default)"; | ||
43 | - "-p", Arg.Int (fun p -> comm_stdio:=false; port:=p), "<port> Communication using sockets on given port number"; | ||
44 | - (*"-t", Arg.Unit (fun () -> output:=Text), "Output as plain text (default)"; | ||
45 | - "-x", Arg.Unit (fun () -> output:=Xml), "Output as XML";*) | ||
46 | - "-m", Arg.Unit (fun () -> output:=Marsh), "Output as marshalled Ocaml data structure"; | ||
47 | - "-h", Arg.Unit (fun () -> output:=Html), "Output as HTML (default)"; | ||
48 | - (*"-g", Arg.Unit (fun () -> output:=Graphviz; sentence_split:=false), "Output as graphviz dot file; turns sentence split off";*) | ||
49 | - (* "-r", Arg.String (fun p -> | ||
50 | - ENIAMtokenizerTypes.set_resource_path p; | ||
51 | - ENIAMmorphologyTypes.set_resource_path p; | ||
52 | - ENIAMsubsyntaxTypes.set_resource_path p), "<path> Set resource path"; *) | ||
53 | - "-b", Arg.Unit (fun () -> lexSemantics_built_in:=true), "Use built in version of ENIAMlexSemantics (default)"; | ||
54 | - "--port", Arg.Int (fun p -> lexSemantics_built_in:=false; lexSemantics_port:=p), "<port> Connect to ENIAMlexSemantics on a given port"; | ||
55 | - "--host", Arg.String (fun s -> lexSemantics_built_in:=false; lexSemantics_host:=s), "<hostname> Connect to ENIAMlexSemantics on a given host (by default localhost)"; | ||
56 | - "--timeout", Arg.Float (fun x -> timeout:=x), "<seconds> Sets timeout value for parser (default 30 seconds)"; | ||
57 | - "-v", Arg.Int (fun v -> verbosity:=v), "<val> Sets verbosity level of parser\n 0 - print only status information\n 1 - print data relevant to the status of a given sentence (default)\n 2 - print all data structures"; | ||
58 | - "--img", Arg.Int (fun v -> img:=v), "<val> Selects which images are included in output html page \n 0 - no images included\n 1 - simple dependency trees included (default)\n 2 - dependency trees included"; | ||
59 | - "--output", Arg.String (fun s -> output_dir:=s), "<dir> Sets output directory (by default results/)"; | ||
60 | - "--sel_modes", Arg.Unit (fun () -> select_sentence_modes_flag:=true), "Select sencence modes"; | ||
61 | - "--no_sel_modes", Arg.Unit (fun () -> select_sentence_modes_flag:=false), "Do not select sencence modes (default)"; | ||
62 | - "--sel_sent", Arg.Unit (fun () -> select_sentences_flag:=true), "Select parsed sentences (default)"; | ||
63 | - "--no_sel_sent", Arg.Unit (fun () -> select_sentences_flag:=false), "Do not select parsed sentences"; | ||
64 | - "--sem", Arg.Unit (fun () -> semantic_processing_flag:=true), "Perform semantic processing (default)"; | ||
65 | - "--no_sem", Arg.Unit (fun () -> semantic_processing_flag:=false), "Do not perforf semantic processing"; | ||
66 | - ] | ||
67 | - | ||
68 | -let usage_msg = | ||
69 | - "Usage: domparser <options>\nInput is a sequence of lines. Empty line ends the sequence and invoke parsing. Double empty line shutdown parser.\nOptions are:" | ||
70 | - | ||
71 | -let message = "ENIAM_LCGparser, semantic parser for Logical Categorial Grammar formalism\n\ | ||
72 | -Copyright (C) 2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>\n\ | ||
73 | -Copyright (C) 2017 Institute of Computer Science Polish Academy of Sciences" | ||
74 | - | ||
75 | -let anon_fun s = raise (Arg.Bad ("invalid argument: " ^ s)) | ||
76 | - | ||
77 | -let input_text channel = | ||
78 | - let s = ref (try input_line channel with End_of_file -> "") in | ||
79 | - let lines = ref [] in | ||
80 | - while !s <> "" do | ||
81 | - lines := !s :: !lines; | ||
82 | - s := try input_line channel with End_of_file -> "" | ||
83 | - done; | ||
84 | - String.concat "\n" (List.rev !lines) | ||
85 | - | ||
86 | -let rec main_loop sub_in sub_out in_chan out_chan = | ||
87 | - let text = input_text in_chan in | ||
88 | - if text = "" then () else ( | ||
89 | - let text,tokens,lex_sems,msg = | ||
90 | - if !lexSemantics_built_in then | ||
91 | - let text,tokens,msg = ENIAMsubsyntax.catch_parse_text text in | ||
92 | - let lex_sems,msg = | ||
93 | - if msg <> "" then ExtArray.make 0 ENIAMlexSemanticsTypes.empty_lex_sem, msg | ||
94 | - else ENIAMdomainLexSemantics.catch_assign2 tokens text in | ||
95 | - text,tokens,lex_sems,msg else ( | ||
96 | - Printf.fprintf sub_out "%s\n\n%!" text; | ||
97 | - (Marshal.from_channel sub_in : ENIAMsubsyntaxTypes.text * ENIAMtokenizerTypes.token_env ExtArray.t * ENIAMlexSemanticsTypes.lex_sem ExtArray.t * string)) in | ||
98 | - if msg <> "" then | ||
99 | - (match !output with | ||
100 | - | Html -> Printf.fprintf out_chan "%s\n%!" msg | ||
101 | - | Marsh -> Marshal.to_channel out_chan (text,tokens,lex_sems,msg) []; flush out_chan) else ( | ||
102 | - let text = ENIAMexec.translate_text text in | ||
103 | - let text = ENIAMexec.parse !timeout !verbosity rules dep_rules tokens lex_sems text in | ||
104 | - let text = if !select_sentence_modes_flag then ENIAMselectSent.select_sentence_modes_text text else text in | ||
105 | - let text = if !select_sentences_flag then ENIAMselectSent.select_sentences_text ENIAMexecTypes.Struct text else text in | ||
106 | - let text = if !semantic_processing_flag then ENIAMexec.semantic_processing !verbosity tokens lex_sems text else text in | ||
107 | - (match !output with | ||
108 | - | Html -> ENIAMvisualization.print_html_text !output_dir "parsed_text" text !img !verbosity tokens | ||
109 | - | Marsh -> Marshal.to_channel out_chan (text,tokens,lex_sems,msg) []; flush out_chan)); | ||
110 | - prerr_endline "Done!"; | ||
111 | - main_loop sub_in sub_out in_chan out_chan) | ||
112 | - | ||
113 | -let get_sock_addr host_name port = | ||
114 | - let he = Unix.gethostbyname host_name in | ||
115 | - let addr = he.Unix.h_addr_list in | ||
116 | - Unix.ADDR_INET(addr.(0),port) | ||
117 | - | ||
118 | -let _ = | ||
119 | - prerr_endline message; | ||
120 | - ENIAMcategoriesPL.initialize (); | ||
121 | - ENIAMsemLexicon.initialize (); | ||
122 | - Arg.parse spec_list anon_fun usage_msg; | ||
123 | - if !lexSemantics_built_in then ENIAMlexSemantics.initialize (); | ||
124 | - Gc.compact (); | ||
125 | - let sub_in,sub_out = | ||
126 | - if !lexSemantics_built_in then stdin,stdout | ||
127 | - else Unix.open_connection (get_sock_addr !lexSemantics_host !lexSemantics_port) in | ||
128 | - prerr_endline "Ready!"; | ||
129 | - if !comm_stdio then main_loop sub_in sub_out stdin stdout | ||
130 | - else | ||
131 | - let sockaddr = Unix.ADDR_INET(Unix.inet_addr_any,!port) in | ||
132 | - Unix.establish_server (main_loop sub_in sub_out) sockaddr | ||
133 | - | ||
134 | -let examples = [ | ||
135 | - (* "Szpak","Szpak śpiewa."; *) | ||
136 | - (* "miał","Miałem miał."; *) | ||
137 | -(* "Ala","Ala ma kota."; | ||
138 | - "Ale","Ale mają kota:"; *) | ||
139 | - (* "zima","Szpak frunie zimą.";*) | ||
140 | - (* "październik","Kot miauczy w październiku."; *) | ||
141 | -(* "Szpak-Kot","Szpak frunie. Kot miauczy."; | ||
142 | - "powiedział","Szpak powiedział: „Frunę. Kiszę.”";*) | ||
143 | - (* "teraz","Teraz frunie jakiś szpak."; | ||
144 | - "chłopcy","Chłopcy mają ulicę kwiatami."; *) | ||
145 | - (* "arabia","Arabia Saudyjska biegnie.";*) | ||
146 | -(* "Tom","Tom idzie."; *) | ||
147 | - (* "liceum","W 1984-89 uczęszczał do VII Liceum Ogólnokształcącego im. K.K. Baczyńskiego w Szczecinie."; | ||
148 | - "studia","Następnie studiował architekturę na Politechnice Szczecińskiej, dyplom uzyskał w 1994."; *) | ||
149 | - (* "przez_nią","Frunę przez nią."; *) | ||
150 | - (* "o_nie","Witold frasuje się o nie."; *) | ||
151 | - (* "or1","- Frunę."; *) | ||
152 | - (* "or2","- Frunę - powiedział szpak."; *) | ||
153 | - (*"or3","- Frunę! - powiedział szpak.";*) | ||
154 | -] | ||
155 | -(* | ||
156 | -let _ = | ||
157 | - ENIAMsubsyntax.initialize (); | ||
158 | - ENIAMcategoriesPL.initialize (); | ||
159 | - ENIAMwalParser.initialize (); | ||
160 | - ENIAMwalReduce.initialize (); | ||
161 | - Xlist.iter examples (fun (name,example) -> | ||
162 | - let text,tokens,msg = ENIAMsubsyntax.catch_parse_text example in | ||
163 | - if msg <> "" then print_endline msg else ( | ||
164 | - let lex_sems = ENIAMlexSemantics.assign tokens text in | ||
165 | - let text = ENIAMexec.translate_text text in | ||
166 | - let text = ENIAMexec.parse 30. !verbosity rules tokens lex_sems text in | ||
167 | - (* let text = ENIAMselectSent.select_sentence_modes_text text in *) | ||
168 | - let text = ENIAMselectSent.select_sentences_text ENIAMexecTypes.Struct text in | ||
169 | - ENIAMvisualization.print_html_text "results/" "parsed_text" text !img !verbosity tokens)) | ||
170 | - *) |
exec/makefile
@@ -6,7 +6,7 @@ OCAMLFLAGS=$(INCLUDES) -g | @@ -6,7 +6,7 @@ OCAMLFLAGS=$(INCLUDES) -g | ||
6 | OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa \ | 6 | OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa \ |
7 | eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-integration.cmxa \ | 7 | eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-integration.cmxa \ |
8 | eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa \ | 8 | eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa \ |
9 | - eniam-lexSemantics.cmxa eniam-semantics.cmxa #eniam-exec.cmxa | 9 | + eniam-lexSemantics.cmxa eniam-semantics.cmxa eniam-exec.cmxa |
10 | INSTALLDIR=`ocamlc -where`/eniam | 10 | INSTALLDIR=`ocamlc -where`/eniam |
11 | 11 | ||
12 | SOURCES= ENIAMexecTypes.ml ENIAMexec.ml ENIAMselectSent.ml ENIAMvisualization.ml | 12 | SOURCES= ENIAMexecTypes.ml ENIAMexec.ml ENIAMselectSent.ml ENIAMvisualization.ml |
@@ -31,13 +31,10 @@ eniam-exec.cma: $(SOURCES) | @@ -31,13 +31,10 @@ eniam-exec.cma: $(SOURCES) | ||
31 | eniam-exec.cmxa: $(SOURCES) | 31 | eniam-exec.cmxa: $(SOURCES) |
32 | ocamlopt -linkall -a -o eniam-exec.cmxa $(INCLUDES) $^ | 32 | ocamlopt -linkall -a -o eniam-exec.cmxa $(INCLUDES) $^ |
33 | 33 | ||
34 | -parser: $(SOURCES) parser.ml | 34 | +parser: parser.ml |
35 | $(OCAMLOPT) -o parser $(OCAMLOPTFLAGS) $^ | 35 | $(OCAMLOPT) -o parser $(OCAMLOPTFLAGS) $^ |
36 | 36 | ||
37 | -domparser: $(SOURCES) domparser.ml | ||
38 | - $(OCAMLOPT) -o domparser $(OCAMLOPTFLAGS) eniam-domainLexSemantics.cmxa $^ | ||
39 | - | ||
40 | -semparser: $(SOURCES) semparser.ml | 37 | +semparser: semparser.ml |
41 | mkdir -p results | 38 | mkdir -p results |
42 | $(OCAMLOPT) -o semparser $(OCAMLOPTFLAGS) $^ | 39 | $(OCAMLOPT) -o semparser $(OCAMLOPTFLAGS) $^ |
43 | 40 |
lexSemantics/ENIAMwalRenderer.ml
@@ -257,7 +257,7 @@ let render_morf_cat cats = function | @@ -257,7 +257,7 @@ let render_morf_cat cats = function | ||
257 | 257 | ||
258 | let extract_sel_prefs sel_prefs = | 258 | let extract_sel_prefs sel_prefs = |
259 | Xlist.map sel_prefs (function | 259 | Xlist.map sel_prefs (function |
260 | - Predef s -> s | 260 | + SynsetName s -> s |
261 | | _ -> failwith "extract_sel_prefs") | 261 | | _ -> failwith "extract_sel_prefs") |
262 | 262 | ||
263 | let render_schema schema = | 263 | let render_schema schema = |