Commit 57d87407db4229d6ad5ff7907ff0b5ff89dd86b6
1 parent: bc4868ee
Poprawki w lexSemantics
Showing 3 changed files with 4 additions and 177 deletions
exec/domparser.ml deleted
1 | -(* | |
2 | - * ENIAMexec implements ENIAM processing stream | |
3 | - * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | - * | |
6 | - * This library is free software: you can redistribute it and/or modify | |
7 | - * it under the terms of the GNU Lesser General Public License as published by | |
8 | - * the Free Software Foundation, either version 3 of the License, or | |
9 | - * (at your option) any later version. | |
10 | - * | |
11 | - * This library is distributed in the hope that it will be useful, | |
12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | - * GNU Lesser General Public License for more details. | |
15 | - * | |
16 | - * You should have received a copy of the GNU Lesser General Public License | |
17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | - *) | |
19 | - | |
20 | -open ENIAMsubsyntaxTypes | |
21 | -open Xstd | |
22 | - | |
23 | -let rules = ENIAM_LCGlexicon.make_rules false ENIAM_LCGlexiconTypes.user_lexicon_filename | |
24 | -let dep_rules = ENIAM_LCGlexicon.make_rules true ENIAM_LCGlexiconTypes.user_lexicon_filename | |
25 | - | |
26 | -type output = (*Text | Xml |*) Html | Marsh (*| Graphviz*) | |
27 | - | |
28 | -let output = ref Html | |
29 | -let comm_stdio = ref true | |
30 | -let port = ref 5439 | |
31 | -let lexSemantics_built_in = ref true | |
32 | -let lexSemantics_host = ref "localhost" | |
33 | -let lexSemantics_port = ref 5739 | |
34 | -let verbosity = ref 1 | |
35 | -let img = ref 1 | |
36 | -let timeout = ref 30. | |
37 | -let select_sentence_modes_flag = ref false | |
38 | -let select_sentences_flag = ref true | |
39 | -let semantic_processing_flag = ref true | |
40 | -let output_dir = ref "results/" | |
41 | -let spec_list = [ | |
42 | - "-i", Arg.Unit (fun () -> comm_stdio:=true), "Communication using stdio (default)"; | |
43 | - "-p", Arg.Int (fun p -> comm_stdio:=false; port:=p), "<port> Communication using sockets on given port number"; | |
44 | - (*"-t", Arg.Unit (fun () -> output:=Text), "Output as plain text (default)"; | |
45 | - "-x", Arg.Unit (fun () -> output:=Xml), "Output as XML";*) | |
46 | - "-m", Arg.Unit (fun () -> output:=Marsh), "Output as marshalled Ocaml data structure"; | |
47 | - "-h", Arg.Unit (fun () -> output:=Html), "Output as HTML (default)"; | |
48 | - (*"-g", Arg.Unit (fun () -> output:=Graphviz; sentence_split:=false), "Output as graphviz dot file; turns sentence split off";*) | |
49 | - (* "-r", Arg.String (fun p -> | |
50 | - ENIAMtokenizerTypes.set_resource_path p; | |
51 | - ENIAMmorphologyTypes.set_resource_path p; | |
52 | - ENIAMsubsyntaxTypes.set_resource_path p), "<path> Set resource path"; *) | |
53 | - "-b", Arg.Unit (fun () -> lexSemantics_built_in:=true), "Use built in version of ENIAMlexSemantics (default)"; | |
54 | - "--port", Arg.Int (fun p -> lexSemantics_built_in:=false; lexSemantics_port:=p), "<port> Connect to ENIAMlexSemantics on a given port"; | |
55 | - "--host", Arg.String (fun s -> lexSemantics_built_in:=false; lexSemantics_host:=s), "<hostname> Connect to ENIAMlexSemantics on a given host (by default localhost)"; | |
56 | - "--timeout", Arg.Float (fun x -> timeout:=x), "<seconds> Sets timeout value for parser (default 30 seconds)"; | |
57 | - "-v", Arg.Int (fun v -> verbosity:=v), "<val> Sets verbosity level of parser\n 0 - print only status information\n 1 - print data relevant to the status of a given sentence (default)\n 2 - print all data structures"; | |
58 | - "--img", Arg.Int (fun v -> img:=v), "<val> Selects which images are included in output html page \n 0 - no images included\n 1 - simple dependency trees included (default)\n 2 - dependency trees included"; | |
59 | - "--output", Arg.String (fun s -> output_dir:=s), "<dir> Sets output directory (by default results/)"; | |
60 | - "--sel_modes", Arg.Unit (fun () -> select_sentence_modes_flag:=true), "Select sencence modes"; | |
61 | - "--no_sel_modes", Arg.Unit (fun () -> select_sentence_modes_flag:=false), "Do not select sencence modes (default)"; | |
62 | - "--sel_sent", Arg.Unit (fun () -> select_sentences_flag:=true), "Select parsed sentences (default)"; | |
63 | - "--no_sel_sent", Arg.Unit (fun () -> select_sentences_flag:=false), "Do not select parsed sentences"; | |
64 | - "--sem", Arg.Unit (fun () -> semantic_processing_flag:=true), "Perform semantic processing (default)"; | |
65 | - "--no_sem", Arg.Unit (fun () -> semantic_processing_flag:=false), "Do not perforf semantic processing"; | |
66 | - ] | |
67 | - | |
68 | -let usage_msg = | |
69 | - "Usage: domparser <options>\nInput is a sequence of lines. Empty line ends the sequence and invoke parsing. Double empty line shutdown parser.\nOptions are:" | |
70 | - | |
71 | -let message = "ENIAM_LCGparser, semantic parser for Logical Categorial Grammar formalism\n\ | |
72 | -Copyright (C) 2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>\n\ | |
73 | -Copyright (C) 2017 Institute of Computer Science Polish Academy of Sciences" | |
74 | - | |
75 | -let anon_fun s = raise (Arg.Bad ("invalid argument: " ^ s)) | |
76 | - | |
77 | -let input_text channel = | |
78 | - let s = ref (try input_line channel with End_of_file -> "") in | |
79 | - let lines = ref [] in | |
80 | - while !s <> "" do | |
81 | - lines := !s :: !lines; | |
82 | - s := try input_line channel with End_of_file -> "" | |
83 | - done; | |
84 | - String.concat "\n" (List.rev !lines) | |
85 | - | |
86 | -let rec main_loop sub_in sub_out in_chan out_chan = | |
87 | - let text = input_text in_chan in | |
88 | - if text = "" then () else ( | |
89 | - let text,tokens,lex_sems,msg = | |
90 | - if !lexSemantics_built_in then | |
91 | - let text,tokens,msg = ENIAMsubsyntax.catch_parse_text text in | |
92 | - let lex_sems,msg = | |
93 | - if msg <> "" then ExtArray.make 0 ENIAMlexSemanticsTypes.empty_lex_sem, msg | |
94 | - else ENIAMdomainLexSemantics.catch_assign2 tokens text in | |
95 | - text,tokens,lex_sems,msg else ( | |
96 | - Printf.fprintf sub_out "%s\n\n%!" text; | |
97 | - (Marshal.from_channel sub_in : ENIAMsubsyntaxTypes.text * ENIAMtokenizerTypes.token_env ExtArray.t * ENIAMlexSemanticsTypes.lex_sem ExtArray.t * string)) in | |
98 | - if msg <> "" then | |
99 | - (match !output with | |
100 | - | Html -> Printf.fprintf out_chan "%s\n%!" msg | |
101 | - | Marsh -> Marshal.to_channel out_chan (text,tokens,lex_sems,msg) []; flush out_chan) else ( | |
102 | - let text = ENIAMexec.translate_text text in | |
103 | - let text = ENIAMexec.parse !timeout !verbosity rules dep_rules tokens lex_sems text in | |
104 | - let text = if !select_sentence_modes_flag then ENIAMselectSent.select_sentence_modes_text text else text in | |
105 | - let text = if !select_sentences_flag then ENIAMselectSent.select_sentences_text ENIAMexecTypes.Struct text else text in | |
106 | - let text = if !semantic_processing_flag then ENIAMexec.semantic_processing !verbosity tokens lex_sems text else text in | |
107 | - (match !output with | |
108 | - | Html -> ENIAMvisualization.print_html_text !output_dir "parsed_text" text !img !verbosity tokens | |
109 | - | Marsh -> Marshal.to_channel out_chan (text,tokens,lex_sems,msg) []; flush out_chan)); | |
110 | - prerr_endline "Done!"; | |
111 | - main_loop sub_in sub_out in_chan out_chan) | |
112 | - | |
113 | -let get_sock_addr host_name port = | |
114 | - let he = Unix.gethostbyname host_name in | |
115 | - let addr = he.Unix.h_addr_list in | |
116 | - Unix.ADDR_INET(addr.(0),port) | |
117 | - | |
118 | -let _ = | |
119 | - prerr_endline message; | |
120 | - ENIAMcategoriesPL.initialize (); | |
121 | - ENIAMsemLexicon.initialize (); | |
122 | - Arg.parse spec_list anon_fun usage_msg; | |
123 | - if !lexSemantics_built_in then ENIAMlexSemantics.initialize (); | |
124 | - Gc.compact (); | |
125 | - let sub_in,sub_out = | |
126 | - if !lexSemantics_built_in then stdin,stdout | |
127 | - else Unix.open_connection (get_sock_addr !lexSemantics_host !lexSemantics_port) in | |
128 | - prerr_endline "Ready!"; | |
129 | - if !comm_stdio then main_loop sub_in sub_out stdin stdout | |
130 | - else | |
131 | - let sockaddr = Unix.ADDR_INET(Unix.inet_addr_any,!port) in | |
132 | - Unix.establish_server (main_loop sub_in sub_out) sockaddr | |
133 | - | |
134 | -let examples = [ | |
135 | - (* "Szpak","Szpak śpiewa."; *) | |
136 | - (* "miał","Miałem miał."; *) | |
137 | -(* "Ala","Ala ma kota."; | |
138 | - "Ale","Ale mają kota:"; *) | |
139 | - (* "zima","Szpak frunie zimą.";*) | |
140 | - (* "październik","Kot miauczy w październiku."; *) | |
141 | -(* "Szpak-Kot","Szpak frunie. Kot miauczy."; | |
142 | - "powiedział","Szpak powiedział: „Frunę. Kiszę.”";*) | |
143 | - (* "teraz","Teraz frunie jakiś szpak."; | |
144 | - "chłopcy","Chłopcy mają ulicę kwiatami."; *) | |
145 | - (* "arabia","Arabia Saudyjska biegnie.";*) | |
146 | -(* "Tom","Tom idzie."; *) | |
147 | - (* "liceum","W 1984-89 uczęszczał do VII Liceum Ogólnokształcącego im. K.K. Baczyńskiego w Szczecinie."; | |
148 | - "studia","Następnie studiował architekturę na Politechnice Szczecińskiej, dyplom uzyskał w 1994."; *) | |
149 | - (* "przez_nią","Frunę przez nią."; *) | |
150 | - (* "o_nie","Witold frasuje się o nie."; *) | |
151 | - (* "or1","- Frunę."; *) | |
152 | - (* "or2","- Frunę - powiedział szpak."; *) | |
153 | - (*"or3","- Frunę! - powiedział szpak.";*) | |
154 | -] | |
155 | -(* | |
156 | -let _ = | |
157 | - ENIAMsubsyntax.initialize (); | |
158 | - ENIAMcategoriesPL.initialize (); | |
159 | - ENIAMwalParser.initialize (); | |
160 | - ENIAMwalReduce.initialize (); | |
161 | - Xlist.iter examples (fun (name,example) -> | |
162 | - let text,tokens,msg = ENIAMsubsyntax.catch_parse_text example in | |
163 | - if msg <> "" then print_endline msg else ( | |
164 | - let lex_sems = ENIAMlexSemantics.assign tokens text in | |
165 | - let text = ENIAMexec.translate_text text in | |
166 | - let text = ENIAMexec.parse 30. !verbosity rules tokens lex_sems text in | |
167 | - (* let text = ENIAMselectSent.select_sentence_modes_text text in *) | |
168 | - let text = ENIAMselectSent.select_sentences_text ENIAMexecTypes.Struct text in | |
169 | - ENIAMvisualization.print_html_text "results/" "parsed_text" text !img !verbosity tokens)) | |
170 | - *) |
exec/makefile
... | ... | @@ -6,7 +6,7 @@ OCAMLFLAGS=$(INCLUDES) -g |
6 | 6 | OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa \ |
7 | 7 | eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-integration.cmxa \ |
8 | 8 | eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa \ |
9 | - eniam-lexSemantics.cmxa eniam-semantics.cmxa #eniam-exec.cmxa | |
9 | + eniam-lexSemantics.cmxa eniam-semantics.cmxa eniam-exec.cmxa | |
10 | 10 | INSTALLDIR=`ocamlc -where`/eniam |
11 | 11 | |
12 | 12 | SOURCES= ENIAMexecTypes.ml ENIAMexec.ml ENIAMselectSent.ml ENIAMvisualization.ml |
... | ... | @@ -31,13 +31,10 @@ eniam-exec.cma: $(SOURCES) |
31 | 31 | eniam-exec.cmxa: $(SOURCES) |
32 | 32 | ocamlopt -linkall -a -o eniam-exec.cmxa $(INCLUDES) $^ |
33 | 33 | |
34 | -parser: $(SOURCES) parser.ml | |
34 | +parser: parser.ml | |
35 | 35 | $(OCAMLOPT) -o parser $(OCAMLOPTFLAGS) $^ |
36 | 36 | |
37 | -domparser: $(SOURCES) domparser.ml | |
38 | - $(OCAMLOPT) -o domparser $(OCAMLOPTFLAGS) eniam-domainLexSemantics.cmxa $^ | |
39 | - | |
40 | -semparser: $(SOURCES) semparser.ml | |
37 | +semparser: semparser.ml | |
41 | 38 | mkdir -p results |
42 | 39 | $(OCAMLOPT) -o semparser $(OCAMLOPTFLAGS) $^ |
43 | 40 | |
... | ... |
lexSemantics/ENIAMwalRenderer.ml