Commit 57d87407db4229d6ad5ff7907ff0b5ff89dd86b6

Authored by Wojciech Jaworski
1 parent bc4868ee

Poprawki w lexSemantics

exec/domparser.ml deleted
1   -(*
2   - * ENIAMexec implements ENIAM processing stream
3   - * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
5   - *
6   - * This library is free software: you can redistribute it and/or modify
7   - * it under the terms of the GNU Lesser General Public License as published by
8   - * the Free Software Foundation, either version 3 of the License, or
9   - * (at your option) any later version.
10   - *
11   - * This library is distributed in the hope that it will be useful,
12   - * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   - * GNU Lesser General Public License for more details.
15   - *
16   - * You should have received a copy of the GNU Lesser General Public License
17   - * along with this program. If not, see <http://www.gnu.org/licenses/>.
18   - *)
19   -
20   -open ENIAMsubsyntaxTypes
21   -open Xstd
22   -
23   -let rules = ENIAM_LCGlexicon.make_rules false ENIAM_LCGlexiconTypes.user_lexicon_filename
24   -let dep_rules = ENIAM_LCGlexicon.make_rules true ENIAM_LCGlexiconTypes.user_lexicon_filename
25   -
26   -type output = (*Text | Xml |*) Html | Marsh (*| Graphviz*)
27   -
28   -let output = ref Html
29   -let comm_stdio = ref true
30   -let port = ref 5439
31   -let lexSemantics_built_in = ref true
32   -let lexSemantics_host = ref "localhost"
33   -let lexSemantics_port = ref 5739
34   -let verbosity = ref 1
35   -let img = ref 1
36   -let timeout = ref 30.
37   -let select_sentence_modes_flag = ref false
38   -let select_sentences_flag = ref true
39   -let semantic_processing_flag = ref true
40   -let output_dir = ref "results/"
41   -let spec_list = [
42   - "-i", Arg.Unit (fun () -> comm_stdio:=true), "Communication using stdio (default)";
43   - "-p", Arg.Int (fun p -> comm_stdio:=false; port:=p), "<port> Communication using sockets on given port number";
44   - (*"-t", Arg.Unit (fun () -> output:=Text), "Output as plain text (default)";
45   - "-x", Arg.Unit (fun () -> output:=Xml), "Output as XML";*)
46   - "-m", Arg.Unit (fun () -> output:=Marsh), "Output as marshalled Ocaml data structure";
47   - "-h", Arg.Unit (fun () -> output:=Html), "Output as HTML (default)";
48   - (*"-g", Arg.Unit (fun () -> output:=Graphviz; sentence_split:=false), "Output as graphviz dot file; turns sentence split off";*)
49   - (* "-r", Arg.String (fun p ->
50   - ENIAMtokenizerTypes.set_resource_path p;
51   - ENIAMmorphologyTypes.set_resource_path p;
52   - ENIAMsubsyntaxTypes.set_resource_path p), "<path> Set resource path"; *)
53   - "-b", Arg.Unit (fun () -> lexSemantics_built_in:=true), "Use built in version of ENIAMlexSemantics (default)";
54   - "--port", Arg.Int (fun p -> lexSemantics_built_in:=false; lexSemantics_port:=p), "<port> Connect to ENIAMlexSemantics on a given port";
55   - "--host", Arg.String (fun s -> lexSemantics_built_in:=false; lexSemantics_host:=s), "<hostname> Connect to ENIAMlexSemantics on a given host (by default localhost)";
56   - "--timeout", Arg.Float (fun x -> timeout:=x), "<seconds> Sets timeout value for parser (default 30 seconds)";
57   - "-v", Arg.Int (fun v -> verbosity:=v), "<val> Sets verbosity level of parser\n 0 - print only status information\n 1 - print data relevant to the status of a given sentence (default)\n 2 - print all data structures";
58   - "--img", Arg.Int (fun v -> img:=v), "<val> Selects which images are included in output html page \n 0 - no images included\n 1 - simple dependency trees included (default)\n 2 - dependency trees included";
59   - "--output", Arg.String (fun s -> output_dir:=s), "<dir> Sets output directory (by default results/)";
60   - "--sel_modes", Arg.Unit (fun () -> select_sentence_modes_flag:=true), "Select sencence modes";
61   - "--no_sel_modes", Arg.Unit (fun () -> select_sentence_modes_flag:=false), "Do not select sencence modes (default)";
62   - "--sel_sent", Arg.Unit (fun () -> select_sentences_flag:=true), "Select parsed sentences (default)";
63   - "--no_sel_sent", Arg.Unit (fun () -> select_sentences_flag:=false), "Do not select parsed sentences";
64   - "--sem", Arg.Unit (fun () -> semantic_processing_flag:=true), "Perform semantic processing (default)";
65   - "--no_sem", Arg.Unit (fun () -> semantic_processing_flag:=false), "Do not perforf semantic processing";
66   - ]
67   -
68   -let usage_msg =
69   - "Usage: domparser <options>\nInput is a sequence of lines. Empty line ends the sequence and invoke parsing. Double empty line shutdown parser.\nOptions are:"
70   -
71   -let message = "ENIAM_LCGparser, semantic parser for Logical Categorial Grammar formalism\n\
72   -Copyright (C) 2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>\n\
73   -Copyright (C) 2017 Institute of Computer Science Polish Academy of Sciences"
74   -
75   -let anon_fun s = raise (Arg.Bad ("invalid argument: " ^ s))
76   -
77   -let input_text channel =
78   - let s = ref (try input_line channel with End_of_file -> "") in
79   - let lines = ref [] in
80   - while !s <> "" do
81   - lines := !s :: !lines;
82   - s := try input_line channel with End_of_file -> ""
83   - done;
84   - String.concat "\n" (List.rev !lines)
85   -
86   -let rec main_loop sub_in sub_out in_chan out_chan =
87   - let text = input_text in_chan in
88   - if text = "" then () else (
89   - let text,tokens,lex_sems,msg =
90   - if !lexSemantics_built_in then
91   - let text,tokens,msg = ENIAMsubsyntax.catch_parse_text text in
92   - let lex_sems,msg =
93   - if msg <> "" then ExtArray.make 0 ENIAMlexSemanticsTypes.empty_lex_sem, msg
94   - else ENIAMdomainLexSemantics.catch_assign2 tokens text in
95   - text,tokens,lex_sems,msg else (
96   - Printf.fprintf sub_out "%s\n\n%!" text;
97   - (Marshal.from_channel sub_in : ENIAMsubsyntaxTypes.text * ENIAMtokenizerTypes.token_env ExtArray.t * ENIAMlexSemanticsTypes.lex_sem ExtArray.t * string)) in
98   - if msg <> "" then
99   - (match !output with
100   - | Html -> Printf.fprintf out_chan "%s\n%!" msg
101   - | Marsh -> Marshal.to_channel out_chan (text,tokens,lex_sems,msg) []; flush out_chan) else (
102   - let text = ENIAMexec.translate_text text in
103   - let text = ENIAMexec.parse !timeout !verbosity rules dep_rules tokens lex_sems text in
104   - let text = if !select_sentence_modes_flag then ENIAMselectSent.select_sentence_modes_text text else text in
105   - let text = if !select_sentences_flag then ENIAMselectSent.select_sentences_text ENIAMexecTypes.Struct text else text in
106   - let text = if !semantic_processing_flag then ENIAMexec.semantic_processing !verbosity tokens lex_sems text else text in
107   - (match !output with
108   - | Html -> ENIAMvisualization.print_html_text !output_dir "parsed_text" text !img !verbosity tokens
109   - | Marsh -> Marshal.to_channel out_chan (text,tokens,lex_sems,msg) []; flush out_chan));
110   - prerr_endline "Done!";
111   - main_loop sub_in sub_out in_chan out_chan)
112   -
113   -let get_sock_addr host_name port =
114   - let he = Unix.gethostbyname host_name in
115   - let addr = he.Unix.h_addr_list in
116   - Unix.ADDR_INET(addr.(0),port)
117   -
118   -let _ =
119   - prerr_endline message;
120   - ENIAMcategoriesPL.initialize ();
121   - ENIAMsemLexicon.initialize ();
122   - Arg.parse spec_list anon_fun usage_msg;
123   - if !lexSemantics_built_in then ENIAMlexSemantics.initialize ();
124   - Gc.compact ();
125   - let sub_in,sub_out =
126   - if !lexSemantics_built_in then stdin,stdout
127   - else Unix.open_connection (get_sock_addr !lexSemantics_host !lexSemantics_port) in
128   - prerr_endline "Ready!";
129   - if !comm_stdio then main_loop sub_in sub_out stdin stdout
130   - else
131   - let sockaddr = Unix.ADDR_INET(Unix.inet_addr_any,!port) in
132   - Unix.establish_server (main_loop sub_in sub_out) sockaddr
133   -
134   -let examples = [
135   - (* "Szpak","Szpak śpiewa."; *)
136   - (* "miał","Miałem miał."; *)
137   -(* "Ala","Ala ma kota.";
138   - "Ale","Ale mają kota:"; *)
139   - (* "zima","Szpak frunie zimą.";*)
140   - (* "październik","Kot miauczy w październiku."; *)
141   -(* "Szpak-Kot","Szpak frunie. Kot miauczy.";
142   - "powiedział","Szpak powiedział: „Frunę. Kiszę.”";*)
143   - (* "teraz","Teraz frunie jakiś szpak.";
144   - "chłopcy","Chłopcy mają ulicę kwiatami."; *)
145   - (* "arabia","Arabia Saudyjska biegnie.";*)
146   -(* "Tom","Tom idzie."; *)
147   - (* "liceum","W 1984-89 uczęszczał do VII Liceum Ogólnokształcącego im. K.K. Baczyńskiego w Szczecinie.";
148   - "studia","Następnie studiował architekturę na Politechnice Szczecińskiej, dyplom uzyskał w 1994."; *)
149   - (* "przez_nią","Frunę przez nią."; *)
150   - (* "o_nie","Witold frasuje się o nie."; *)
151   - (* "or1","- Frunę."; *)
152   - (* "or2","- Frunę - powiedział szpak."; *)
153   - (*"or3","- Frunę! - powiedział szpak.";*)
154   -]
155   -(*
156   -let _ =
157   - ENIAMsubsyntax.initialize ();
158   - ENIAMcategoriesPL.initialize ();
159   - ENIAMwalParser.initialize ();
160   - ENIAMwalReduce.initialize ();
161   - Xlist.iter examples (fun (name,example) ->
162   - let text,tokens,msg = ENIAMsubsyntax.catch_parse_text example in
163   - if msg <> "" then print_endline msg else (
164   - let lex_sems = ENIAMlexSemantics.assign tokens text in
165   - let text = ENIAMexec.translate_text text in
166   - let text = ENIAMexec.parse 30. !verbosity rules tokens lex_sems text in
167   - (* let text = ENIAMselectSent.select_sentence_modes_text text in *)
168   - let text = ENIAMselectSent.select_sentences_text ENIAMexecTypes.Struct text in
169   - ENIAMvisualization.print_html_text "results/" "parsed_text" text !img !verbosity tokens))
170   - *)
exec/makefile
... ... @@ -6,7 +6,7 @@ OCAMLFLAGS=$(INCLUDES) -g
6 6 OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa \
7 7 eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-integration.cmxa \
8 8 eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa \
9   - eniam-lexSemantics.cmxa eniam-semantics.cmxa #eniam-exec.cmxa
  9 + eniam-lexSemantics.cmxa eniam-semantics.cmxa eniam-exec.cmxa
10 10 INSTALLDIR=`ocamlc -where`/eniam
11 11  
12 12 SOURCES= ENIAMexecTypes.ml ENIAMexec.ml ENIAMselectSent.ml ENIAMvisualization.ml
... ... @@ -31,13 +31,10 @@ eniam-exec.cma: $(SOURCES)
31 31 eniam-exec.cmxa: $(SOURCES)
32 32 ocamlopt -linkall -a -o eniam-exec.cmxa $(INCLUDES) $^
33 33  
34   -parser: $(SOURCES) parser.ml
  34 +parser: parser.ml
35 35 $(OCAMLOPT) -o parser $(OCAMLOPTFLAGS) $^
36 36  
37   -domparser: $(SOURCES) domparser.ml
38   - $(OCAMLOPT) -o domparser $(OCAMLOPTFLAGS) eniam-domainLexSemantics.cmxa $^
39   -
40   -semparser: $(SOURCES) semparser.ml
  37 +semparser: semparser.ml
41 38 mkdir -p results
42 39 $(OCAMLOPT) -o semparser $(OCAMLOPTFLAGS) $^
43 40  
... ...
lexSemantics/ENIAMwalRenderer.ml
... ... @@ -257,7 +257,7 @@ let render_morf_cat cats = function
257 257  
258 258 let extract_sel_prefs sel_prefs =
259 259 Xlist.map sel_prefs (function
260   - Predef s -> s
  260 + SynsetName s -> s
261 261 | _ -> failwith "extract_sel_prefs")
262 262  
263 263 let render_schema schema =
... ...