diff --git a/exec/domparser.ml b/exec/domparser.ml deleted file mode 100644 index 64113fd..0000000 --- a/exec/domparser.ml +++ /dev/null @@ -1,170 +0,0 @@ -(* - * ENIAMexec implements ENIAM processing stream - * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> - * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences - * - * This library is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - *) - -open ENIAMsubsyntaxTypes -open Xstd - -let rules = ENIAM_LCGlexicon.make_rules false ENIAM_LCGlexiconTypes.user_lexicon_filename -let dep_rules = ENIAM_LCGlexicon.make_rules true ENIAM_LCGlexiconTypes.user_lexicon_filename - -type output = (*Text | Xml |*) Html | Marsh (*| Graphviz*) - -let output = ref Html -let comm_stdio = ref true -let port = ref 5439 -let lexSemantics_built_in = ref true -let lexSemantics_host = ref "localhost" -let lexSemantics_port = ref 5739 -let verbosity = ref 1 -let img = ref 1 -let timeout = ref 30. -let select_sentence_modes_flag = ref false -let select_sentences_flag = ref true -let semantic_processing_flag = ref true -let output_dir = ref "results/" -let spec_list = [ - "-i", Arg.Unit (fun () -> comm_stdio:=true), "Communication using stdio (default)"; - "-p", Arg.Int (fun p -> comm_stdio:=false; port:=p), "<port> Communication using sockets on given port number"; - (*"-t", Arg.Unit (fun () -> output:=Text), "Output as plain text (default)"; - "-x", Arg.Unit (fun () -> output:=Xml), "Output as XML";*) - "-m", Arg.Unit (fun () -> output:=Marsh), "Output as marshalled Ocaml data structure"; - "-h", Arg.Unit (fun () -> output:=Html), "Output as HTML (default)"; - (*"-g", Arg.Unit (fun () -> output:=Graphviz; sentence_split:=false), "Output as graphviz dot file; turns sentence split off";*) - (* "-r", Arg.String (fun p -> - ENIAMtokenizerTypes.set_resource_path p; - ENIAMmorphologyTypes.set_resource_path p; - ENIAMsubsyntaxTypes.set_resource_path p), "<path> Set resource path"; *) - "-b", Arg.Unit (fun () -> lexSemantics_built_in:=true), "Use built in version of ENIAMlexSemantics (default)"; - "--port", Arg.Int (fun p -> lexSemantics_built_in:=false; lexSemantics_port:=p), "<port> Connect to ENIAMlexSemantics on a given port"; - "--host", Arg.String (fun s -> lexSemantics_built_in:=false; lexSemantics_host:=s), "<hostname> Connect to ENIAMlexSemantics on a given host (by default localhost)"; - "--timeout", Arg.Float (fun x -> timeout:=x), "<seconds> Sets timeout value for parser (default 30 seconds)"; - "-v", Arg.Int (fun v -> verbosity:=v), "<val> Sets verbosity level of parser\n 0 - print only status information\n 1 - print data relevant to the status of a given sentence (default)\n 2 - print all data structures"; - "--img", Arg.Int (fun v -> img:=v), "<val> Selects which images are included in output html page \n 0 - no images included\n 1 - simple dependency trees included (default)\n 2 - dependency trees included"; - "--output", Arg.String (fun s -> output_dir:=s), "<dir> Sets output directory (by default results/)"; - "--sel_modes", Arg.Unit (fun () -> select_sentence_modes_flag:=true), "Select sencence modes"; - "--no_sel_modes", Arg.Unit (fun () -> select_sentence_modes_flag:=false), "Do not select sencence modes (default)"; - "--sel_sent", Arg.Unit (fun () -> select_sentences_flag:=true), "Select parsed sentences (default)"; - "--no_sel_sent", Arg.Unit (fun () -> select_sentences_flag:=false), "Do not select parsed sentences"; - "--sem", Arg.Unit (fun () -> semantic_processing_flag:=true), "Perform semantic processing (default)"; - "--no_sem", Arg.Unit (fun () -> semantic_processing_flag:=false), "Do not perforf semantic processing"; - ] - -let usage_msg = - "Usage: domparser <options>\nInput is a sequence of lines. Empty line ends the sequence and invoke parsing. Double empty line shutdown parser.\nOptions are:" - -let message = "ENIAM_LCGparser, semantic parser for Logical Categorial Grammar formalism\n\ -Copyright (C) 2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>\n\ -Copyright (C) 2017 Institute of Computer Science Polish Academy of Sciences" - -let anon_fun s = raise (Arg.Bad ("invalid argument: " ^ s)) - -let input_text channel = - let s = ref (try input_line channel with End_of_file -> "") in - let lines = ref [] in - while !s <> "" do - lines := !s :: !lines; - s := try input_line channel with End_of_file -> "" - done; - String.concat "\n" (List.rev !lines) - -let rec main_loop sub_in sub_out in_chan out_chan = - let text = input_text in_chan in - if text = "" then () else ( - let text,tokens,lex_sems,msg = - if !lexSemantics_built_in then - let text,tokens,msg = ENIAMsubsyntax.catch_parse_text text in - let lex_sems,msg = - if msg <> "" then ExtArray.make 0 ENIAMlexSemanticsTypes.empty_lex_sem, msg - else ENIAMdomainLexSemantics.catch_assign2 tokens text in - text,tokens,lex_sems,msg else ( - Printf.fprintf sub_out "%s\n\n%!" text; - (Marshal.from_channel sub_in : ENIAMsubsyntaxTypes.text * ENIAMtokenizerTypes.token_env ExtArray.t * ENIAMlexSemanticsTypes.lex_sem ExtArray.t * string)) in - if msg <> "" then - (match !output with - | Html -> Printf.fprintf out_chan "%s\n%!" msg - | Marsh -> Marshal.to_channel out_chan (text,tokens,lex_sems,msg) []; flush out_chan) else ( - let text = ENIAMexec.translate_text text in - let text = ENIAMexec.parse !timeout !verbosity rules dep_rules tokens lex_sems text in - let text = if !select_sentence_modes_flag then ENIAMselectSent.select_sentence_modes_text text else text in - let text = if !select_sentences_flag then ENIAMselectSent.select_sentences_text ENIAMexecTypes.Struct text else text in - let text = if !semantic_processing_flag then ENIAMexec.semantic_processing !verbosity tokens lex_sems text else text in - (match !output with - | Html -> ENIAMvisualization.print_html_text !output_dir "parsed_text" text !img !verbosity tokens - | Marsh -> Marshal.to_channel out_chan (text,tokens,lex_sems,msg) []; flush out_chan)); - prerr_endline "Done!"; - main_loop sub_in sub_out in_chan out_chan) - -let get_sock_addr host_name port = - let he = Unix.gethostbyname host_name in - let addr = he.Unix.h_addr_list in - Unix.ADDR_INET(addr.(0),port) - -let _ = - prerr_endline message; - ENIAMcategoriesPL.initialize (); - ENIAMsemLexicon.initialize (); - Arg.parse spec_list anon_fun usage_msg; - if !lexSemantics_built_in then ENIAMlexSemantics.initialize (); - Gc.compact (); - let sub_in,sub_out = - if !lexSemantics_built_in then stdin,stdout - else Unix.open_connection (get_sock_addr !lexSemantics_host !lexSemantics_port) in - prerr_endline "Ready!"; - if !comm_stdio then main_loop sub_in sub_out stdin stdout - else - let sockaddr = Unix.ADDR_INET(Unix.inet_addr_any,!port) in - Unix.establish_server (main_loop sub_in sub_out) sockaddr - -let examples = [ - (* "Szpak","Szpak śpiewa."; *) - (* "miał","Miałem miał."; *) -(* "Ala","Ala ma kota."; - "Ale","Ale mają kota:"; *) - (* "zima","Szpak frunie zimą.";*) - (* "październik","Kot miauczy w październiku."; *) -(* "Szpak-Kot","Szpak frunie. Kot miauczy."; - "powiedział","Szpak powiedział: „Frunę. Kiszę.”";*) - (* "teraz","Teraz frunie jakiś szpak."; - "chłopcy","Chłopcy mają ulicę kwiatami."; *) - (* "arabia","Arabia Saudyjska biegnie.";*) -(* "Tom","Tom idzie."; *) - (* "liceum","W 1984-89 uczęszczał do VII Liceum Ogólnokształcącego im. K.K. Baczyńskiego w Szczecinie."; - "studia","Następnie studiował architekturę na Politechnice Szczecińskiej, dyplom uzyskał w 1994."; *) - (* "przez_nią","Frunę przez nią."; *) - (* "o_nie","Witold frasuje się o nie."; *) - (* "or1","- Frunę."; *) - (* "or2","- Frunę - powiedział szpak."; *) - (*"or3","- Frunę! - powiedział szpak.";*) -] -(* -let _ = - ENIAMsubsyntax.initialize (); - ENIAMcategoriesPL.initialize (); - ENIAMwalParser.initialize (); - ENIAMwalReduce.initialize (); - Xlist.iter examples (fun (name,example) -> - let text,tokens,msg = ENIAMsubsyntax.catch_parse_text example in - if msg <> "" then print_endline msg else ( - let lex_sems = ENIAMlexSemantics.assign tokens text in - let text = ENIAMexec.translate_text text in - let text = ENIAMexec.parse 30. !verbosity rules tokens lex_sems text in - (* let text = ENIAMselectSent.select_sentence_modes_text text in *) - let text = ENIAMselectSent.select_sentences_text ENIAMexecTypes.Struct text in - ENIAMvisualization.print_html_text "results/" "parsed_text" text !img !verbosity tokens)) - *) diff --git a/exec/makefile b/exec/makefile index 32ba252..1ccbcf0 100755 --- a/exec/makefile +++ b/exec/makefile @@ -6,7 +6,7 @@ OCAMLFLAGS=$(INCLUDES) -g OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa \ eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-integration.cmxa \ eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa \ - eniam-lexSemantics.cmxa eniam-semantics.cmxa #eniam-exec.cmxa + eniam-lexSemantics.cmxa eniam-semantics.cmxa eniam-exec.cmxa INSTALLDIR=`ocamlc -where`/eniam SOURCES= ENIAMexecTypes.ml ENIAMexec.ml ENIAMselectSent.ml ENIAMvisualization.ml @@ -31,13 +31,10 @@ eniam-exec.cma: $(SOURCES) eniam-exec.cmxa: $(SOURCES) ocamlopt -linkall -a -o eniam-exec.cmxa $(INCLUDES) $^ -parser: $(SOURCES) parser.ml +parser: parser.ml $(OCAMLOPT) -o parser $(OCAMLOPTFLAGS) $^ -domparser: $(SOURCES) domparser.ml - $(OCAMLOPT) -o domparser $(OCAMLOPTFLAGS) eniam-domainLexSemantics.cmxa $^ - -semparser: $(SOURCES) semparser.ml +semparser: semparser.ml mkdir -p results $(OCAMLOPT) -o semparser $(OCAMLOPTFLAGS) $^ diff --git a/lexSemantics/ENIAMwalRenderer.ml b/lexSemantics/ENIAMwalRenderer.ml index efaad61..a4dfa5d 100644 --- a/lexSemantics/ENIAMwalRenderer.ml +++ b/lexSemantics/ENIAMwalRenderer.ml @@ -257,7 +257,7 @@ let render_morf_cat cats = function let extract_sel_prefs sel_prefs = Xlist.map sel_prefs (function - Predef s -> s + SynsetName s -> s | _ -> failwith "extract_sel_prefs") let render_schema schema =