Commit cc1f25eb75999621d427e059e1614a05cdbf1e7f

Authored by Wojciech Jaworski
1 parent 4102403e

Wstępna wersja biblioteki eniam-exec

LCGlexicon/interface.ml deleted
1   -(*
2   - * ENIAM_LCGlexicon is a library that provides LCG lexicon for Polish
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
5   - *
6   - * This library is free software: you can redistribute it and/or modify
7   - * it under the terms of the GNU Lesser General Public License as published by
8   - * the Free Software Foundation, either version 3 of the License, or
9   - * (at your option) any later version.
10   - *
11   - * This library is distributed in the hope that it will be useful,
12   - * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   - * GNU Lesser General Public License for more details.
15   - *
16   - * You should have received a copy of the GNU Lesser General Public License
17   - * along with this program. If not, see <http://www.gnu.org/licenses/>.
18   - *)
19   -
20   -open ENIAM_LCGlexiconTypes
21   -open ENIAM_LCGtypes
22   -open ENIAMsubsyntaxTypes
23   -
24   -let rules = ENIAM_LCGlexicon.make_rules ENIAM_LCGlexiconTypes.rules_filename
25   -
26   -let examples = [
27   - (* "Szpak","Szpak śpiewa.";*)
28   - (* "miał","Miałem miał."; *)
29   -(* "Ala","Ala ma kota.";
30   - "Ale","Ale mają kota:"; *)
31   - (* "zima","Szpak frunie zimą.";*)
32   - (* "październik","Kot miauczy w październiku."; *)
33   -(* "Szpak-Kot","Szpak frunie. Kot miauczy.";
34   - "powiedział","Szpak powiedział: „Frunę. Kiszę.”";*)
35   - (* "teraz","Teraz frunie jakiś szpak.";
36   - "chłopcy","Chłopcy mają ulicę kwiatami."; *)
37   - (* "arabia","Arabia Saudyjska biegnie.";*)
38   -(* "Tom","Tom idzie."; *)
39   - "liceum","W 1984-89 uczęszczał do VII Liceum Ogólnokształcącego im. K.K. Baczyńskiego w Szczecinie.";
40   - "studia","Następnie studiował architekturę na Politechnice Szczecińskiej, dyplom uzyskał w 1994.";
41   -]
42   -
43   -let clarify_categories senses token =
44   - match token.ENIAMtokenizerTypes.token with
45   - ENIAMtokenizerTypes.Lemma(lemma,pos,interp) -> List.flatten (Xlist.map interp (fun interp -> ENIAMcategoriesPL.clarify_categories false senses (lemma,pos,interp)))
46   - | ENIAMtokenizerTypes.Proper(lemma,pos,interp,_) -> List.flatten (Xlist.map interp (fun interp -> ENIAMcategoriesPL.clarify_categories true senses (lemma,pos,interp)))
47   - | ENIAMtokenizerTypes.Interp lemma -> ENIAMcategoriesPL.clarify_categories false senses (lemma,"interp",[])
48   - | _ -> []
49   -
50   -let create_chart tokens lex_sems paths last =
51   - ENIAM_LCGrenderer.reset_variable_numbers ();
52   - let chart = ENIAM_LCGchart.make last in
53   - let chart = Xlist.fold paths chart (fun chart (id,lnode,rnode) ->
54   - let t = ExtArray.get tokens id in
55   - let s = ExtArray.get lex_sems id in
56   - ENIAM_LCGrenderer.reset_variable_names ();
57   - ENIAM_LCGrenderer.add_variable_numbers ();
58   - let cats = clarify_categories ["X"] t in
59   - let l = ENIAM_LCGlexicon.create_entries rules id t.ENIAMtokenizerTypes.orth cats s.ENIAMlexSemanticsTypes.schemata in
60   - ENIAM_LCGchart.add_inc_list chart lnode rnode l 0) in
61   - chart
62   -
63   -let test_example name tokens lex_sems paths last =
64   - ENIAM_LCGreductions.reset_variant_label ();
65   - let chart = create_chart tokens lex_sems paths last in
66   - ENIAM_LCGlatexOf.print_chart "results/" (name^"1_chart") "a1" chart;
67   - let chart,references = ENIAM_LCGchart.lazify chart in
68   - ENIAM_LCGlatexOf.print_chart "results/" (name^"2_chart") "a4" chart;
69   - ENIAM_LCGlatexOf.print_references "results/" (name^"2_references") "a4" references;
70   - let chart = ENIAM_LCGchart.parse chart references 30. Sys.time in (* uwaga: niejawna zmiana imperatywna w references *)
71   - ENIAM_LCGlatexOf.print_chart "results/" (name^"3_chart") "a4" chart;
72   - ENIAM_LCGlatexOf.print_references "results/" (name^"3_references") "a4" references;
73   - if ENIAM_LCGchart.is_parsed chart then (
74   - let term = ENIAM_LCGchart.get_parsed_term chart in
75   - Xlatex.latex_file_out "results/" (name^"4_term") "a4" false (fun file ->
76   - Printf.fprintf file "\\[%s\\]\n" (ENIAM_LCGlatexOf.linear_term 0 term));
77   - Xlatex.latex_compile_and_clean "results/" (name^"4_term");
78   - let dependency_tree = ENIAM_LCGreductions.reduce term references in
79   - ENIAM_LCGlatexOf.print_dependency_tree "results/" (name^"4_dependency_tree") "a0" dependency_tree;
80   - if ENIAM_LCGreductions.is_reduced_dependency_tree dependency_tree then (
81   - ENIAM_LCGreductions.assign_labels dependency_tree; (* uwaga: niejawna zmiana imperatywna w dependency_tree *)
82   - ENIAM_LCGlatexOf.print_dependency_tree "results/" (name^"5_dependency_tree") "a4" dependency_tree;
83   - ENIAM_LCGreductions.remove_cuts dependency_tree; (* uwaga: niejawna zmiana imperatywna w dependency_tree *)
84   - ENIAM_LCGlatexOf.print_dependency_tree "results/" (name^"6_dependency_tree") "a4" dependency_tree;
85   - ENIAM_LCGgraphOf.print_dependency_tree "results/" (name^"6_dependency_tree") dependency_tree;
86   - ENIAM_LCGgraphOf.print_simplified_dependency_tree "results/" (name^"6_simple_dependency_tree") dependency_tree;
87   - ())
88   - else print_endline "not reduced")
89   - else print_endline "not parsed"
90   -
91   -let rec parse_sentence name id tokens lex_sems = function
92   - RawSentence s -> id
93   - | StructSentence(paths,last) ->
94   - test_example (name ^ string_of_int id ^ "_") tokens lex_sems paths last;
95   - id + 1
96   - | DepSentence(paths) -> id
97   - | QuotedSentences sentences ->
98   - Xlist.fold sentences id (fun id p ->
99   - parse_sentence name id tokens lex_sems p.sentence)
100   - | AltSentence l ->
101   - Xlist.fold l id (fun id (mode,sentence) ->
102   - parse_sentence name id tokens lex_sems sentence)
103   -
104   -let rec parse_paragraph name id tokens lex_sems = function
105   - RawParagraph s -> id
106   - | StructParagraph sentences ->
107   - Xlist.fold sentences id (fun id p ->
108   - parse_sentence name id tokens lex_sems p.sentence)
109   - | AltParagraph l ->
110   - Xlist.fold l id (fun id (mode,paragraph) ->
111   - parse_paragraph name id tokens lex_sems paragraph)
112   -
113   -let rec parse_text name id tokens lex_sems = function
114   - RawText s -> id
115   - | StructText paragraphs ->
116   - Xlist.fold paragraphs id (fun id paragraph ->
117   - parse_paragraph name id tokens lex_sems paragraph)
118   - | AltText l ->
119   - Xlist.fold l id (fun id (mode,text) ->
120   - parse_text name id tokens lex_sems text)
121   -
122   -
123   -let _ =
124   - ENIAMsubsyntax.initialize ();
125   - ENIAMcategoriesPL.initialize ();
126   - Xlist.iter examples (fun (name,example) ->
127   - let text,tokens = ENIAMsubsyntax.parse_text example in
128   - let lex_sems = ENIAMlexSemantics.assign tokens text in
129   - ignore(parse_text name 1 tokens lex_sems text))
130   -
131   -(*
132   -type output = Text | Xml | Html | Marsh | Graphviz
133   -
134   -let output = ref Text
135   -let comm_stdio = ref true
136   -let sentence_split = ref true
137   -let port = ref 0
138   -
139   -let spec_list = [
140   - "-s", Arg.Unit (fun () -> sentence_split:=true), "Split input into sentences (default)";
141   - "-n", Arg.Unit (fun () -> sentence_split:=false), "Do not split input into sentences";
142   - "-i", Arg.Unit (fun () -> comm_stdio:=true), "Communication using stdio (default)";
143   - "-p", Arg.Int (fun p -> comm_stdio:=false; port:=p), "<port> Communication using sockets on given port number";
144   - "-t", Arg.Unit (fun () -> output:=Text), "Output as plain text (default)";
145   - "-x", Arg.Unit (fun () -> output:=Xml), "Output as XML";
146   - "-m", Arg.Unit (fun () -> output:=Marsh), "Output as marshalled Ocaml data structure";
147   - "-h", Arg.Unit (fun () -> output:=Html), "Output as HTML";
148   - "-g", Arg.Unit (fun () -> output:=Graphviz; sentence_split:=false), "Output as graphviz dot file; turns sentence split off";
149   - (* "-r", Arg.String (fun p ->
150   - ENIAMtokenizerTypes.set_resource_path p;
151   - ENIAMmorphologyTypes.set_resource_path p;
152   - ENIAMsubsyntaxTypes.set_resource_path p), "<path> Set resource path"; *)
153   - ]
154   -
155   -let usage_msg =
156   - "Usage: subsyntax <options>\nInput is a sequence of lines. Empty line ends the sequence and invoke parsing. Double empty line shutdown parser.\nOptions are:"
157   -
158   -let message = "ENIAMsubsyntax: MWE, abbreviation and sentence detecion for Polish\n\
159   -Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>\n\
160   -Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences"
161   -
162   -let anon_fun s = raise (Arg.Bad ("invalid argument: " ^ s))
163   -
164   -let input_text channel =
165   - let s = ref (try input_line channel with End_of_file -> "") in
166   - let lines = ref [] in
167   - while !s <> "" do
168   - lines := !s :: !lines;
169   - s := try input_line channel with End_of_file -> ""
170   - done;
171   - String.concat "\n" (List.rev !lines)
172   -
173   -let rec main_loop in_chan out_chan =
174   - let text = input_text in_chan in
175   - if text = "" then () else (
176   - (* print_endline "input text begin";
177   - print_endline text;
178   - print_endline "input text end"; *)
179   - (if !sentence_split then
180   - let text,tokens = ENIAMsubsyntax.parse_text text in
181   - (match !output with
182   - Text -> output_string out_chan (ENIAMsubsyntaxStringOf.text "" tokens text ^ "\n" ^ ENIAMsubsyntaxStringOf.token_extarray tokens ^ "\n\n")
183   - | Xml -> output_string out_chan (Xml.to_string (ENIAMsubsyntaxXMLof.text_and_tokens text tokens) ^ "\n\n")
184   - | Html -> output_string out_chan (ENIAMsubsyntaxHTMLof.text_and_tokens text tokens ^ "\n\n")
185   - | Marsh -> Marshal.to_channel out_chan (text,tokens) []
186   - | Graphviz -> failwith "main_loop: ni")
187   - else
188   - let tokens = ENIAMsubsyntax.parse text in
189   - (match !output with
190   - Text -> output_string out_chan (ENIAMsubsyntaxStringOf.token_list tokens ^ "\n\n")
191   - | Xml -> output_string out_chan (Xml.to_string (ENIAMsubsyntaxXMLof.token_list tokens) ^ "\n\n")
192   - | Html -> output_string out_chan (ENIAMsubsyntaxHTMLof.token_list tokens ^ "\n\n")
193   - | Marsh -> Marshal.to_channel out_chan tokens []
194   - | Graphviz -> output_string out_chan (ENIAMsubsyntaxGraphOf.token_list tokens ^ "\n\n")));
195   - flush out_chan;
196   - main_loop in_chan out_chan)
197   -
198   -let _ =
199   - prerr_endline message;
200   - Arg.parse spec_list anon_fun usage_msg;
201   - Gc.compact ();
202   - prerr_endline "Ready!";
203   - if !comm_stdio then main_loop stdin stdout
204   - else
205   - let sockaddr = Unix.ADDR_INET(Unix.inet_addr_any,!port) in
206   - Unix.establish_server main_loop sockaddr
207   -*)
LCGlexicon/makefile
... ... @@ -4,8 +4,6 @@ OCAMLDEP=ocamldep
4 4 INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam
5 5 OCAMLFLAGS=$(INCLUDES) -g
6 6 OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa
7   -OCAMLOPTFLAGS2=$(OCAMLOPTFLAGS) eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa
8   -OCAMLOPTFLAGS3=$(OCAMLOPTFLAGS2) eniam-lexSemantics.cmxa
9 7 INSTALLDIR=`ocamlc -where`/eniam
10 8  
11 9 SOURCES= ENIAM_LCGlexiconTypes.ml ENIAMcategoriesPL.ml ENIAM_LCGlexiconParser.ml ENIAM_LCGlexicon.ml
... ... @@ -39,17 +37,6 @@ test: test.ml
39 37 mkdir -p results
40 38 $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml
41 39  
42   -test2: test2.ml
43   - mkdir -p results
44   - $(OCAMLOPT) -o test2 $(OCAMLOPTFLAGS3) test2.ml
45   -
46   -interface: interface.ml
47   - $(OCAMLOPT) -o parser $(OCAMLOPTFLAGS3) interface.ml
48   -
49   -semparser: semparser.ml
50   - mkdir -p results
51   - $(OCAMLOPT) -o semparser $(OCAMLOPTFLAGS2) semparser.ml
52   -
53 40 print_lexicon: ENIAM_LCGlexiconLatexOf.ml
54 41 mkdir -p results
55 42 $(OCAMLOPT) -o print_lexicon $(OCAMLOPTFLAGS) ENIAM_LCGlexiconLatexOf.ml
... ... @@ -75,4 +62,4 @@ print_lexicon: ENIAM_LCGlexiconLatexOf.ml
75 62 $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $<
76 63  
77 64 clean:
78   - rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a test test2 parser print_lexicon
  65 + rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a test print_lexicon
... ...
LCGlexicon/semparser.ml deleted
1   -open Xstd
2   -open ENIAMsubsyntaxTypes
3   -
4   -let rules = ENIAM_LCGlexicon.make_rules false ENIAM_LCGlexiconTypes.user_lexicon_filename
5   -
6   -let load_senses_map filename =
7   - File.fold_tab filename StringMap.empty (fun map -> function
8   - [lemma;cat] -> StringMap.add_inc map lemma [cat] (fun l -> cat :: l)
9   - | l -> failwith ("load_senses_map: " ^ String.concat "\t" l))
10   -
11   -let senses_map = load_senses_map ENIAM_LCGlexiconTypes.user_senses_filename
12   -
13   -
14   -let examples = [
15   - (* "liceum","W 1984-89 uczęszczał do VII Liceum Ogólnokształcącego im. K.K. Baczyńskiego w Szczecinie."; *)
16   - "studia","Następnie studiował architekturę na Politechnice Szczecińskiej, dyplom uzyskał w 1994.";
17   -]
18   -
19   -let clarify_categories token =
20   - match token.ENIAMtokenizerTypes.token with
21   - ENIAMtokenizerTypes.Lemma(lemma,pos,interp) ->
22   - let senses = try StringMap.find senses_map lemma with Not_found -> ["X"] in
23   - List.flatten (Xlist.map interp (fun interp -> ENIAMcategoriesPL.clarify_categories false senses (lemma,pos,interp)))
24   - | ENIAMtokenizerTypes.Proper(lemma,pos,interp,senses) -> List.flatten (Xlist.map interp (fun interp -> ENIAMcategoriesPL.clarify_categories true senses (lemma,pos,interp)))
25   - | ENIAMtokenizerTypes.Interp lemma -> ENIAMcategoriesPL.clarify_categories false ["X"] (lemma,"interp",[])
26   - | _ -> []
27   -
28   -let create_chart tokens paths last =
29   - ENIAM_LCGrenderer.reset_variable_numbers ();
30   - let chart = ENIAM_LCGchart.make last in
31   - let chart = Xlist.fold paths chart (fun chart (id,lnode,rnode) ->
32   - let t = ExtArray.get tokens id in
33   - ENIAM_LCGrenderer.reset_variable_names ();
34   - ENIAM_LCGrenderer.add_variable_numbers ();
35   - let cats = clarify_categories t in
36   - let l = ENIAM_LCGlexicon.create_entries rules id t.ENIAMtokenizerTypes.orth cats [] in
37   - ENIAM_LCGchart.add_inc_list chart lnode rnode l 0) in
38   - chart
39   -
40   -let test_example name tokens paths last =
41   - ENIAM_LCGreductions.reset_variant_label ();
42   - let chart = create_chart tokens paths last in
43   - ENIAM_LCGlatexOf.print_chart "results/" (name^"1_chart") "a1" chart;
44   - let chart,references = ENIAM_LCGchart.lazify chart in
45   - ENIAM_LCGlatexOf.print_chart "results/" (name^"2_chart") "a4" chart;
46   - ENIAM_LCGlatexOf.print_references "results/" (name^"2_references") "a4" references;
47   - let chart = ENIAM_LCGchart.parse chart references 30. Sys.time in (* uwaga: niejawna zmiana imperatywna w references *)
48   - ENIAM_LCGlatexOf.print_chart "results/" (name^"3_chart") "a4" chart;
49   - ENIAM_LCGlatexOf.print_references "results/" (name^"3_references") "a4" references;
50   - if ENIAM_LCGchart.is_parsed chart then (
51   - let term = ENIAM_LCGchart.get_parsed_term chart in
52   - Xlatex.latex_file_out "results/" (name^"4_term") "a4" false (fun file ->
53   - Printf.fprintf file "\\[%s\\]\n" (ENIAM_LCGlatexOf.linear_term 0 term));
54   - Xlatex.latex_compile_and_clean "results/" (name^"4_term");
55   - let dependency_tree = ENIAM_LCGreductions.reduce term references in
56   - ENIAM_LCGlatexOf.print_dependency_tree "results/" (name^"4_dependency_tree") "a0" dependency_tree;
57   - if ENIAM_LCGreductions.is_reduced_dependency_tree dependency_tree then (
58   - ENIAM_LCGreductions.assign_labels dependency_tree; (* uwaga: niejawna zmiana imperatywna w dependency_tree *)
59   - ENIAM_LCGlatexOf.print_dependency_tree "results/" (name^"5_dependency_tree") "a4" dependency_tree;
60   - ENIAM_LCGreductions.remove_cuts dependency_tree; (* uwaga: niejawna zmiana imperatywna w dependency_tree *)
61   - ENIAM_LCGlatexOf.print_dependency_tree "results/" (name^"6_dependency_tree") "a4" dependency_tree;
62   - ENIAM_LCGgraphOf.print_dependency_tree "results/" (name^"6_dependency_tree") dependency_tree;
63   - ENIAM_LCGgraphOf.print_simplified_dependency_tree "results/" (name^"6_simple_dependency_tree") dependency_tree;
64   - ())
65   - else print_endline "not reduced")
66   - else print_endline "not parsed"
67   -
68   -let rec parse_sentence name id tokens = function
69   - RawSentence s -> id
70   - | StructSentence(paths,last) ->
71   - test_example (name ^ string_of_int id ^ "_") tokens paths last;
72   - id + 1
73   - | DepSentence(paths) -> id
74   - | QuotedSentences sentences ->
75   - Xlist.fold sentences id (fun id p ->
76   - parse_sentence name id tokens p.sentence)
77   - | AltSentence l ->
78   - Xlist.fold l id (fun id (mode,sentence) ->
79   - parse_sentence name id tokens sentence)
80   -
81   -let rec parse_paragraph name id tokens = function
82   - RawParagraph s -> id
83   - | StructParagraph sentences ->
84   - Xlist.fold sentences id (fun id p ->
85   - parse_sentence name id tokens p.sentence)
86   - | AltParagraph l ->
87   - Xlist.fold l id (fun id (mode,paragraph) ->
88   - parse_paragraph name id tokens paragraph)
89   -
90   -let rec parse_text name id tokens = function
91   - RawText s -> id
92   - | StructText paragraphs ->
93   - Xlist.fold paragraphs id (fun id paragraph ->
94   - parse_paragraph name id tokens paragraph)
95   - | AltText l ->
96   - Xlist.fold l id (fun id (mode,text) ->
97   - parse_text name id tokens text)
98   -
99   -
100   -(* let _ =
101   - Xlist.iter examples (fun (name,example) ->
102   - let text,tokens = ENIAMsubsyntax.parse_text example in
103   - ignore(parse_text name 1 tokens text)) *)
104   -
105   -(*
106   -type entry = {title: string; info:string; biogram:string; (*primary:string; secondary:string;*) author:string}
107   -
108   -let process_xml = function
109   - Xml.Element("entries",[],entries) ->
110   - List.rev (Xlist.rev_map entries (function
111   - Xml.Element("entry",[],[title;info;biogram(*;primary;secondary*);author]) ->
112   - {title=Xml.to_string title; info=Xml.to_string info; biogram=Xml.to_string biogram;
113   - (*primary=Xml.to_string primary; secondary=Xml.to_string secondary;*) author=Xml.to_string author}
114   - | _ -> failwith "process_xml 1"))
115   - | _ -> failwith "process_xml 2"
116   -
117   -
118   -let load_ppibl filename =
119   - let ppibl = File.load_file_gen ("data/" ^ filename) in
120   - process_xml (Xml.parse_string ppibl)
121   -
122   -let named_entities =
123   - File.fold_tab "data/ne.tab" StringMap.empty (fun map -> function
124   - [lemma;cat] -> StringMap.add_inc map lemma [cat] (fun l -> cat :: l)
125   - | _ -> failwith "named_entities")
126   -
127   -let assign_named_entities t =
128   - match t.token with
129   - Lemma(lemma,"subst",interp) ->
130   - (try
131   - let cat = StringMap.find named_entities lemma in
132   - {t with token=Proper(lemma,"subst",interp,cat)}
133   - with Not_found -> t)
134   - | Proper(lemma,"subst",interp,_) ->
135   - (try
136   - let cat = StringMap.find named_entities lemma in
137   - {t with token=Proper(lemma,"subst",interp,cat)}
138   - with Not_found -> t)
139   - | _ -> t
140   -
141   -let test_strings = [
142   - (* "Debiutował opowiadaniem pt. <i>Zlecenie na dostawę</i>."; *)
143   - "W 1984-89 uczęszczał do VII Liceum Ogólnokształcącego im. K.K. Baczyńskiego w Szczecinie.";
144   - (* "Następnie studiował architekturę na Politechnice Szczecińskiej, dyplom uzyskał w 1994." *)
145   - (* "W 2003 obronił doktorat nauk technicznych w zakresie architektury i urbanistyki na Politechnice Krakowskiej i został adiunktem w Zakładzie Teorii Architektury, Historii i Konserwacji Zabytków IAiPP." *)
146   -]
147   -
148   -(* let _ =
149   - let entries = load_ppibl "ak322269.xml" in
150   - Xlist.iter entries (fun entry -> print_endline entry.biogram) *)
151   -
152   -(*
153   -let test_strings = [
154   - "Szpak frunie.";
155   - "Kot np. miauczy.";
156   - "Ala ma kota.";
157   - "Ale mają kota:"
158   - ]
159   -
160   -let test_strings2 = [
161   - "Szpak frunie. Kot miauczy.";
162   - "Szpak powiedział: „Frunę. Kiszę.”";
163   - ]
164   -*)
165   -
166   -let grammar = [
167   - "pos=year", Basic "year",symbol_weight;
168   - "pos=year-interval", Basic "year-interval",symbol_weight;
169   - "lemma=w,pos=prep,case=loc", Basic "time/(year+year-interval)",0.;
170   - "lemma=w,pos=prep,case=loc", Basic "locat/np*MIASTO*T*loc*T",0.;
171   -
172   - "lemma=uczęszczać,pos=praet|fin,person=ter,negation=aff,mood=indicative", Basic "ip*number*gender{|(1+time),|(1+pp*ORGANIZACJA*do*gen),|(1+locat)}",0.;
173   - "lemma=do,pos=prep,case=gen", Basic "pp*sense*lemma*case/np*sense*T*case*T",0.;
174   -
175   -]
176   -
177   -let _ =
178   - print_endline "Testy wbudowane";
179   - Xlist.iter test_strings (fun s ->
180   - print_endline ("\nTEST: " ^ s);
181   - let paths = ENIAMsubsyntax.parse s in
182   - let paths = Xlist.map paths assign_named_entities in
183   - (* print_endline (ENIAMtokenizer.xml_of tokens); *)
184   - print_endline (ENIAMpaths.to_string (paths,0)));
185   -(* Xlist.iter test_strings2 (fun s ->
186   - print_endline ("\nTEST: " ^ s);
187   - let text,tokens = ENIAMsubsyntax.parse_text s in
188   - (* print_endline (ENIAMtokenizer.xml_of tokens); *)
189   - print_endline (ENIAMsubsyntaxStringOf.tokens tokens);
190   - print_endline "";
191   - print_endline (ENIAMsubsyntaxStringOf.text "" tokens text));*)
192   -(* print_endline "Testy użytkownika.";
193   - print_endline "Wpisz tekst i naciśnij ENTER, pusty tekst kończy.";
194   - let s = ref (read_line ()) in
195   - while !s <> "" do
196   - let tokens = ENIAMtokenizer.parse !s in
197   - (* print_endline (ENIAMtokenizer.xml_of tokens); *)
198   - Xlist.iter tokens (fun token -> print_endline (ENIAMtokenizer.string_of 0 token));
199   - print_endline "Wpisz tekst i naciśnij ENTER, pusty tekst kończy.";
200   - s := read_line ()
201   - done;*)
202   - ()
203   -
204   -open ENIAM_LCGlexiconTypes
205   -open ENIAM_LCGtypes
206   -
207   -
208   -(*
209   -type output = Text | Xml | Html | Marsh | Graphviz
210   -
211   -let output = ref Text
212   -let comm_stdio = ref true
213   -let sentence_split = ref true
214   -let port = ref 0
215   -
216   -let spec_list = [
217   - "-s", Arg.Unit (fun () -> sentence_split:=true), "Split input into sentences (default)";
218   - "-n", Arg.Unit (fun () -> sentence_split:=false), "Do not split input into sentences";
219   - "-i", Arg.Unit (fun () -> comm_stdio:=true), "Communication using stdio (default)";
220   - "-p", Arg.Int (fun p -> comm_stdio:=false; port:=p), "<port> Communication using sockets on given port number";
221   - "-t", Arg.Unit (fun () -> output:=Text), "Output as plain text (default)";
222   - "-x", Arg.Unit (fun () -> output:=Xml), "Output as XML";
223   - "-m", Arg.Unit (fun () -> output:=Marsh), "Output as marshalled Ocaml data structure";
224   - "-h", Arg.Unit (fun () -> output:=Html), "Output as HTML";
225   - "-g", Arg.Unit (fun () -> output:=Graphviz; sentence_split:=false), "Output as graphviz dot file; turns sentence split off";
226   - (* "-r", Arg.String (fun p ->
227   - ENIAMtokenizerTypes.set_resource_path p;
228   - ENIAMmorphologyTypes.set_resource_path p;
229   - ENIAMsubsyntaxTypes.set_resource_path p), "<path> Set resource path"; *)
230   - ]
231   -
232   -let usage_msg =
233   - "Usage: subsyntax <options>\nInput is a sequence of lines. Empty line ends the sequence and invoke parsing. Double empty line shutdown parser.\nOptions are:"
234   -*)*)
235   -let message = "ENIAM_LCGparser, a parser for Logical Categorial Grammar formalism\n\
236   -Copyright (C) 2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>\n\
237   -Copyright (C) 2017 Institute of Computer Science Polish Academy of Sciences"
238   -(*
239   -let anon_fun s = raise (Arg.Bad ("invalid argument: " ^ s))
240   -*)
241   -let input_text channel =
242   - let s = ref (try input_line channel with End_of_file -> "") in
243   - let lines = ref [] in
244   - while !s <> "" do
245   - lines := !s :: !lines;
246   - s := try input_line channel with End_of_file -> ""
247   - done;
248   - String.concat "\n" (List.rev !lines)
249   -
250   -let rec main_loop sub_in sub_out in_chan out_chan =
251   - let text = input_text in_chan in
252   - if text = "" then () else (
253   - Printf.fprintf sub_out "%s\n\n%!" text;
254   - let text,tokens = (Marshal.from_channel sub_in : ENIAMsubsyntaxTypes.text * ENIAMtokenizerTypes.token_env ExtArray.t) in
255   - (* let text,tokens = ENIAMsubsyntax.parse_text text in *)
256   - ignore(parse_text "E"(*name*) 1 tokens text)
257   - (* print_endline "input text begin";
258   - print_endline text;
259   - print_endline "input text end"; *)
260   - (*if !sentence_split then
261   - let text,tokens = ENIAMsubsyntax.parse_text text in
262   - (match !output with
263   - Text -> output_string out_chan (ENIAMsubsyntaxStringOf.text "" tokens text ^ "\n" ^ ENIAMsubsyntaxStringOf.token_extarray tokens ^ "\n\n")
264   - | Xml -> output_string out_chan (Xml.to_string (ENIAMsubsyntaxXMLof.text_and_tokens text tokens) ^ "\n\n")
265   - | Html -> output_string out_chan (ENIAMsubsyntaxHTMLof.text_and_tokens text tokens ^ "\n\n")
266   - | Marsh -> Marshal.to_channel out_chan (text,tokens) []
267   - | Graphviz -> failwith "main_loop: ni")
268   - else
269   - let tokens = ENIAMsubsyntax.parse text in
270   - (match !output with
271   - Text -> output_string out_chan (ENIAMsubsyntaxStringOf.token_list tokens ^ "\n\n")
272   - | Xml -> output_string out_chan (Xml.to_string (ENIAMsubsyntaxXMLof.token_list tokens) ^ "\n\n")
273   - | Html -> output_string out_chan (ENIAMsubsyntaxHTMLof.token_list tokens ^ "\n\n")
274   - | Marsh -> Marshal.to_channel out_chan tokens []
275   - | Graphviz -> output_string out_chan (ENIAMsubsyntaxGraphOf.token_list tokens ^ "\n\n"))*);
276   - flush out_chan;
277   - main_loop sub_in sub_out in_chan out_chan)
278   -
279   -let get_sock_addr host_name port =
280   - let he = Unix.gethostbyname host_name in
281   - let addr = he.Unix.h_addr_list in
282   - Unix.ADDR_INET(addr.(0),port)
283   -
284   -let sub_host = "localhost"
285   -let sub_port = 5739
286   -
287   -let _ =
288   - prerr_endline message;
289   - (* ENIAMsubsyntax.initialize (); *)
290   - ENIAMcategoriesPL.initialize ();
291   - (* Arg.parse spec_list anon_fun usage_msg; *)
292   - Gc.compact ();
293   - let sub_in,sub_out = Unix.open_connection (get_sock_addr sub_host sub_port) in
294   - prerr_endline "Ready!";
295   - (*if !comm_stdio then*) main_loop sub_in sub_out stdin stdout
296   - (*else
297   - let sockaddr = Unix.ADDR_INET(Unix.inet_addr_any,!port) in
298   - Unix.establish_server main_loop sockaddr*)
LCGlexicon/test2.ml deleted
1   -(*
2   - * ENIAM_LCGlexicon is a library that provides LCG lexicon for Polish
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
5   - *
6   - * This library is free software: you can redistribute it and/or modify
7   - * it under the terms of the GNU Lesser General Public License as published by
8   - * the Free Software Foundation, either version 3 of the License, or
9   - * (at your option) any later version.
10   - *
11   - * This library is distributed in the hope that it will be useful,
12   - * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   - * GNU Lesser General Public License for more details.
15   - *
16   - * You should have received a copy of the GNU Lesser General Public License
17   - * along with this program. If not, see <http://www.gnu.org/licenses/>.
18   - *)
19   -
20   -open ENIAM_LCGlexiconTypes
21   -open ENIAM_LCGtypes
22   -open ENIAMsubsyntaxTypes
23   -
24   -let rules = ENIAM_LCGlexicon.make_rules false ENIAM_LCGlexiconTypes.rules_filename
25   -
26   -let examples = [
27   - (* "Szpak","Szpak śpiewa.";*)
28   - (* "miał","Miałem miał."; *)
29   -(* "Ala","Ala ma kota.";
30   - "Ale","Ale mają kota:"; *)
31   - (* "zima","Szpak frunie zimą.";*)
32   - (* "październik","Kot miauczy w październiku."; *)
33   -(* "Szpak-Kot","Szpak frunie. Kot miauczy.";
34   - "powiedział","Szpak powiedział: „Frunę. Kiszę.”";*)
35   - "teraz","Teraz frunie jakiś szpak.";
36   - "chłopcy","Chłopcy mają ulicę kwiatami.";
37   - (* "arabia","Arabia Saudyjska biegnie.";*)
38   -(* "Tom","Tom idzie."; *)
39   -]
40   -
41   -let clarify_categories senses token =
42   - match token.ENIAMtokenizerTypes.token with
43   - ENIAMtokenizerTypes.Lemma(lemma,pos,interp) -> List.flatten (Xlist.map interp (fun interp -> ENIAMcategoriesPL.clarify_categories false senses (lemma,pos,interp)))
44   - | ENIAMtokenizerTypes.Proper(lemma,pos,interp,_) -> List.flatten (Xlist.map interp (fun interp -> ENIAMcategoriesPL.clarify_categories true senses (lemma,pos,interp)))
45   - | ENIAMtokenizerTypes.Interp lemma -> ENIAMcategoriesPL.clarify_categories false senses (lemma,"interp",[])
46   - | _ -> []
47   -
48   -let create_chart tokens lex_sems paths last =
49   - ENIAM_LCGrenderer.reset_variable_numbers ();
50   - let chart = ENIAM_LCGchart.make last in
51   - let chart = Xlist.fold paths chart (fun chart (id,lnode,rnode) ->
52   - let t = ExtArray.get tokens id in
53   - let s = ExtArray.get lex_sems id in
54   - ENIAM_LCGrenderer.reset_variable_names ();
55   - ENIAM_LCGrenderer.add_variable_numbers ();
56   - let cats = clarify_categories ["X"] t in
57   - let l = ENIAM_LCGlexicon.create_entries rules id t.ENIAMtokenizerTypes.orth cats s.ENIAMlexSemanticsTypes.schemata in
58   - ENIAM_LCGchart.add_inc_list chart lnode rnode l 0) in
59   - chart
60   -
61   -let test_example name tokens lex_sems paths last =
62   - ENIAM_LCGreductions.reset_variant_label ();
63   - let chart = create_chart tokens lex_sems paths last in
64   - ENIAM_LCGlatexOf.print_chart "results/" (name^"1_chart") "a1" chart;
65   - let chart,references = ENIAM_LCGchart.lazify chart in
66   - ENIAM_LCGlatexOf.print_chart "results/" (name^"2_chart") "a4" chart;
67   - ENIAM_LCGlatexOf.print_references "results/" (name^"2_references") "a4" references;
68   - let chart = ENIAM_LCGchart.parse chart references 30. Sys.time in (* uwaga: niejawna zmiana imperatywna w references *)
69   - ENIAM_LCGlatexOf.print_chart "results/" (name^"3_chart") "a4" chart;
70   - ENIAM_LCGlatexOf.print_references "results/" (name^"3_references") "a4" references;
71   - if ENIAM_LCGchart.is_parsed chart then (
72   - let term = ENIAM_LCGchart.get_parsed_term chart in
73   - Xlatex.latex_file_out "results/" (name^"4_term") "a4" false (fun file ->
74   - Printf.fprintf file "\\[%s\\]\n" (ENIAM_LCGlatexOf.linear_term 0 term));
75   - Xlatex.latex_compile_and_clean "results/" (name^"4_term");
76   - let dependency_tree = ENIAM_LCGreductions.reduce term references in
77   - ENIAM_LCGlatexOf.print_dependency_tree "results/" (name^"4_dependency_tree") "a0" dependency_tree;
78   - if ENIAM_LCGreductions.is_reduced_dependency_tree dependency_tree then (
79   - ENIAM_LCGreductions.assign_labels dependency_tree; (* uwaga: niejawna zmiana imperatywna w dependency_tree *)
80   - ENIAM_LCGlatexOf.print_dependency_tree "results/" (name^"5_dependency_tree") "a4" dependency_tree;
81   - ENIAM_LCGreductions.remove_cuts dependency_tree; (* uwaga: niejawna zmiana imperatywna w dependency_tree *)
82   - ENIAM_LCGlatexOf.print_dependency_tree "results/" (name^"6_dependency_tree") "a4" dependency_tree;
83   - ENIAM_LCGgraphOf.print_dependency_tree "results/" (name^"6_dependency_tree") dependency_tree;
84   - ENIAM_LCGgraphOf.print_simplified_dependency_tree "results/" (name^"6_simple_dependency_tree") dependency_tree;
85   - ())
86   - else print_endline "not reduced")
87   - else print_endline "not parsed"
88   -
89   -let rec parse_sentence name id tokens lex_sems = function
90   - RawSentence s -> id
91   - | StructSentence(paths,last) ->
92   - test_example (name ^ string_of_int id ^ "_") tokens lex_sems paths last;
93   - id + 1
94   - | DepSentence(paths) -> id
95   - | QuotedSentences sentences ->
96   - Xlist.fold sentences id (fun id p ->
97   - parse_sentence name id tokens lex_sems p.sentence)
98   - | AltSentence l ->
99   - Xlist.fold l id (fun id (mode,sentence) ->
100   - parse_sentence name id tokens lex_sems sentence)
101   -
102   -let rec parse_paragraph name id tokens lex_sems = function
103   - RawParagraph s -> id
104   - | StructParagraph sentences ->
105   - Xlist.fold sentences id (fun id p ->
106   - parse_sentence name id tokens lex_sems p.sentence)
107   - | AltParagraph l ->
108   - Xlist.fold l id (fun id (mode,paragraph) ->
109   - parse_paragraph name id tokens lex_sems paragraph)
110   -
111   -let rec parse_text name id tokens lex_sems = function
112   - RawText s -> id
113   - | StructText paragraphs ->
114   - Xlist.fold paragraphs id (fun id paragraph ->
115   - parse_paragraph name id tokens lex_sems paragraph)
116   - | AltText l ->
117   - Xlist.fold l id (fun id (mode,text) ->
118   - parse_text name id tokens lex_sems text)
119   -
120   -
121   -let _ =
122   - ENIAMsubsyntax.initialize ();
123   - ENIAMcategoriesPL.initialize ();
124   - Xlist.iter examples (fun (name,example) ->
125   - let text,tokens = ENIAMsubsyntax.parse_text example in
126   - let lex_sems = ENIAMlexSemantics.assign tokens text in
127   - ignore(parse_text name 1 tokens lex_sems text))
parser/exec.ml renamed to exec/ENIAMexec.ml
1 1 (*
2   - * ENIAM: Categorial Syntactic-Semantic Parser for Polish
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  2 + * ENIAMexec implements ENIAM processing stream
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
5 5 *
6   - * This program is free software: you can redistribute it and/or modify
7   - * it under the terms of the GNU General Public License as published by
  6 + * This library is free software: you can redistribute it and/or modify
  7 + * it under the terms of the GNU Lesser General Public License as published by
8 8 * the Free Software Foundation, either version 3 of the License, or
9 9 * (at your option) any later version.
10 10 *
11   - * This program is distributed in the hope that it will be useful,
  11 + * This library is distributed in the hope that it will be useful,
12 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   - * GNU General Public License for more details.
  14 + * GNU Lesser General Public License for more details.
15 15 *
16   - * You should have received a copy of the GNU General Public License
  16 + * You should have received a copy of the GNU Lesser General Public License
17 17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 18 *)
19 19  
20   -open LCGtypes
21   -open ExecTypes
  20 +(* open LCGtypes *)
  21 +open ENIAMexecTypes
  22 +open Xstd
  23 +
  24 +let clarify_categories senses_map token = (* Lists the categories of one token by dispatching on its token kind and delegating to ENIAMcategoriesPL.clarify_categories. *)
  25 + match token.ENIAMtokenizerTypes.token with
  26 + ENIAMtokenizerTypes.Lemma(lemma,pos,interp) ->
  27 + let senses = try StringMap.find senses_map lemma with Not_found -> ["X"] in (* a lemma missing from senses_map gets the single placeholder sense X *)
  28 + List.flatten (Xlist.map interp (fun interp -> ENIAMcategoriesPL.clarify_categories false senses (lemma,pos,interp))) (* one call per interpretation, results concatenated *)
  29 + | ENIAMtokenizerTypes.Proper(lemma,pos,interp,senses2) ->
  30 + let senses = try StringMap.find senses_map lemma with Not_found -> senses2 in (* falls back to the senses carried by the token itself *)
  31 + List.flatten (Xlist.map interp (fun interp -> ENIAMcategoriesPL.clarify_categories true senses (lemma,pos,interp))) (* the flag is true only in this Proper branch; presumably it marks proper names - TODO confirm *)
  32 + | ENIAMtokenizerTypes.Interp lemma ->
  33 + let senses = try StringMap.find senses_map lemma with Not_found -> ["X"] in
  34 + ENIAMcategoriesPL.clarify_categories false senses (lemma,"interp",[])
  35 + | _ -> [] (* every other token kind yields no categories *)
  36 +
  37 +let create_chart rules senses_map tokens lex_sems paths last = (* Builds an LCG chart: for every (id,lnode,rnode) in paths, creates lexicon entries for token id and adds them to the chart at lnode,rnode. *)
  38 + ENIAM_LCGrenderer.reset_variable_numbers ();
  39 + let chart = ENIAM_LCGchart.make last in
  40 + let chart = Xlist.fold paths chart (fun chart (id,lnode,rnode) ->
  41 + let t = ExtArray.get tokens id in (* token record for this path element *)
  42 + let s = ExtArray.get lex_sems id in (* lexical-semantics record with the same index *)
  43 + ENIAM_LCGrenderer.reset_variable_names ();
  44 + ENIAM_LCGrenderer.add_variable_numbers ();
  45 + let cats = clarify_categories senses_map t in
  46 + let l = ENIAM_LCGlexicon.create_entries rules id t.ENIAMtokenizerTypes.orth cats s.ENIAMlexSemanticsTypes.schemata in
  47 + ENIAM_LCGchart.add_inc_list chart lnode rnode l 0) in (* NOTE(review): meaning of the trailing 0 is not visible here - confirm against ENIAM_LCGchart *)
  48 + chart
  49 +
  50 +let test_example rules senses_map name tokens lex_sems paths last = (* Runs chart creation, parsing and reduction for one sentence, writing every intermediate stage under results/ with file names prefixed by name. *)
  51 + ENIAM_LCGreductions.reset_variant_label ();
  52 + let chart = create_chart rules senses_map tokens lex_sems paths last in
  53 + ENIAM_LCGlatexOf.print_chart "results/" (name^"1_chart") "a1" chart;
  54 + let chart,references = ENIAM_LCGchart.lazify chart in
  55 + ENIAM_LCGlatexOf.print_chart "results/" (name^"2_chart") "a4" chart;
  56 + ENIAM_LCGlatexOf.print_references "results/" (name^"2_references") "a4" references;
  57 + let chart = ENIAM_LCGchart.parse chart references 30. Sys.time in (* note: references is implicitly modified in place; 30. is presumably a timeout measured with Sys.time - TODO confirm *)
  58 + ENIAM_LCGlatexOf.print_chart "results/" (name^"3_chart") "a4" chart;
  59 + ENIAM_LCGlatexOf.print_references "results/" (name^"3_references") "a4" references;
  60 + if ENIAM_LCGchart.is_parsed chart then (
  61 + let term = ENIAM_LCGchart.get_parsed_term chart in
  62 + Xlatex.latex_file_out "results/" (name^"4_term") "a4" false (fun file ->
  63 + Printf.fprintf file "\\[%s\\]\n" (ENIAM_LCGlatexOf.linear_term 0 term));
  64 + Xlatex.latex_compile_and_clean "results/" (name^"4_term");
  65 + let dependency_tree = ENIAM_LCGreductions.reduce term references in
  66 + ENIAM_LCGlatexOf.print_dependency_tree "results/" (name^"4_dependency_tree") "a0" dependency_tree;
  67 + if ENIAM_LCGreductions.is_reduced_dependency_tree dependency_tree then (
  68 + ENIAM_LCGreductions.assign_labels dependency_tree; (* note: dependency_tree is implicitly modified in place *)
  69 + ENIAM_LCGlatexOf.print_dependency_tree "results/" (name^"5_dependency_tree") "a4" dependency_tree;
  70 + ENIAM_LCGreductions.remove_cuts dependency_tree; (* note: dependency_tree is implicitly modified in place *)
  71 + ENIAM_LCGlatexOf.print_dependency_tree "results/" (name^"6_dependency_tree") "a4" dependency_tree;
  72 + ENIAM_LCGgraphOf.print_dependency_tree "results/" (name^"6_dependency_tree") dependency_tree;
  73 + ENIAM_LCGgraphOf.print_simplified_dependency_tree "results/" (name^"6_simple_dependency_tree") dependency_tree;
  74 + ())
  75 + else print_endline "not reduced")
  76 + else print_endline "not parsed"
  77 +
  78 +let parse rules senses_map name id tokens lex_sems = (* Partially applied fold over a text: the remaining argument is passed on to ENIAMsubsyntaxTypes.fold_text; id is the running sentence counter. *)
  79 + ENIAMsubsyntaxTypes.fold_text ENIAMsubsyntaxTypes.Struct id (fun mode id -> function (* mode is received but not used *)
  80 + ENIAMsubsyntaxTypes.RawSentence s -> id
  81 + | ENIAMsubsyntaxTypes.StructSentence(paths,last) ->
  82 + test_example rules senses_map (name ^ string_of_int id ^ "_") tokens lex_sems paths last; (* output files are prefixed with name followed by the counter *)
  83 + id + 1 (* only structured sentences advance the counter *)
  84 + | ENIAMsubsyntaxTypes.DepSentence(paths) -> id
  85 + | _ -> failwith "parse") (* any other sentence variant reaching this callback raises Failure *)
22 86  
  87 +(*
23 88 let empty_result = {
24 89 input_text=RawText "";
25 90 pre_text=RawText "";
... ... @@ -676,3 +741,4 @@ let process_file_id filename output_filename timeout =
676 741 Printf.fprintf oc "\n%!";
677 742 let _ = Unix.shutdown_connection ic in
678 743 ()*)
  744 +*)
... ...
parser/execTypes.ml renamed to exec/ENIAMexecTypes.ml
1 1 (*
2   - * ENIAM: Categorial Syntactic-Semantic Parser for Polish
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  2 + * ENIAMexec implements ENIAM processing stream
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
5 5 *
6   - * This program is free software: you can redistribute it and/or modify
7   - * it under the terms of the GNU General Public License as published by
  6 + * This library is free software: you can redistribute it and/or modify
  7 + * it under the terms of the GNU Lesser General Public License as published by
8 8 * the Free Software Foundation, either version 3 of the License, or
9 9 * (at your option) any later version.
10 10 *
11   - * This program is distributed in the hope that it will be useful,
  11 + * This library is distributed in the hope that it will be useful,
12 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   - * GNU General Public License for more details.
  14 + * GNU Lesser General Public License for more details.
15 15 *
16   - * You should have received a copy of the GNU General Public License
  16 + * You should have received a copy of the GNU Lesser General Public License
17 17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 18 *)
19   -
  19 +(*
20 20 type status = Idle | PreprocessingError | LexiconError | ParseError | ParseTimeout | Parsed | TooManyNodes | NotParsed | NotReduced | ReductionError | SemError | NotTranslated
21 21  
22 22 type eniam_parse_result = {
... ... @@ -145,3 +145,4 @@ type message_to_overseer =
145 145  
146 146 let time_fun = Unix.gettimeofday
147 147 (* let time_fun = Sys.time () *)
  148 +*)
... ...
exec/README 0 → 100644
  1 +ENIAMexec Version 1.0 :
  2 +-----------------------
  3 +
  4 +ENIAMexec implements ENIAM processing stream.
  5 +
  6 +Install
  7 +-------
  8 +
  9 +ENIAMexec requires OCaml version 4.02.3 compiler
  10 +together with Xlib library version 3.2 or later
  11 +and ENIAM_LCGparser library version 1.0.
  12 +
  13 +In order to install type:
  14 +
  15 +make install
  16 +
  17 +By default, ENIAMexec is installed in the 'ocamlc -where'/eniam directory.
  18 +You can change it by editing the Makefile.
  19 +
  20 +In order to test library type:
  21 +make test
  22 +./test
  23 +
  24 +In order to print lexicon as pdf file type:
  25 +make print_lexicon
  26 +./print_lexicon
  27 +
  28 +Both test and print_lexicon require pdflatex installed.
  29 +
  30 +By default ENIAM_LCGlexicon looks for resources in /usr/share/eniam directory.
  31 +However, this behaviour may be changed by setting and exporting the ENIAM_RESOURCE_PATH
  32 +environment variable.
  33 +
  34 +Credits
  35 +-------
  36 +Copyright © 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  37 +Copyright © 2016 Institute of Computer Science Polish Academy of Sciences
  38 +
  39 +Licence
  40 +-------
  41 +
  42 +This library is free software: you can redistribute it and/or modify
  43 +it under the terms of the GNU Lesser General Public License as published by
  44 +the Free Software Foundation, either version 3 of the License, or
  45 +(at your option) any later version.
  46 +
  47 +This library is distributed in the hope that it will be useful,
  48 +but WITHOUT ANY WARRANTY; without even the implied warranty of
  49 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  50 +GNU Lesser General Public License for more details.
  51 +
  52 +You should have received a copy of the GNU Lesser General Public License
  53 +along with this program. If not, see <http://www.gnu.org/licenses/>.
... ...
exec/TODO 0 → 100644
  1 +Włączyć senses_map do lex_semantics
  2 +verbosity w semparser
  3 +results w exec
  4 +poprawienie text-paragraph-sentence na fold w pozostałych modułach
  5 +selekcja sparsowanych zdan
  6 +output semparser w postaci html z wstawionymi obrazkami
... ...
exec/lgpl-3.0.txt 0 → 100644
  1 + GNU LESSER GENERAL PUBLIC LICENSE
  2 + Version 3, 29 June 2007
  3 +
  4 + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 + Everyone is permitted to copy and distribute verbatim copies
  6 + of this license document, but changing it is not allowed.
  7 +
  8 +
  9 + This version of the GNU Lesser General Public License incorporates
  10 +the terms and conditions of version 3 of the GNU General Public
  11 +License, supplemented by the additional permissions listed below.
  12 +
  13 + 0. Additional Definitions.
  14 +
  15 + As used herein, "this License" refers to version 3 of the GNU Lesser
  16 +General Public License, and the "GNU GPL" refers to version 3 of the GNU
  17 +General Public License.
  18 +
  19 + "The Library" refers to a covered work governed by this License,
  20 +other than an Application or a Combined Work as defined below.
  21 +
  22 + An "Application" is any work that makes use of an interface provided
  23 +by the Library, but which is not otherwise based on the Library.
  24 +Defining a subclass of a class defined by the Library is deemed a mode
  25 +of using an interface provided by the Library.
  26 +
  27 + A "Combined Work" is a work produced by combining or linking an
  28 +Application with the Library. The particular version of the Library
  29 +with which the Combined Work was made is also called the "Linked
  30 +Version".
  31 +
  32 + The "Minimal Corresponding Source" for a Combined Work means the
  33 +Corresponding Source for the Combined Work, excluding any source code
  34 +for portions of the Combined Work that, considered in isolation, are
  35 +based on the Application, and not on the Linked Version.
  36 +
  37 + The "Corresponding Application Code" for a Combined Work means the
  38 +object code and/or source code for the Application, including any data
  39 +and utility programs needed for reproducing the Combined Work from the
  40 +Application, but excluding the System Libraries of the Combined Work.
  41 +
  42 + 1. Exception to Section 3 of the GNU GPL.
  43 +
  44 + You may convey a covered work under sections 3 and 4 of this License
  45 +without being bound by section 3 of the GNU GPL.
  46 +
  47 + 2. Conveying Modified Versions.
  48 +
  49 + If you modify a copy of the Library, and, in your modifications, a
  50 +facility refers to a function or data to be supplied by an Application
  51 +that uses the facility (other than as an argument passed when the
  52 +facility is invoked), then you may convey a copy of the modified
  53 +version:
  54 +
  55 + a) under this License, provided that you make a good faith effort to
  56 + ensure that, in the event an Application does not supply the
  57 + function or data, the facility still operates, and performs
  58 + whatever part of its purpose remains meaningful, or
  59 +
  60 + b) under the GNU GPL, with none of the additional permissions of
  61 + this License applicable to that copy.
  62 +
  63 + 3. Object Code Incorporating Material from Library Header Files.
  64 +
  65 + The object code form of an Application may incorporate material from
  66 +a header file that is part of the Library. You may convey such object
  67 +code under terms of your choice, provided that, if the incorporated
  68 +material is not limited to numerical parameters, data structure
  69 +layouts and accessors, or small macros, inline functions and templates
  70 +(ten or fewer lines in length), you do both of the following:
  71 +
  72 + a) Give prominent notice with each copy of the object code that the
  73 + Library is used in it and that the Library and its use are
  74 + covered by this License.
  75 +
  76 + b) Accompany the object code with a copy of the GNU GPL and this license
  77 + document.
  78 +
  79 + 4. Combined Works.
  80 +
  81 + You may convey a Combined Work under terms of your choice that,
  82 +taken together, effectively do not restrict modification of the
  83 +portions of the Library contained in the Combined Work and reverse
  84 +engineering for debugging such modifications, if you also do each of
  85 +the following:
  86 +
  87 + a) Give prominent notice with each copy of the Combined Work that
  88 + the Library is used in it and that the Library and its use are
  89 + covered by this License.
  90 +
  91 + b) Accompany the Combined Work with a copy of the GNU GPL and this license
  92 + document.
  93 +
  94 + c) For a Combined Work that displays copyright notices during
  95 + execution, include the copyright notice for the Library among
  96 + these notices, as well as a reference directing the user to the
  97 + copies of the GNU GPL and this license document.
  98 +
  99 + d) Do one of the following:
  100 +
  101 + 0) Convey the Minimal Corresponding Source under the terms of this
  102 + License, and the Corresponding Application Code in a form
  103 + suitable for, and under terms that permit, the user to
  104 + recombine or relink the Application with a modified version of
  105 + the Linked Version to produce a modified Combined Work, in the
  106 + manner specified by section 6 of the GNU GPL for conveying
  107 + Corresponding Source.
  108 +
  109 + 1) Use a suitable shared library mechanism for linking with the
  110 + Library. A suitable mechanism is one that (a) uses at run time
  111 + a copy of the Library already present on the user's computer
  112 + system, and (b) will operate properly with a modified version
  113 + of the Library that is interface-compatible with the Linked
  114 + Version.
  115 +
  116 + e) Provide Installation Information, but only if you would otherwise
  117 + be required to provide such information under section 6 of the
  118 + GNU GPL, and only to the extent that such information is
  119 + necessary to install and execute a modified version of the
  120 + Combined Work produced by recombining or relinking the
  121 + Application with a modified version of the Linked Version. (If
  122 + you use option 4d0, the Installation Information must accompany
  123 + the Minimal Corresponding Source and Corresponding Application
  124 + Code. If you use option 4d1, you must provide the Installation
  125 + Information in the manner specified by section 6 of the GNU GPL
  126 + for conveying Corresponding Source.)
  127 +
  128 + 5. Combined Libraries.
  129 +
  130 + You may place library facilities that are a work based on the
  131 +Library side by side in a single library together with other library
  132 +facilities that are not Applications and are not covered by this
  133 +License, and convey such a combined library under terms of your
  134 +choice, if you do both of the following:
  135 +
  136 + a) Accompany the combined library with a copy of the same work based
  137 + on the Library, uncombined with any other library facilities,
  138 + conveyed under the terms of this License.
  139 +
  140 + b) Give prominent notice with the combined library that part of it
  141 + is a work based on the Library, and explaining where to find the
  142 + accompanying uncombined form of the same work.
  143 +
  144 + 6. Revised Versions of the GNU Lesser General Public License.
  145 +
  146 + The Free Software Foundation may publish revised and/or new versions
  147 +of the GNU Lesser General Public License from time to time. Such new
  148 +versions will be similar in spirit to the present version, but may
  149 +differ in detail to address new problems or concerns.
  150 +
  151 + Each version is given a distinguishing version number. If the
  152 +Library as you received it specifies that a certain numbered version
  153 +of the GNU Lesser General Public License "or any later version"
  154 +applies to it, you have the option of following the terms and
  155 +conditions either of that published version or of any later version
  156 +published by the Free Software Foundation. If the Library as you
  157 +received it does not specify a version number of the GNU Lesser
  158 +General Public License, you may choose any version of the GNU Lesser
  159 +General Public License ever published by the Free Software Foundation.
  160 +
  161 + If the Library as you received it specifies that a proxy can decide
  162 +whether future versions of the GNU Lesser General Public License shall
  163 +apply, that proxy's public statement of acceptance of any version is
  164 +permanent authorization for you to choose that version for the
  165 +Library.
... ...
exec/makefile 0 → 100755
  1 +OCAMLC=ocamlc
  2 +OCAMLOPT=ocamlopt
  3 +OCAMLDEP=ocamldep
  4 +INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam
  5 +OCAMLFLAGS=$(INCLUDES) -g
  6 +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa \
  7 + eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa \
  8 + eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa \
  9 + eniam-lexSemantics.cmxa eniam-exec.cmxa
  10 +INSTALLDIR=`ocamlc -where`/eniam
  11 +
  12 +SOURCES= ENIAMexecTypes.ml ENIAMexec.ml
  13 +
  14 +all: eniam-exec.cma eniam-exec.cmxa
  15 +
  16 +install: all
  17 + mkdir -p $(INSTALLDIR)
  18 + cp eniam-exec.cmxa eniam-exec.a eniam-exec.cma $(INSTALLDIR)
  19 + cp ENIAMexecTypes.cmi ENIAMexec.cmi $(INSTALLDIR)
  20 + cp ENIAMexecTypes.cmx ENIAMexec.cmx $(INSTALLDIR)
  21 +
  22 +
  23 +eniam-exec.cma: $(SOURCES)
  24 + ocamlc -linkall -a -o eniam-exec.cma $(OCAMLFLAGS) $^
  25 +
  26 +eniam-exec.cmxa: $(SOURCES)
  27 + ocamlopt -linkall -a -o eniam-exec.cmxa $(INCLUDES) $^
  28 +
  29 +parser: parser.ml
  30 + $(OCAMLOPT) -o parser $(OCAMLOPTFLAGS) parser.ml
  31 +
  32 +semparser: semparser.ml
  33 + mkdir -p results
  34 + $(OCAMLOPT) -o semparser $(OCAMLOPTFLAGS) semparser.ml
  35 +
  36 +.SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx
  37 +
  38 +.mll.ml:
  39 + ocamllex $<
  40 +
  41 +.mly.mli:
  42 + ocamlyacc $<
  43 +
  44 +.mly.ml:
  45 + ocamlyacc $<
  46 +
  47 +.ml.cmo:
  48 + $(OCAMLC) $(OCAMLFLAGS) -c $<
  49 +
  50 +.mli.cmi:
  51 + $(OCAMLC) $(OCAMLFALGS) -c $<
  52 +
  53 +.ml.cmx:
  54 + $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $<
  55 +
  56 +clean:
  57 + rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a parser semparser
... ...
exec/parser.ml 0 → 100644
  1 +(*
  2 + * ENIAMexec implements ENIAM processing stream
  3 + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  5 + *
  6 + * This library is free software: you can redistribute it and/or modify
  7 + * it under the terms of the GNU Lesser General Public License as published by
  8 + * the Free Software Foundation, either version 3 of the License, or
  9 + * (at your option) any later version.
  10 + *
  11 + * This library is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 + * GNU Lesser General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU Lesser General Public License
  17 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18 + *)
  19 +
  20 +open ENIAM_LCGlexiconTypes
  21 +open ENIAM_LCGtypes
  22 +open ENIAMsubsyntaxTypes
  23 +open Xstd
  24 +
  25 +let rules = ENIAM_LCGlexicon.make_rules false ENIAM_LCGlexiconTypes.rules_filename
  26 +
  27 +let examples = [
  28 + "Szpak","Szpak śpiewa.";
  29 + (* "miał","Miałem miał."; *)
  30 +(* "Ala","Ala ma kota.";
  31 + "Ale","Ale mają kota:"; *)
  32 + (* "zima","Szpak frunie zimą.";*)
  33 + (* "październik","Kot miauczy w październiku."; *)
  34 +(* "Szpak-Kot","Szpak frunie. Kot miauczy.";
  35 + "powiedział","Szpak powiedział: „Frunę. Kiszę.”";*)
  36 + (* "teraz","Teraz frunie jakiś szpak.";
  37 + "chłopcy","Chłopcy mają ulicę kwiatami."; *)
  38 + (* "arabia","Arabia Saudyjska biegnie.";*)
  39 +(* "Tom","Tom idzie."; *)
  40 + (* "liceum","W 1984-89 uczęszczał do VII Liceum Ogólnokształcącego im. K.K. Baczyńskiego w Szczecinie.";
  41 + "studia","Następnie studiował architekturę na Politechnice Szczecińskiej, dyplom uzyskał w 1994."; *)
  42 +]
  43 +
  44 +
  45 +
  46 +
  47 +let _ = (* Program entry point: initializes the resources, then runs ENIAMexec.parse on every (name,example) pair in examples. *)
  48 + ENIAMsubsyntax.initialize ();
  49 + ENIAMcategoriesPL.initialize ();
  50 + ENIAMwalParser.initialize ();
  51 + ENIAMwalReduce.initialize ();
  52 + Xlist.iter examples (fun (name,example) ->
  53 + let text,tokens = ENIAMsubsyntax.parse_text example in
  54 + let lex_sems = ENIAMlexSemantics.assign tokens text in
  55 + ignore(ENIAMexec.parse rules StringMap.empty name 1 tokens lex_sems text)) (* empty senses map; sentence numbering starts at 1; the returned counter is discarded *)
  56 +
  57 +(*
  58 +type output = Text | Xml | Html | Marsh | Graphviz
  59 +
  60 +let output = ref Text
  61 +let comm_stdio = ref true
  62 +let sentence_split = ref true
  63 +let port = ref 0
  64 +
  65 +let spec_list = [
  66 + "-s", Arg.Unit (fun () -> sentence_split:=true), "Split input into sentences (default)";
  67 + "-n", Arg.Unit (fun () -> sentence_split:=false), "Do not split input into sentences";
  68 + "-i", Arg.Unit (fun () -> comm_stdio:=true), "Communication using stdio (default)";
  69 + "-p", Arg.Int (fun p -> comm_stdio:=false; port:=p), "<port> Communication using sockets on given port number";
  70 + "-t", Arg.Unit (fun () -> output:=Text), "Output as plain text (default)";
  71 + "-x", Arg.Unit (fun () -> output:=Xml), "Output as XML";
  72 + "-m", Arg.Unit (fun () -> output:=Marsh), "Output as marshalled Ocaml data structure";
  73 + "-h", Arg.Unit (fun () -> output:=Html), "Output as HTML";
  74 + "-g", Arg.Unit (fun () -> output:=Graphviz; sentence_split:=false), "Output as graphviz dot file; turns sentence split off";
  75 + (* "-r", Arg.String (fun p ->
  76 + ENIAMtokenizerTypes.set_resource_path p;
  77 + ENIAMmorphologyTypes.set_resource_path p;
  78 + ENIAMsubsyntaxTypes.set_resource_path p), "<path> Set resource path"; *)
  79 + ]
  80 +
  81 +let usage_msg =
  82 + "Usage: subsyntax <options>\nInput is a sequence of lines. Empty line ends the sequence and invoke parsing. Double empty line shutdown parser.\nOptions are:"
  83 +
  84 +let message = "ENIAMsubsyntax: MWE, abbreviation and sentence detecion for Polish\n\
  85 +Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>\n\
  86 +Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences"
  87 +
  88 +let anon_fun s = raise (Arg.Bad ("invalid argument: " ^ s))
  89 +
  90 +let input_text channel =
  91 + let s = ref (try input_line channel with End_of_file -> "") in
  92 + let lines = ref [] in
  93 + while !s <> "" do
  94 + lines := !s :: !lines;
  95 + s := try input_line channel with End_of_file -> ""
  96 + done;
  97 + String.concat "\n" (List.rev !lines)
  98 +
  99 +let rec main_loop in_chan out_chan =
  100 + let text = input_text in_chan in
  101 + if text = "" then () else (
  102 + (* print_endline "input text begin";
  103 + print_endline text;
  104 + print_endline "input text end"; *)
  105 + (if !sentence_split then
  106 + let text,tokens = ENIAMsubsyntax.parse_text text in
  107 + (match !output with
  108 + Text -> output_string out_chan (ENIAMsubsyntaxStringOf.text "" tokens text ^ "\n" ^ ENIAMsubsyntaxStringOf.token_extarray tokens ^ "\n\n")
  109 + | Xml -> output_string out_chan (Xml.to_string (ENIAMsubsyntaxXMLof.text_and_tokens text tokens) ^ "\n\n")
  110 + | Html -> output_string out_chan (ENIAMsubsyntaxHTMLof.text_and_tokens text tokens ^ "\n\n")
  111 + | Marsh -> Marshal.to_channel out_chan (text,tokens) []
  112 + | Graphviz -> failwith "main_loop: ni")
  113 + else
  114 + let tokens = ENIAMsubsyntax.parse text in
  115 + (match !output with
  116 + Text -> output_string out_chan (ENIAMsubsyntaxStringOf.token_list tokens ^ "\n\n")
  117 + | Xml -> output_string out_chan (Xml.to_string (ENIAMsubsyntaxXMLof.token_list tokens) ^ "\n\n")
  118 + | Html -> output_string out_chan (ENIAMsubsyntaxHTMLof.token_list tokens ^ "\n\n")
  119 + | Marsh -> Marshal.to_channel out_chan tokens []
  120 + | Graphviz -> output_string out_chan (ENIAMsubsyntaxGraphOf.token_list tokens ^ "\n\n")));
  121 + flush out_chan;
  122 + main_loop in_chan out_chan)
  123 +
  124 +let _ =
  125 + prerr_endline message;
  126 + Arg.parse spec_list anon_fun usage_msg;
  127 + Gc.compact ();
  128 + prerr_endline "Ready!";
  129 + if !comm_stdio then main_loop stdin stdout
  130 + else
  131 + let sockaddr = Unix.ADDR_INET(Unix.inet_addr_any,!port) in
  132 + Unix.establish_server main_loop sockaddr
  133 +*)
... ...
exec/semparser.ml 0 → 100644
  1 +(*
  2 + * ENIAMexec implements ENIAM processing stream
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
  5 + *
  6 + * This library is free software: you can redistribute it and/or modify
  7 + * it under the terms of the GNU Lesser General Public License as published by
  8 + * the Free Software Foundation, either version 3 of the License, or
  9 + * (at your option) any later version.
  10 + *
  11 + * This library is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 + * GNU Lesser General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU Lesser General Public License
  17 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18 + *)
  19 +
  20 +open Xstd
  21 +open ENIAMsubsyntaxTypes
  22 +
  23 +let rules = ENIAM_LCGlexicon.make_rules false ENIAM_LCGlexiconTypes.user_lexicon_filename
  24 +
  25 +let load_senses_map filename = (* Folds over the rows of filename with File.fold_tab, building a map from lemma to its list of categories. *)
  26 + File.fold_tab filename StringMap.empty (fun map -> function
  27 + [lemma;cat] -> StringMap.add_inc map lemma [cat] (fun l -> cat :: l) (* first occurrence stores [cat]; later ones prepend to the existing list *)
  28 + | l -> failwith ("load_senses_map: " ^ String.concat "\t" l)) (* a row without exactly two fields raises Failure quoting the row *)
  29 +
  30 +let senses_map = load_senses_map ENIAM_LCGlexiconTypes.user_senses_filename
  31 +
  32 +
  33 +let examples = [
  34 + (* "liceum","W 1984-89 uczęszczał do VII Liceum Ogólnokształcącego im. K.K. Baczyńskiego w Szczecinie."; *)
  35 + "studia","Następnie studiował architekturę na Politechnice Szczecińskiej, dyplom uzyskał w 1994.";
  36 +]
  37 +
  38 +
  39 +(*
  40 +type output = Text | Xml | Html | Marsh | Graphviz
  41 +
  42 +let output = ref Text
  43 +let comm_stdio = ref true
  44 +let sentence_split = ref true
  45 +let port = ref 0
  46 +*)
  47 +let subsyntax_built_in = ref true
  48 +let subsyntax_host = ref "localhost"
  49 +let subsyntax_port = ref 5739
  50 +
  (* Option table passed to Arg.parse. Only three options are active:
     -b selects the built-in ENIAMsubsyntax, while --port and --host both
     switch the built-in version off and store the port or host to connect to.
     Because each option assigns subsyntax_built_in directly, the last one
     given on the command line decides the mode.
     The remaining entries are commented out. *)
  51 +let spec_list = [
  52 +(* "-s", Arg.Unit (fun () -> sentence_split:=true), "Split input into sentences (default)";
  53 + "-n", Arg.Unit (fun () -> sentence_split:=false), "Do not split input into sentences";
  54 + "-i", Arg.Unit (fun () -> comm_stdio:=true), "Communication using stdio (default)";
  55 + "-p", Arg.Int (fun p -> comm_stdio:=false; port:=p), "<port> Communication using sockets on given port number";
  56 + "-t", Arg.Unit (fun () -> output:=Text), "Output as plain text (default)";
  57 + "-x", Arg.Unit (fun () -> output:=Xml), "Output as XML";
  58 + "-m", Arg.Unit (fun () -> output:=Marsh), "Output as marshalled Ocaml data structure";
  59 + "-h", Arg.Unit (fun () -> output:=Html), "Output as HTML";
  60 + "-g", Arg.Unit (fun () -> output:=Graphviz; sentence_split:=false), "Output as graphviz dot file; turns sentence split off";*)
  61 + (* "-r", Arg.String (fun p ->
  62 + ENIAMtokenizerTypes.set_resource_path p;
  63 + ENIAMmorphologyTypes.set_resource_path p;
  64 + ENIAMsubsyntaxTypes.set_resource_path p), "<path> Set resource path"; *)
  65 + "-b", Arg.Unit (fun () -> subsyntax_built_in:=true), "Use built in version of ENIAMsubsyntax (default)";
  66 + "--port", Arg.Int (fun p -> subsyntax_built_in:=false; subsyntax_port:=p), "<port> Connect to ENIAMsubsyntax on a given port";
  67 + "--host", Arg.String (fun s -> subsyntax_built_in:=false; subsyntax_host:=s), "<hostname> Connect to ENIAMsubsyntax on a given host (by default localhost)";
  68 + ]
  69 +
  (* Usage text handed to Arg.parse. It describes the input protocol: lines
     are collected until an empty line, and an empty block ends the program. *)
  70 +let usage_msg =
  71 + "Usage: semparser <options>\nInput is a sequence of lines. Empty line ends the sequence and invoke parsing. Double empty line shutdown parser.\nOptions are:"
  72 +
  (* Banner with program name and copyright notice; the entry point prints it
     to stderr at start-up. *)
  73 +let message = "ENIAM_LCGsemparser, a parser for Logical Categorial Grammar formalism\n\
  74 +Copyright (C) 2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>\n\
  75 +Copyright (C) 2017 Institute of Computer Science Polish Academy of Sciences"
  76 +
  (* Handler for positional (anonymous) command-line arguments: none are
     accepted, so every such argument [s] is rejected by raising Arg.Bad. *)
  77 +let anon_fun s = raise (Arg.Bad ("invalid argument: " ^ s))
  78 +
  (* [input_text channel] reads lines from [channel] until it meets an empty
     line or the end of the channel (End_of_file is turned into the empty
     string, so both cases stop the loop). The lines read so far are returned
     joined with newline characters, in reading order.
     The result is the empty string when the very first line is empty or the
     channel is already at its end. *)
  79 +let input_text channel =
  80 + let s = ref (try input_line channel with End_of_file -> "") in
  (* accumulated in reverse order; reversed once before joining *)
  81 + let lines = ref [] in
  82 + while !s <> "" do
  83 + lines := !s :: !lines;
  84 + s := try input_line channel with End_of_file -> ""
  85 + done;
  86 + String.concat "\n" (List.rev !lines)
  87 +
  (* Read-parse loop.
     Reads one block of text from [in_chan] with input_text; an empty block
     ends the loop. Otherwise the block is written to [sub_out] followed by an
     empty line and flushed. The (text, tokens) pair then comes either from
     the built-in ENIAMsubsyntax.parse_text or from a value unmarshalled from
     [sub_in]. ENIAMexec.parse is run on it together with a lex_sems value
     created by ExtArray.make from the token count and
     ENIAMlexSemanticsTypes.empty_lex_sem, and its result is discarded.
     Finally [out_chan] is flushed and the loop recurses.
     NOTE(review): the write to sub_out also happens in built-in mode, where
     the entry point passes stdout as sub_out, so the input text is echoed to
     stdout - confirm this is intended.
     NOTE(review): Marshal.from_channel is not type-checked; the annotation is
     trusted, so the peer must send exactly this type.
     The large commented-out section is an earlier output-format dispatch. *)
  88 +let rec main_loop sub_in sub_out in_chan out_chan =
  89 + let text = input_text in_chan in
  90 + if text = "" then () else (
  91 + Printf.fprintf sub_out "%s\n\n%!" text;
  92 + let text,tokens =
  93 + if !subsyntax_built_in then ENIAMsubsyntax.parse_text text else
  94 + (Marshal.from_channel sub_in : ENIAMsubsyntaxTypes.text * ENIAMtokenizerTypes.token_env ExtArray.t) in
  95 + let lex_sems = ExtArray.make (ExtArray.size tokens) ENIAMlexSemanticsTypes.empty_lex_sem in
  96 + ignore(ENIAMexec.parse rules senses_map "E"(*name*) 1 tokens lex_sems text)
  97 + (* print_endline "input text begin";
  98 + print_endline text;
  99 + print_endline "input text end"; *)
  100 + (*if !sentence_split then
  101 + let text,tokens = ENIAMsubsyntax.parse_text text in
  102 + (match !output with
  103 + Text -> output_string out_chan (ENIAMsubsyntaxStringOf.text "" tokens text ^ "\n" ^ ENIAMsubsyntaxStringOf.token_extarray tokens ^ "\n\n")
  104 + | Xml -> output_string out_chan (Xml.to_string (ENIAMsubsyntaxXMLof.text_and_tokens text tokens) ^ "\n\n")
  105 + | Html -> output_string out_chan (ENIAMsubsyntaxHTMLof.text_and_tokens text tokens ^ "\n\n")
  106 + | Marsh -> Marshal.to_channel out_chan (text,tokens) []
  107 + | Graphviz -> failwith "main_loop: ni")
  108 + else
  109 + let tokens = ENIAMsubsyntax.parse text in
  110 + (match !output with
  111 + Text -> output_string out_chan (ENIAMsubsyntaxStringOf.token_list tokens ^ "\n\n")
  112 + | Xml -> output_string out_chan (Xml.to_string (ENIAMsubsyntaxXMLof.token_list tokens) ^ "\n\n")
  113 + | Html -> output_string out_chan (ENIAMsubsyntaxHTMLof.token_list tokens ^ "\n\n")
  114 + | Marsh -> Marshal.to_channel out_chan tokens []
  115 + | Graphviz -> output_string out_chan (ENIAMsubsyntaxGraphOf.token_list tokens ^ "\n\n"))*);
  116 + flush out_chan;
  117 + main_loop sub_in sub_out in_chan out_chan)
  118 +
  (* [get_sock_addr host_name port] resolves [host_name] with
     Unix.gethostbyname and returns an ADDR_INET socket address built from the
     first entry of the resulting address list and [port].
     NOTE(review): Unix.gethostbyname raises Not_found for an unknown host,
     and addr.(0) raises Invalid_argument if the address list is empty;
     neither case is handled here. *)
  119 +let get_sock_addr host_name port =
  120 + let he = Unix.gethostbyname host_name in
  121 + let addr = he.Unix.h_addr_list in
  122 + Unix.ADDR_INET(addr.(0),port)
  123 +
  (* Program entry point. In order: prints the banner to stderr, initialises
     ENIAMcategoriesPL, parses the command line, initialises ENIAMsubsyntax
     only when the built-in version is selected, and compacts the heap.
     It then picks the subsyntax channels: stdin/stdout in built-in mode,
     otherwise a connection opened with Unix.open_connection to the configured
     host and port. After printing Ready! to stderr it runs main_loop, reading
     user input from stdin.
     NOTE(review): the connection opened in socket mode is never shut down
     explicitly in this block.
     The commented-out tail is a socket-server variant. *)
  124 +let _ =
  125 + prerr_endline message;
  126 + (* ENIAMsubsyntax.initialize (); *)
  127 + ENIAMcategoriesPL.initialize ();
  128 + Arg.parse spec_list anon_fun usage_msg;
  129 + if !subsyntax_built_in then ENIAMsubsyntax.initialize ();
  130 + Gc.compact ();
  131 + let sub_in,sub_out =
  132 + if !subsyntax_built_in then stdin,stdout
  133 + else Unix.open_connection (get_sock_addr !subsyntax_host !subsyntax_port) in
  134 + prerr_endline "Ready!";
  135 + (*if !comm_stdio then*) main_loop sub_in sub_out stdin stdout
  136 + (*else
  137 + let sockaddr = Unix.ADDR_INET(Unix.inet_addr_any,!port) in
  138 + Unix.establish_server main_loop sockaddr*)
... ...
lexSemantics/ENIAMwalParser.ml
... ... @@ -448,11 +448,19 @@ let load_meanings filename =
448 448 Xlist.fold l IntMap.empty (fun meanings m ->
449 449 IntMap.add meanings m.mng_id m)
450 450  
451   -let phrases = load_phrases phrases_filename
452   -let entries = load_entries entries_filename
453   -let schemata = load_schemata schemata_filename
454   -let connected = load_connected connected_filename
455   -let meanings = load_meanings meanings_filename
  (* Mutable holders for the loaded resources. They start out empty and are
     filled by initialize; reading them before initialize has run yields the
     empty maps. *)
  451 +let phrases = ref IntMap.empty
  452 +let entries = ref StringMap.empty
  453 +let schemata = ref StringMap.empty
  454 +let connected = ref StringMap.empty
  455 +let meanings = ref IntMap.empty
  456 +
  (* [initialize ()] loads phrases, entries, schemata, connected and meanings
     from their respective files and stores each result in the reference of
     the same name. Loading is explicit rather than a module-initialisation
     side effect, so callers decide when (and whether) it happens. *)
  457 +let initialize () =
  458 + phrases := load_phrases phrases_filename;
  459 + entries := load_entries entries_filename;
  460 + schemata := load_schemata schemata_filename;
  461 + connected := load_connected connected_filename;
  462 + meanings := load_meanings meanings_filename;
  463 + ()
456 464  
457 465  
458 466 (*
... ...
lexSemantics/ENIAMwalReduce.ml
... ... @@ -68,14 +68,23 @@ let create_comprep_adjuncts comprep_reqs comprep_reqs2 =
68 68 StringMap.map map (fun l ->
69 69 Xlist.map l (fun s -> s, try StringMap.find comprep_reqs s with Not_found -> StringSet.empty))
70 70  
71   -let comprep_reqs,comprep_reqs2 = create_comprep_reqs ENIAMwalParser.entries
72   -let lexarg_reqs = create_lexarg_reqs ENIAMwalParser.entries
73   -let comprep_adjuncts = create_comprep_adjuncts comprep_reqs comprep_reqs2
  (* Mutable holders for the tables derived from the loaded entries. They
     start out empty and are filled by initialize. *)
  71 +let comprep_reqs = ref StringMap.empty
  72 +let comprep_reqs2 = ref StringMap.empty
  73 +let lexarg_reqs = ref IntMap.empty
  74 +let comprep_adjuncts = ref StringMap.empty
  75 +
  (* [initialize ()] computes comprep_reqs, comprep_reqs2 and lexarg_reqs from
     the current contents of ENIAMwalParser.entries, then builds
     comprep_adjuncts from the two freshly stored comprep tables.
     NOTE(review): ENIAMwalParser.entries is only dereferenced here, so
     ENIAMwalParser.initialize presumably has to be called first; otherwise the
     tables are computed from an empty map - confirm the call order at the
     call site. *)
  76 +let initialize () =
  77 + let a,b = create_comprep_reqs !ENIAMwalParser.entries in
  78 + comprep_reqs := a;
  79 + comprep_reqs2 := b;
  80 + lexarg_reqs := create_lexarg_reqs !ENIAMwalParser.entries;
  81 + comprep_adjuncts := create_comprep_adjuncts !comprep_reqs !comprep_reqs2;
  82 + ()
74 83  
75 84 let select_comprep_adjuncts lexemes =
76 85 StringSet.fold lexemes [] (fun l lemma ->
77 86 try
78   - Xlist.fold (StringMap.find comprep_adjuncts lemma) l (fun l (s,reqs) ->
  87 + Xlist.fold (StringMap.find !comprep_adjuncts lemma) l (fun l (s,reqs) ->
79 88 (* Printf.printf "%s: %s: %s\n" lemma s (String.concat " " (StringSet.to_list reqs)); *)
80 89 if StringSet.is_empty reqs ||
81 90 not (StringSet.is_empty (StringSet.intersection reqs lexemes)) then s :: l else l)
... ... @@ -216,8 +225,8 @@ let select_all_entries phrases entries schemata connected meanings =
216 225 entries,schemata,connected
217 226  
(* [select_entries lexemes] calls select_entries_full on the loaded resources
   for the given lexemes. After this change the resources are read by
   dereferencing the references at call time instead of being bound once at
   module initialisation. *)
218 227 let select_entries lexemes =
219   - select_entries_full ENIAMwalParser.phrases ENIAMwalParser.entries ENIAMwalParser.schemata
220   - ENIAMwalParser.connected ENIAMwalParser.meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes
  228 + select_entries_full !ENIAMwalParser.phrases !ENIAMwalParser.entries !ENIAMwalParser.schemata
  229 + !ENIAMwalParser.connected !ENIAMwalParser.meanings !comprep_reqs !comprep_reqs2 !lexarg_reqs lexemes
221 230  
222 231 (* let entries,schemata,connected =
223 232 (* let lexemes = StringSet.of_list ["Ala"; "ma"; "kot"] in *)
... ...
subsyntax/ENIAMsubsyntaxTypes.ml
... ... @@ -76,3 +76,65 @@ let int_of_mode = function
76 76  
(* [compare_mode x y] orders two modes by comparing their int_of_mode values. *)
77 77 let compare_mode x y =
78 78 compare (int_of_mode x) (int_of_mode y)
  79 +
  80 +
  (* [map_sentence mode f sentence] rebuilds [sentence], applying [f] to
     every leaf.
     QuotedSentences: each element has its sentence field mapped recursively
     under the same [mode]; the other fields are kept.
     AltSentence: each alternative is mapped under its own mode, which shadows
     the [mode] argument inside the closure.
     Any other constructor is a leaf and is replaced by [f mode s].
     Lists are mapped with Xlist.rev_map and reversed afterwards, presumably
     to keep the original order - confirm rev_map semantics in Xstd. *)
  81 +let rec map_sentence mode f = function
  82 + | QuotedSentences sentences ->
  83 + let sentences = Xlist.rev_map sentences (fun p ->
  84 + let sentence = map_sentence mode f p.sentence in
  85 + {p with sentence=sentence}) in
  86 + QuotedSentences(List.rev sentences)
  87 + | AltSentence l ->
  88 + let l = Xlist.rev_map l (fun (mode,sentence) ->
  89 + mode, map_sentence mode f sentence) in
  90 + AltSentence(List.rev l)
  91 + | s -> f mode s
  92 +
  (* [map_paragraph mode f paragraph] applies map_sentence with [f] to every
     sentence inside [paragraph].
     RawParagraph is returned unchanged.
     StructParagraph: the sentence field of each element is mapped under
     [mode]; the other fields are kept.
     AltParagraph: each alternative is mapped recursively under its own mode. *)
  93 +let rec map_paragraph mode f = function
  94 + RawParagraph s -> RawParagraph s
  95 + | StructParagraph sentences ->
  96 + let sentences = Xlist.rev_map sentences (fun p ->
  97 + let sentence = map_sentence mode f p.sentence in
  98 + {p with sentence=sentence}) in
  99 + StructParagraph(List.rev sentences)
  100 + | AltParagraph l ->
  101 + let l = Xlist.rev_map l (fun (mode,paragraph) ->
  102 + mode, map_paragraph mode f paragraph) in
  103 + AltParagraph(List.rev l)
  104 +
  (* [map_text mode f text] applies map_paragraph with [f] to every paragraph
     inside [text].
     RawText is returned unchanged.
     StructText: each paragraph is mapped under [mode].
     AltText: each alternative is mapped recursively under its own mode.
     NOTE(review): the AltText case uses Xlist.map, whereas the sibling cases
     and functions use Xlist.rev_map followed by List.rev - confirm whether
     the difference is intentional. *)
  105 +let rec map_text mode f = function
  106 + RawText s -> RawText s
  107 + | StructText paragraphs ->
  108 + let paragraphs = Xlist.rev_map paragraphs (fun paragraph ->
  109 + map_paragraph mode f paragraph) in
  110 + StructText(List.rev paragraphs)
  111 + | AltText l -> AltText(Xlist.map l (fun (mode,text) ->
  112 + mode, map_text mode f text))
  113 +
  114 +
  (* [fold_sentence mode s f sentence] threads the accumulator [s] through
     every leaf of [sentence].
     QuotedSentences: folds over the sentence field of each element under the
     same [mode].
     AltSentence: folds over each alternative under its own mode.
     Any other constructor is a leaf and yields [f mode s t]. *)
  115 +let rec fold_sentence mode s f = function
  116 + QuotedSentences sentences ->
  117 + Xlist.fold sentences s (fun s p ->
  118 + fold_sentence mode s f p.sentence)
  119 + | AltSentence l ->
  120 + Xlist.fold l s (fun s (mode,sentence) ->
  121 + fold_sentence mode s f sentence)
  122 + | t -> f mode s t
  123 +
  (* [fold_paragraph mode s f paragraph] threads the accumulator [s] through
     every sentence of [paragraph] using fold_sentence.
     RawParagraph contributes nothing and returns [s] unchanged.
     AltParagraph folds over each alternative under its own mode. *)
  124 +let rec fold_paragraph mode s f = function
  125 + RawParagraph _ -> s
  126 + | StructParagraph sentences ->
  127 + Xlist.fold sentences s (fun s p ->
  128 + fold_sentence mode s f p.sentence)
  129 + | AltParagraph l ->
  130 + Xlist.fold l s (fun s (mode,paragraph) ->
  131 + fold_paragraph mode s f paragraph)
  132 +
  (* [fold_text mode s f text] threads the accumulator [s] through every
     paragraph of [text] using fold_paragraph.
     RawText contributes nothing and returns [s] unchanged.
     AltText folds over each alternative under its own mode. *)
  133 +let rec fold_text mode s f = function
  134 + RawText _ -> s
  135 + | StructText paragraphs ->
  136 + Xlist.fold paragraphs s (fun s paragraph ->
  137 + fold_paragraph mode s f paragraph)
  138 + | AltText l ->
  139 + Xlist.fold l s (fun s (mode,text) ->
  140 + fold_text mode s f text)
... ...