|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
(*
* ENIAM: Categorial Syntactic-Semantic Parser for Polish
* Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
* Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*)
|
|
20
|
(*
|
|
21
|
let parse query =
|
|
22
|
(* print_endline "a1"; *)
|
|
23
|
let l = Xunicode.classified_chars_of_utf8_string query in
|
|
24
|
(* print_endline "a2"; *)
|
|
25
|
let l = PreTokenizer.tokenize l in
|
|
26
27
28
29
30
31
32
33
34
35
36
37
38
|
(* print_endline "a3"; *)
let l = PrePatterns.normalize_tokens [] l in
(* print_endline "a4"; *)
let l = PrePatterns.find_replacement_patterns l in
(* print_endline "a5"; *)
let l = PrePatterns.remove_spaces [] l in
let l = PrePatterns.find_abr_patterns PreAcronyms.abr_patterns l in
let l = PrePatterns.normalize_tokens [] l in
(* print_endline "a6"; *)
let paths = PrePaths.translate_into_paths l in
(* print_endline "a7"; *)
let paths = PrePaths.lemmatize paths in
(* print_endline "a8"; *)
|
|
39
|
let paths,_ = PreMWE.process paths in
|
|
40
41
42
43
44
|
(* print_endline "a12"; *)
let paths = find_proper_names paths in
(* print_endline "a13"; *)
let paths = modify_weights paths in
let paths = translate_digs paths in
|
|
45
|
let paths = assign_senses paths in
|
|
46
47
|
(* print_endline "a14"; *)
let paths = assign_valence paths in
|
|
48
49
|
(* print_endline "a15"; *)
let paths = combine_interps paths in
|
|
50
|
(* print_endline "a16"; *)
|
|
51
|
let paths = disambiguate_senses paths in
|
|
52
53
54
|
let paths = assign_simplified_valence paths in
let paths = PreSemantics.assign_semantics paths in
(* print_endline "a16"; *)
|
|
55
|
let paths = select_tokens paths in
|
|
56
57
58
|
(* print_endline "a17"; *)
(* let paths = if !single_sense_flag then single_sense paths else paths in
let paths = if !single_frame_flag then single_frame paths else paths in*)
|
|
59
60
|
(*let paths, next_id = add_ids paths next_id in
let paths = prepare_indexes paths in*)
|
|
61
|
(* print_endline "a18"; *)
|
|
62
|
paths(*, next_id*)
|
|
63
|
(* print_endline (PrePaths.to_string paths); *)
|
|
64
65
|
(* let paths =
if PrePaths.no_possible_path (PrePaths.map paths PreLemmatization.remove_postags) then
|
|
66
67
68
|
PrePaths.map paths process_ign
else paths in
let paths = PrePaths.map paths PreLemmatization.remove_postags in
|
|
69
|
let paths = PreCaseShift.manage_lower_upper_case paths in (* FIXME: unnecessarily upper-cases the first token (adjectives, particles, etc.) *)
|
|
70
71
|
let paths = PreLemmatization.combine_interps paths in
(* print_endline (PrePaths.to_string paths); *)*)
|
|
72
|
|
|
73
|
let parse_conll tokens dep_paths = (* FIXME: check whether the order of the elements of paths is preserved !!! *)
|
|
74
|
let paths = List.rev (Int.fold 1 (Array.length dep_paths - 1) [] (fun paths conll_id ->
|
|
75
76
|
let id,_,_ = dep_paths.(conll_id) in
ExtArray.get tokens id :: paths)) in
|
|
77
|
(* print_endline "a12"; *)
|
|
78
|
let paths = find_proper_names paths in
|
|
79
|
(* print_endline "a13"; *)
|
|
80
81
|
let paths = modify_weights paths in
let paths = PreWordnet.assign_senses paths in
|
|
82
|
(* print_endline "a14"; *)
|
|
83
|
(* let paths = combine_interps paths in (* FIXME: this should also work for Proper *) *)
|
|
84
|
(* print_endline "a15"; *)
|
|
85
|
let paths = assign_valence paths in
|
|
86
|
(* print_endline "a16"; *)
|
|
87
88
89
|
let paths = disambiguate_senses paths in
let paths = assign_simplified_valence paths in
let paths = PreSemantics.assign_semantics paths in
|
|
90
|
(* print_endline "a16"; *)
|
|
91
|
let _ = Xlist.fold paths 1 (fun conll_id t ->
|
|
92
93
|
let id,_,_ = dep_paths.(conll_id) in
ExtArray.set tokens id t;
|
|
94
95
|
conll_id + 1) in
()
|
|
96
|
|
|
97
98
|
let parse_text = function
RawText query ->
|
|
99
100
101
102
|
let text,tokens = ENIAMsubsyntax.parse_text query in
let text = ENIAMpreIntegration.parse_text ENIAMsubsyntaxTypes.Struct tokens text in
let lex_sems = ENIAMlexSemantics.assign tokens text in
text,tokens,lex_sems
|
|
103
|
| AltText[Raw,RawText query;CONLL,StructText([
|
|
104
|
StructParagraph[{psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence dep_paths]} as p]],tokens)] ->
|
|
105
106
|
parse_conll tokens dep_paths;
let paths = parse query in
|
|
107
|
let sentences = PreSentences.split_into_sentences "" query tokens paths in
|
|
108
|
let m_dep_paths = Array.map (fun (id,_,_) -> id,-1,"") dep_paths in
|
|
109
|
let conll = StructParagraph[{p with psentence = AltSentence([Raw, RawSentence text; CONLL, DepSentence dep_paths]
|
|
110
|
@ if Paths.config.Paths.mate_parser_enabled then [Mate, DepSentence m_dep_paths] else [])}] in
|
|
111
|
AltText[Raw,RawText query; Struct, StructText([
|
|
112
|
AltParagraph[Raw,RawParagraph query; ENIAM, StructParagraph sentences; CONLL, conll]],tokens)]
|
|
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
|
| _ -> failwith "parse_text: not implemented"*)
open ENIAMsubsyntaxTypes
(** [parse_text (text, tokens)] runs the pre-processing pipeline on a request
    and returns the triple [(text, tokens, lex_sems)].

    Two request shapes are handled:
    - [RawText query]: the incoming token array is ignored; the query is
      processed by [ENIAMsubsyntax.parse_text], integrated with
      [ENIAMpreIntegration.parse_text] in [Struct] mode, and the resulting
      text and freshly produced tokens are passed to
      [ENIAMlexSemantics.assign].
    - an [AltText] holding the raw query together with a CONLL structure made
      of exactly one paragraph with exactly one sentence: the query is parsed
      with [ENIAMsubsyntax.parse] and split into sentences, the CONLL sentence
      is kept as an alternative (plus a [Mate] variant when
      [Paths.config.Paths.mate_parser_enabled] is set), and the incoming
      token array is reused.

    @raise Failure for any other request shape. *)
let parse_text request =
  match request with
  | RawText query, _ ->
    let subsyntax_text, fresh_tokens = ENIAMsubsyntax.parse_text query in
    let integrated =
      ENIAMpreIntegration.parse_text
        ENIAMsubsyntaxTypes.Struct fresh_tokens subsyntax_text in
    let lex_sems = ENIAMlexSemantics.assign fresh_tokens integrated in
    integrated, fresh_tokens, lex_sems
  | AltText [Raw, RawText query;
             CONLL, StructText [
               StructParagraph [
                 {psentence = AltSentence [Raw, RawSentence raw_sentence;
                                           CONLL, DepSentence dep_paths]}
                 as paragraph]]],
    tokens ->
    (* Copy of the dependency paths keeping only the first component of each
       entry; the other two are replaced by -1 and the empty string. *)
    let m_dep_paths =
      Array.map (fun (id, _, _) -> id, -1, "") dep_paths in
    let mate_variants =
      if Paths.config.Paths.mate_parser_enabled
      then [Mate, DepSentence m_dep_paths]
      else [] in
    let conll_sentence =
      AltSentence
        ([Raw, RawSentence raw_sentence; CONLL, DepSentence dep_paths]
         @ mate_variants) in
    let conll =
      StructParagraph [{paragraph with psentence = conll_sentence}] in
    let paths = ENIAMsubsyntax.parse query in
    let sentences =
      ENIAMsentences.split_into_sentences "" query tokens paths in
    let paragraph_variants =
      AltParagraph [Raw, RawParagraph query;
                    ENIAM, StructParagraph sentences;
                    CONLL, conll] in
    let result =
      AltText [Raw, RawText query;
               Struct, StructText [paragraph_variants]] in
    let lex_sems = ENIAMlexSemantics.assign tokens result in
    result, tokens, lex_sems
  | _ -> failwith "parse_text: not implemented"
|
|
135
|
|
|
136
|
(** [main_loop in_chan out_chan] serves requests arriving on [in_chan] until
    a terminating request is received.

    Each request is a marshalled pair [(text, tokens)]. A request whose text
    is [RawText ""] ends the loop. Any other request is passed to
    [parse_text] and answered on [out_chan] with the marshalled tuple
    [(text, tokens, lex_sems, error_message, elapsed_seconds)]:
    - on success the error message is empty and the elapsed time is the
      wall-clock duration of [parse_text] measured with [Unix.gettimeofday];
    - if anything raises while handling the request, the reply carries
      placeholder values, the exception rendered by [Printexc.to_string],
      and an elapsed time of [0.].

    The output channel is flushed after every reply. *)
let rec main_loop in_chan out_chan =
  let request =
    (Marshal.from_channel in_chan
     : text * ENIAMtokenizerTypes.token_record ExtArray.t) in
  match fst request with
  | RawText "" -> ()
  | _ ->
    begin
      try
        let started = Unix.gettimeofday () in
        let text, tokens, lex_sems = parse_text request in
        let finished = Unix.gettimeofday () in
        Marshal.to_channel out_chan
          (text, tokens, lex_sems, "", finished -. started) []
      with exn ->
        (* Report the failure to the client instead of dropping the
           connection; the payload fields are empty placeholders. *)
        Marshal.to_channel out_chan
          (RawText "",
           ExtArray.make 1 ENIAMtokenizerTypes.empty_token,
           ExtArray.make 1 ENIAMlexSemanticsTypes.empty_lex_sem,
           Printexc.to_string exn,
           0.) []
    end;
    flush out_chan;
    main_loop in_chan out_chan
|
|
164
|
|
|
165
166
167
168
|
(* let _ = main_loop stdin stdout *)
(** Internet socket address the server is established on: any local
    interface ([Unix.inet_addr_any]) at port [Paths.pre_port]. *)
let sockaddr = Unix.ADDR_INET (Unix.inet_addr_any, Paths.pre_port)

(* Program entry point: compact the heap, announce readiness on standard
   output, then establish the server on [sockaddr] with [main_loop] as the
   connection handler. Bound with [let ()] so that the compiler checks the
   whole startup expression has type [unit]. *)
let () =
  Gc.compact ();
  print_endline "Ready!";
  Unix.establish_server main_loop sockaddr
|