ENIAMlexSemanticsHTMLof.ml
2.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
(*
* ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
* Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
* Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
*
* This library is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*)
open ENIAMlexSemanticsTypes
open Printf
(** Opening part of every generated page: the HTML 4.01 Transitional doctype,
    the [<head>] section (content type, title, content language) and the
    opening [<body>] and [<center>] tags.  The value carries no trailing
    newline. *)
let html_header =
  String.concat "\n" [
    "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">";
    "<html>";
    "<head>";
    "<META HTTP-EQUIV=\"CONTENT-TYPE\" CONTENT=\"text/html; charset=utf8\">";
    "<TITLE>ENIAM: Kategorialny Parser Składniowo-Semantyczny</TITLE>";
    "<META HTTP-EQUIV=\"Content-Language\" CONTENT=\"pl\">";
    "</head>";
    "<body>";
    "<center>";
  ]
(** Closing part of every generated page: closes the [<center>], [<body>] and
    [<html>] elements opened by the page header.  The value carries no
    trailing newline. *)
let html_trailer =
  String.concat "\n" [
    "</center>";
    "</body>";
    "</html>";
  ]
(** [html_of_lex_sems tokens lex_sems] renders an HTML table with a header row
    followed by one row per index of [tokens]; each row shows the token's
    [orth] field, its [ENIAMtokens.string_of_token] representation and the
    index itself.

    Both textual cells are passed through [ENIAMsubsyntaxHTMLof.escape_html],
    so markup characters occurring in the analysed text cannot corrupt the
    table or inject HTML into the page.

    [lex_sems] is accepted for interface compatibility but is not consulted
    by this function. *)
let html_of_lex_sems tokens lex_sems =
  let row_of_token id =
    let t = ExtArray.get tokens id in
    sprintf "<tr><td>%s</td><td>%s</td><td>%d</td></tr>"
      (ENIAMsubsyntaxHTMLof.escape_html t.ENIAMtokenizerTypes.orth)
      (ENIAMsubsyntaxHTMLof.escape_html (ENIAMtokens.string_of_token t.ENIAMtokenizerTypes.token))
      id in
  (* Each row is consed onto the front of the accumulator, so the list is
     reversed afterwards to restore the order in which ids were visited. *)
  let rows =
    List.rev (Int.fold 0 (ExtArray.size tokens - 1) [] (fun l id ->
      row_of_token id :: l)) in
  "<table><tr><td><b>orth</b></td><td><b>token</b></td><td><b>id</b></td></tr>" ^
  String.concat "\n" rows ^
  "</table>"
(** [text_and_tokens_and_lex_sems text tokens lex_sems msg] builds a complete
    HTML page as a string.

    When [msg] is the empty string the page contains, between the header and
    the trailer, the rendering of [text], the token array table and the
    table produced by [html_of_lex_sems], each followed by [<BR>].
    Otherwise the page body consists of [msg] alone. *)
let text_and_tokens_and_lex_sems text tokens lex_sems msg =
  match msg with
  | "" ->
    String.concat "" [
      html_header; "\n";
      ENIAMsubsyntaxHTMLof.html_of_text tokens text; "<BR>\n";
      ENIAMsubsyntaxHTMLof.html_of_token_extarray tokens; "<BR>\n";
      html_of_lex_sems tokens lex_sems; "<BR>\n";
      html_trailer; "\n";
    ]
  | _ ->
    String.concat "\n" [html_header; msg; html_trailer; ""]
(** [print_text_and_tokens_and_lex_sems path name text tokens lex_sems msg]
    writes the page built by [text_and_tokens_and_lex_sems] to the file
    [path ^ name ^ ".html"] via [File.file_out].

    [path] and [name] are concatenated as-is, with no separator inserted
    between them. *)
let print_text_and_tokens_and_lex_sems path name text tokens lex_sems msg =
  let filename = path ^ name ^ ".html" in
  File.file_out filename (fun out_chan ->
    (* The page is built inside the callback, i.e. after the output file has
       been handed to us, exactly as before. *)
    let page = text_and_tokens_and_lex_sems text tokens lex_sems msg in
    output_string out_chan page)