generate-static.py
5.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#!/usr/bin/env python
# -*- Mode: Python; tab-width: 4; indent-tabs-mode: nil; coding: utf-8; -*-
# vim:set ft=python ts=4 sw=4 sts=4 autoindent:
# Generates a web page linking to visualizations of each document in
# a BioNLP ST 2011 Shared Task dataset.
import sys
import os
try:
    import argparse
except ImportError:
    from os.path import dirname, join as join_path
    from sys import path as sys_path
    # We are most likely on an old Python and need to use our internal version,
    # which is looked up in ../server/lib relative to this script's directory.
    # (The directory part of __file__ is required here: joining onto the
    # script's file name would produce a path that cannot exist.)
    sys_path.append(join_path(dirname(__file__), '../server/lib'))
    import argparse
# Filename extensions that should be considered in selecting files to
# process. Each entry includes the leading dot, matching what
# os.path.splitext() returns; the list order is also the order in which
# print_links emits the data-file columns.
known_filename_extensions = [".txt", ".a1", ".a2"]
def argparser():
    """Build the command-line parser for this script.

    Returns an argparse.ArgumentParser that accepts the optional
    -v/--visualizer, -s/--staticdir and -d/--dataset settings followed by
    the positional arguments `directory` and `prefix`.
    """
    parser = argparse.ArgumentParser(
        description="Generate web page linking to visualizations of BioNLP ST documents.")

    # Optional settings: (short flag, long flag, default, metavar, help text).
    optional_settings = (
        ("-v", "--visualizer", "visualizer.xhtml", "URL",
         "Visualization script"),
        ("-s", "--staticdir", "static", "DIR",
         "Directory containing static visualizations"),
        ("-d", "--dataset", None, "NAME",
         "Dataset name (derived from directory by default.)"),
    )
    for short_flag, long_flag, default, metavar, help_text in optional_settings:
        parser.add_argument(short_flag, long_flag, default=default,
                            metavar=metavar, help=help_text)

    # Positional arguments, in the order they must be given.
    parser.add_argument("directory", help="Directory containing ST documents.")
    parser.add_argument("prefix", metavar="URL",
                        help="URL prefix to prepend to links")
    return parser
def files_to_process(dir, extensions=None):
    """Return the paths of the files directly inside `dir` to process.

    Arguments:
    dir -- directory to list (not searched recursively).
    extensions -- filename suffixes (with leading dot) to accept; when
        None, the module-level known_filename_extensions list is used.

    Subdirectories and files with any other suffix are skipped, with a
    note written to stderr for each. If listing the directory fails with
    OSError, the error is reported on stderr and whatever was collected
    so far (an empty list when the directory cannot be read at all) is
    returned instead of raising.
    """
    if extensions is None:
        extensions = known_filename_extensions

    toprocess = []
    try:
        for fn in os.listdir(dir):
            fp = os.path.join(dir, fn)
            if os.path.isdir(fp):
                sys.stderr.write("Skipping directory %s\n" % fn)
            elif os.path.splitext(fn)[1] not in extensions:
                sys.stderr.write("Skipping %s: unrecognized suffix\n" % fn)
            else:
                toprocess.append(fp)
    except OSError as e:
        # "as" form is valid on Python 2.6+ and Python 3 alike.
        sys.stderr.write("Error processing %s: %s\n" % (dir, e))
    return toprocess
def print_links(files, arg, out=sys.stdout, extensions=None):
    """Write an XHTML table with one row of links per document.

    Arguments:
    files -- paths of the data files; files that share a path without
        extension are treated as one document.
    arg -- parsed options; the attributes prefix, visualizer, staticdir
        and dataset are read to build the link targets.
    out -- file-like object the table is written to.
    extensions -- data-file suffixes (with leading dot), one table column
        each, in order; when None, the module-level
        known_filename_extensions list is used.

    Each row holds the document name, a link to the dynamic
    visualization, links to the static svg and png renderings, and for
    every extension either a link to that data file or "-" if the
    document has no such file. Rows are emitted in sorted order of the
    extension-less path. Text and attribute values are XML-escaped so
    that names containing characters such as "&" or "<" still yield
    well-formed markup.
    """
    # Imported here so this function does not rely on a file-level import.
    from xml.sax.saxutils import escape

    if extensions is None:
        extensions = known_filename_extensions

    def as_text(value):
        # Escape for use as element content.
        return escape(value)

    def as_attr(value):
        # Escape for use inside a double-quoted attribute value.
        return escape(value, {'"': "&quot;"})

    def link_cell(url, label):
        return " <td><a href=\"%s\">%s</a></td>\n" % (as_attr(url), as_text(label))

    # group by filename root (filename without extension)
    grouped = {}
    for path in files:
        root, ext = os.path.splitext(path)
        grouped.setdefault(root, []).append(ext)

    static_base = arg.prefix + arg.staticdir

    out.write("<table>\n")
    # output in sort order
    for root in sorted(grouped):
        fn = os.path.split(root)[1]
        out.write("<tr>\n")
        out.write(" <td>%s</td>\n" % as_text(fn))
        # dynamic visualization
        out.write(link_cell(arg.prefix + arg.visualizer + "#" + arg.dataset + "/" + fn,
                            "dynamic"))
        # static visualizations
        out.write(link_cell(static_base + "/svg/" + arg.dataset + "/" + fn + ".svg", "svg"))
        out.write(link_cell(static_base + "/png/" + arg.dataset + "/" + fn + ".png", "png"))
        # data files
        for ext in extensions:
            if ext in grouped[root]:
                out.write(link_cell(arg.prefix + root + ext, ext[1:]))
            else:
                # missing
                out.write(" <td>-</td>\n")
        out.write("</tr>\n")
    out.write("</table>\n")
def main(argv=None):
    """Generate the link page for the directory named on the command line.

    Arguments:
    argv -- full argument vector including the program name at index 0;
        defaults to sys.argv.

    Writes the page (header, link table, footer) to standard output and
    diagnostics to stderr. Returns 0 on success and 1 when the directory
    yields no files to process. Exceptions raised while generating the
    page are reported on stderr and then re-raised.
    """
    if argv is None:
        argv = sys.argv
    arg = argparser().parse_args(argv[1:])

    # derive dataset name from directory if not separately specified
    if arg.dataset is None:
        # Strip trailing separators. rstrip() also copes with an empty or
        # all-separator directory argument (e.g. "/"), where indexing the
        # last character in a loop would raise IndexError.
        stripped = arg.directory.rstrip(os.sep)
        arg.dataset = os.path.split(stripped)[1]
        sys.stderr.write("Assuming dataset name '%s', visualizations in %s\n"
                         % (arg.dataset, os.path.join(arg.staticdir, arg.dataset)))

    try:
        files = files_to_process(arg.directory)
        if not files:
            sys.stderr.write("No files found\n")
            return 1
        print_header()
        print_links(files, arg)
        print_footer()
    except Exception:
        # Name the directory being processed, then let the error propagate.
        sys.stderr.write("Error processing %s\n" % arg.directory)
        raise
    return 0
def print_header(out=sys.stdout):
    """Write the fixed opening part of the XHTML page to `out`.

    The text runs from the DOCTYPE declaration through the opening
    <div id="main"> tag and is followed by one extra newline. The
    elements left open here are expected to be closed by the page footer.
    Uses out.write() rather than the Python-2-only "print >>" statement
    so the function is valid on both Python 2 and Python 3; the emitted
    text is unchanged.
    """
    header = """<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<link rel="stylesheet" href="bionlp-st-11.css" type="text/css" />
<meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1"/>
<title>BioNLP Shared Task 2011 - Data Visualization</title>
</head>
<body>
<div id="sites-chrome-everything" style="direction: ltr">
<div id="sites-chrome-page-wrapper">
<div id="sites-chrome-page-wrapper-inside">
<div xmlns="http://www.w3.org/1999/xhtml" id="sites-chrome-header-wrapper">
<table id="sites-chrome-header" class="sites-layout-hbox" cellspacing="0">
<tr class="sites-header-primary-row">
<td id="sites-header-title">
<div class="sites-header-cell-buffer-wrapper">
<h2>
<a href="https://sites.google.com/site/bionlpst/" dir="ltr">BioNLP Shared Task</a>
</h2>
</div>
</td>
</tr>
</table>
</div>
<div id="sites-chrome-main-wrapper">
<div id="sites-chrome-main-wrapper-inside">
<table id="sites-chrome-main" class="sites-layout-hbox" cellspacing="0">
<tr>
<td id="sites-canvas-wrapper">
<div id="sites-canvas">
<div xmlns="http://www.w3.org/1999/xhtml" id="title-crumbs" style="">
</div>
<h3 xmlns="http://www.w3.org/1999/xhtml" id="sites-page-title-header" style="" align="left">
<span id="sites-page-title" dir="ltr">BioNLP Shared Task 2011 Downloads</span>
</h3>
<div id="sites-canvas-main" class="sites-canvas-main">
<div id="sites-canvas-main-content">
<!-- ##################################################################### -->
<div id="main">
"""
    # The trailing "\n" reproduces the newline the print statement appended.
    out.write(header + "\n")
def print_footer(out=sys.stdout):
    """Write the fixed closing part of the XHTML page to `out`.

    Closes the elements that print_header leaves open: the four <div>s
    nested inside the sites-canvas-wrapper table cell (main,
    sites-canvas-main-content, sites-canvas-main, sites-canvas), the
    cell/row/table, the five enclosing <div>s, and finally <body> and
    <html>. Earlier output closed only three <div>s before </td>, which
    left the document ill-formed; the fourth is emitted here.
    Uses out.write() rather than the Python-2-only "print >>" statement
    so the function is valid on both Python 2 and Python 3.
    """
    footer = """ </div>
</div>
</div>
</div>
</td>
</tr>
</table>
</div>
</div>
</div>
</div>
</div>
</body>
</html>"""
    # The trailing "\n" reproduces the newline the print statement appended.
    out.write(footer + "\n")
# When run as a script, use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())