#!/usr/bin/python
# create_baseline.py
import re
import optparse
import fileinput
import sys
###########################################################################
#
# Command-line options and usage
#
###########################################################################
# The usage text describes what this script actually does: it overwrites the
# relation and head rows of every sentence with a simple linear baseline.
usage = """%prog [OPTIONS] FILE ...
Create a baseline dependency analysis for each sentence in FILE(s).
By default every word is linked to the word before it; use -r to link
every word to the word after it instead, and -d to pick the relation
label given to every word.
"""
parser = optparse.OptionParser(usage=usage)
# -r switches from the default left-linking baseline to a right-linking one.
parser.add_option("-r", "--rightward", action="store_true",
                  default=False,
                  help="""Create right-linking baseline.""")
# -d sets the relation label written for every word of every sentence.
parser.add_option("-d", "--default-relation", action="store",
                  default="Elaboration",
                  help="Pick default relation.",
                  metavar="RELATION")
def transform_meta_chars(string):
    """Return *string* with every comma replaced by the token ``+comma+``."""
    return string.replace(",", "+comma+")
def untransform_meta_chars(string):
    """Return *string* with every ``+comma+`` token replaced by a comma."""
    return string.replace("+comma+", ",")
## Output dependencies for one sentence
def output_one_sentence(deps):
    """Print the dependencies of one sentence as tab-separated rows.

    Each item of *deps* is the sequence of fields for one word.  The fields
    are transposed, so every printed line holds the same field for all
    words, joined by tabs.  The first field of every word is not printed.
    A blank line is printed after the sentence.

    At least four rows are always printed (fields that no word supplies
    give empty rows); words with more than five fields add further rows.
    """
    # Keep a minimum of five columns so short input yields the usual number
    # of output rows, and grow the table when some word carries more fields.
    width = max([5] + [len(dep) for dep in deps])
    columns = [[] for _ in range(width)]
    for dep in deps:
        for position, field in enumerate(dep):
            columns[position].append(field)
    rows = ["\t".join([str(field) for field in column]) for column in columns]
    # Column 0 is skipped on output.
    print("\n".join(rows[1:]))
    # print("") emits a bare newline under both Python 2 and Python 3.
    print("")
## Get options
(options, args) = parser.parse_args()


def _baseline_heads(num_words, rightward):
    """Return the baseline head number for each of *num_words* words.

    Words are numbered from 1.  Left-linking gives every word the number of
    the word before it, so the first word gets 0; right-linking gives every
    word the number of the word after it, so the last word gets 0.
    """
    if rightward:
        # Built directly so that a one-word sentence yields [0].
        return list(range(2, num_words + 1)) + [0]
    return list(range(num_words))


def _emit_sentence(sentence_info, relation, rightward):
    """Overwrite rows 2 and 3 of *sentence_info* with the baseline and print it.

    Row 2 becomes *relation* repeated once per word and row 3 becomes the
    baseline heads.  Exits with status 1 and a message on stderr when the
    sentence has fewer than four rows or a row is shorter than row 0.
    """
    if len(sentence_info) < 4:
        sys.stderr.write("expected at least 4 rows per sentence, got %d\n"
                         % len(sentence_info))
        sys.exit(1)
    num_words = len(sentence_info[0])
    sentence_info[2] = [relation] * num_words
    sentence_info[3] = _baseline_heads(num_words, rightward)
    try:
        # One entry per word: its 1-based number followed by its column.
        deps = [[i + 1] + [row[i] for row in sentence_info]
                for i in range(num_words)]
    except IndexError:
        # Some row is shorter than row 0; report every row length.
        sys.stderr.write("\n".join([str(len(x)) for x in sentence_info]) + "\n")
        sys.exit(1)
    output_one_sentence(deps)


## Process file(s)
lines = fileinput.input(args)
## Read input
sentence_info = []
for line in lines:
    line = line.strip()
    if line:
        sentence_info.append(line.split())
    elif sentence_info:
        # A blank line ends the current sentence; blank lines with nothing
        # pending (leading or repeated) are ignored.
        _emit_sentence(sentence_info, options.default_relation,
                       options.rightward)
        sentence_info = []
# Emit the last sentence when the input does not end with a blank line.
if sentence_info:
    _emit_sentence(sentence_info, options.default_relation, options.rightward)