main.py
3.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import os
import sys
from argparse import ArgumentParser
from natsort import natsorted
sys.path.append(os.path.abspath(os.path.join('..')))
import conf
from inout import mmax
from inout.constants import INPUT_FORMATS
from resolvers import resolve
from resolvers.constants import RESOLVERS
from utils import eprint
def main():
    """Validate the command-line options and launch text processing.

    The checks run in a fixed order (input given, resolver known, input
    format known); the first one that fails is reported through ``eprint``
    and nothing is processed. When every check passes, the texts are handed
    to ``process_texts``.
    """
    options = parse_arguments()

    # Guard clauses: report the first problem found and stop.
    if not options.input:
        eprint("Error: Input file(s) not specified!")
        return
    if options.resolver not in RESOLVERS:
        eprint("Error: Unknown resolve algorithm!")
        return
    if options.format not in INPUT_FORMATS:
        eprint("Error: Unknown input file format!")
        return

    algorithm = options.resolver
    architecture = conf.NEURAL_MODEL_ARCHITECTURE
    if architecture == 'siamese':
        # The configured siamese architecture overrides whatever resolver
        # was requested on the command line.
        algorithm = architecture
        eprint("Warning: Using %s resolver because of selected neural model architecture!" % architecture)

    process_texts(options.input, options.output, options.format,
                  algorithm, options.threshold)
def parse_arguments():
    """Build the command-line parser and return the parsed arguments.

    Recognised options: ``-i/--input``, ``-o/--output``, ``-f/--format``,
    ``-r/--resolver`` and ``-t/--threshold``. The result is the namespace
    produced by ``ArgumentParser.parse_args()``.
    """
    resolver_help = ('resolve algorithm; default: incremental; possibilities: %s'
                     % ', '.join(RESOLVERS))

    # (flags, value type, default, help text) for each supported option;
    # the destination name is spelled out per option below.
    option_table = (
        (('-i', '--input'), str, '', 'input file or dir path'),
        (('-o', '--output'), str, '',
         'output path; if not specified writes output to standard output'),
        (('-f', '--format'), str, 'mmax', 'input format; default: mmax'),
        (('-r', '--resolver'), str, 'incremental', resolver_help),
        (('-t', '--threshold'), float, 0.001, 'threshold; default: 0.001'),
    )

    cli = ArgumentParser(description='Corneferencer: coreference resolver using neural nets.')
    for (short_flag, long_flag), value_type, fallback, description in option_table:
        cli.add_argument(short_flag, long_flag, type=value_type, action='store',
                         dest=long_flag[2:], default=fallback,
                         help=description)
    return cli.parse_args()
def process_texts(inpath, outpath, informat, resolver, threshold):
    """Dispatch processing depending on what ``inpath`` points to.

    A directory is handled by ``process_directory`` and a regular file by
    ``process_file``; any other path is reported as an error through
    ``eprint`` and nothing is processed.
    """
    if os.path.isdir(inpath):
        handler = process_directory
    elif os.path.isfile(inpath):
        handler = process_file
    else:
        eprint("Error: Specified input does not exist!")
        return

    handler(inpath, outpath, informat, resolver, threshold)
def process_directory(inpath, outpath, informat, resolver, threshold):
    """Run ``process_file`` on every entry of the directory ``inpath``.

    Entries are visited in ``natsorted`` order. The output path passed for
    each entry is ``outpath`` joined with the entry's name without its
    extension.

    NOTE(review): ``os.path.abspath('')`` yields the current working
    directory, so an empty ``outpath`` results in per-entry output paths
    under the current working directory -- confirm this is intended.
    """
    source_dir = os.path.abspath(inpath)
    target_dir = os.path.abspath(outpath)

    for entry in natsorted(os.listdir(source_dir)):
        stem, _extension = os.path.splitext(os.path.basename(entry))
        process_file(os.path.join(source_dir, entry),
                     os.path.join(target_dir, stem),
                     informat, resolver, threshold)
def process_file(inpath, outpath, informat, resolver, threshold):
    """Resolve a single input file and write the result.

    Only files whose name ends with ``.mmax`` are handled, and only when
    ``informat`` is ``'mmax'``; anything else is skipped silently. For a
    handled file the name is printed, the text is read with ``mmax.read``,
    the function of ``resolve`` named by ``resolver`` is applied with
    ``threshold`` (when ``resolver`` is one of the four known names), and
    the text is passed to ``mmax.write``.
    """
    filename = os.path.basename(inpath)
    if informat != 'mmax' or not filename.endswith('.mmax'):
        return

    print(filename)
    text = mmax.read(inpath)

    # An unrecognised resolver name applies no resolution step; the text is
    # still written out below.
    if resolver in ('incremental', 'entity_based', 'closest', 'siamese'):
        getattr(resolve, resolver)(text, threshold)

    mmax.write(inpath, outpath, text)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()