FullPipelineTest.java
2.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
package examples;
import java.io.File;
import is2.data.InstancesTagger;
import is2.data.SentenceData09;
import is2.io.CONLLReader09;
import is2.io.CONLLWriter09;
import is2.lemmatizer.Lemmatizer;
import is2.lemmatizer.MFO;
import is2.parser.Parser;
import is2.tag.Tagger;
//import org.apache.log4j.Logger;
/**
 * Dependency parsing
 *
 * @author B. Piwowarski <benjamin@bpiwowar.net>
 * @date 10/10/12
 */
// @TaskDescription(name = "dependency-parser", project = "mate-tools")
/**
 * Runs the full mate-tools pipeline (lemmatizer -> POS tagger -> morphological
 * tagger -> dependency parser) over a CoNLL-2009 file and writes the result.
 */
public class FullPipelineTest {
	// final static private Logger LOGGER =
	// Logger.getLogger(DependencyParser.class);

	/** Serialized lemmatizer model file. */
	// @Argument(name = "lemmatizer", required = true, checkers =
	// IOChecker.Readable.class)
	public File lemmatizerFile;

	/** Serialized POS-tagger model file. */
	// @Argument(name = "tagger", required = true)
	public File taggerFile;

	/** Serialized morphological-tagger model file. */
	public File mtaggerFile;

	/** Serialized dependency-parser model file. */
	// @Argument(name = "parser", required = true)
	public File parserFile;

	/**
	 * Reads CoNLL-2009 sentences from {@code source}, applies the four
	 * pipeline stages in order, and writes annotated sentences to
	 * {@code target}.
	 *
	 * @param source path of the CoNLL-2009 input file
	 * @param target path of the CoNLL-2009 output file
	 * @return 0 on completion (processing also stops early, returning 0, if a
	 *         sentence throws during annotation)
	 * @throws Throwable if a model cannot be loaded or I/O fails
	 */
	// @Override
	public int execute(String source, String target) throws Throwable {
		// Load all four models up front; each constructor reads its model file.
		// LOGGER.info("Loading lemmatizer");
		Lemmatizer lemmatizer = new Lemmatizer(lemmatizerFile.getAbsolutePath());
		// LOGGER.info("Loading tagger");
		Tagger tagger = new Tagger(taggerFile.getAbsolutePath());
		is2.mtag.Tagger mtagger = new is2.mtag.Tagger(mtaggerFile.getAbsolutePath());
		// LOGGER.info("Loading parser");
		Parser parser = new Parser(parserFile.getAbsolutePath());

		CONLLReader09 reader = new CONLLReader09(source);
		CONLLWriter09 writer = new CONLLWriter09(target);
		int count = 0;
		try {
			while (true) {
				// Each sentence needs a fresh InstancesTagger holder.
				InstancesTagger is = new InstancesTagger();
				is.init(1, new MFO());
				SentenceData09 instance = reader.getNext(is);
				if (instance == null)
					break; // end of input
				SentenceData09 result = null;
				try {
					// Overwrite the previous progress counter on the console.
					System.out.print("\b\b\b\b" + count);
					// Pipeline stages; each returns the enriched sentence.
					result = lemmatizer.apply(instance);
					result = tagger.apply(result);
					result = mtagger.apply(result);
					result = parser.apply(result);
					count++;
				} catch (Exception e) {
					// Report the failing sentence and stop processing;
					// already-written sentences are still flushed below.
					System.out.println("error" + result);
					System.out.println("error" + instance);
					e.printStackTrace();
					break;
				}
				writer.write(result);
			}
		} finally {
			// Always flush/close the output, even if writing threw.
			writer.finishWriting();
		}
		return 0;
	}

	/**
	 * Command-line entry point.
	 *
	 * <p>Usage: lemmatizer-model tagger-model mtagger-model parser-model
	 * source target
	 */
	public static void main(String args[]) throws Throwable {
		// The code reads args[0..5]: four model files plus input and output
		// paths. (The original check of "< 3" let 3-5 arguments through and
		// crashed with ArrayIndexOutOfBoundsException.)
		if (args.length < 6) {
			System.out.println(
					"Usage: lemmatizer-model tagger-model mtagger-model parser-model source target");
			System.exit(1); // non-zero: invocation error
		}
		FullPipelineTest p = new FullPipelineTest();
		p.lemmatizerFile = new File(args[0]);
		p.taggerFile = new File(args[1]);
		p.mtaggerFile = new File(args[2]);
		p.parserFile = new File(args[3]);
		p.execute(args[4], args[5]);
	}
}