// FullPipelineTest.java — note: a scraped line-number gutter (an extraction artifact) was removed here.
package examples;
import is2.data.InstancesTagger;
import is2.data.SentenceData09;
import is2.io.CONLLReader09;
import is2.io.CONLLWriter09;
import is2.lemmatizer.Lemmatizer;
import is2.lemmatizer.MFO;
import is2.parser.Parser;
import is2.tag.Tagger;
//import org.apache.log4j.Logger;
import java.io.File;
import java.util.Arrays;
/**
* Dependency parsing
*
* @author B. Piwowarski <benjamin@bpiwowar.net>
* @date 10/10/12
*/
//@TaskDescription(name = "dependency-parser", project = "mate-tools")
public class FullPipelineTest {
    // final static private Logger LOGGER = Logger.getLogger(DependencyParser.class);

    /** Serialized lemmatizer model file. */
    public File lemmatizerFile;
    /** Serialized part-of-speech tagger model file. */
    public File taggerFile;
    /** Serialized morphological tagger model file. */
    public File mtaggerFile;
    /** Serialized dependency parser model file. */
    public File parserFile;

    /**
     * Runs the full mate-tools pipeline (lemmatize, POS-tag, morph-tag, parse)
     * over a CoNLL-09 file and writes the annotated sentences to {@code target}.
     *
     * @param source path of the CoNLL-09 input file
     * @param target path of the CoNLL-09 output file
     * @return 0 on completion (also when processing stopped early on a sentence error)
     * @throws Throwable if a model cannot be loaded or output I/O fails
     */
    public int execute(String source, String target) throws Throwable {
        // Each constructor deserializes its model from disk; this is the slow part.
        Lemmatizer lemmatizer = new Lemmatizer(lemmatizerFile.getAbsolutePath());
        Tagger tagger = new Tagger(taggerFile.getAbsolutePath());
        is2.mtag.Tagger mtagger = new is2.mtag.Tagger(mtaggerFile.getAbsolutePath());
        Parser parser = new Parser(parserFile.getAbsolutePath());

        CONLLReader09 reader = new CONLLReader09(source);
        CONLLWriter09 writer = new CONLLWriter09(target);
        int count = 0;
        try {
            while (true) {
                // A fresh instance container is required per sentence.
                InstancesTagger is = new InstancesTagger();
                is.init(1, new MFO());
                SentenceData09 instance = reader.getNext(is);
                if (instance == null) {
                    break; // end of input
                }
                SentenceData09 result = null;
                try {
                    // Crude console progress indicator: backspace over the old counter.
                    System.out.print("\b\b\b\b" + count);
                    result = lemmatizer.apply(instance);
                    result = tagger.apply(result);
                    result = mtagger.apply(result);
                    result = parser.apply(result);
                    count++;
                } catch (Exception e) {
                    // Best-effort reporting of the failing sentence, then stop;
                    // the already-written sentences are still flushed below.
                    System.out.println("error" + result);
                    System.out.println("error" + instance);
                    e.printStackTrace();
                    break;
                }
                writer.write(result);
            }
        } finally {
            // Flush and close the output even if processing aborted early,
            // so partial results are not lost (the original skipped this on error paths).
            writer.finishWriting();
        }
        return 0;
    }

    /**
     * Command-line entry point.
     *
     * <p>Arguments: lemmatizer-model tagger-model mtagger-model parser-model source target
     */
    public static void main(String[] args) throws Throwable {
        // Bug fix: the pipeline needs 6 arguments. The old check (< 3) let
        // 3-5 argument invocations fall through to an ArrayIndexOutOfBoundsException,
        // and the usage line omitted the mtagger model.
        if (args.length < 6) {
            System.out.println("lemmatizer-model tagger-model mtagger-model parser-model source target");
            System.exit(1); // non-zero: invocation error, not success
        }
        FullPipelineTest p = new FullPipelineTest();
        p.lemmatizerFile = new File(args[0]);
        p.taggerFile = new File(args[1]);
        p.mtaggerFile = new File(args[2]);
        p.parserFile = new File(args[3]);
        p.execute(args[4], args[5]);
    }
}