DependencyParser.java
2.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
package examples;
import java.io.File;
import java.util.Arrays;
import is2.data.InstancesTagger;
import is2.data.SentenceData09;
import is2.io.CONLLReader09;
import is2.io.IOGenerals;
import is2.lemmatizer.Lemmatizer;
import is2.lemmatizer.MFO;
import is2.parser.Parser;
import is2.tag.Tagger;
//import org.apache.log4j.Logger;
/**
 * Example pipeline: lemmatizes, tags and dependency-parses a fixed set of
 * sample sentences and prints the resulting annotations to standard output.
 *
 * @author B. Piwowarski &lt;benjamin@bpiwowar.net&gt;
 * @date 10/10/12
 */
// @TaskDescription(name = "dependency-parser", project = "mate-tools")
public class DependencyParser {
	// final static private Logger LOGGER =
	// Logger.getLogger(DependencyParser.class);

	// @Argument(name = "lemmatizer", required = true, checkers =
	// IOChecker.Readable.class)
	/** Lemmatizer model file; must be assigned before {@link #execute()} runs. */
	File lemmatizerFile;

	// @Argument(name = "tagger", required = true)
	/** Tagger model file; must be assigned before {@link #execute()} runs. */
	File taggerFile;

	// @Argument(name = "parser", required = true)
	/** Parser model file; must be assigned before {@link #execute()} runs. */
	File parserFile;

	/**
	 * Loads the lemmatizer, tagger and parser models, runs every sample
	 * sentence through them in that order, and prints the {@code forms},
	 * {@code plemmas}, {@code ppos}, {@code pheads} and {@code plabels} arrays
	 * of each result, followed by an empty line.
	 *
	 * @return always {@code 0}
	 * @throws IllegalStateException if one of the model file fields is unset
	 * @throws Throwable anything raised while loading the models or while
	 *         processing a sentence
	 */
	// @Override
	public int execute() throws Throwable {
		// Validate all three fields first so that a missing one is reported
		// with a clear message before any model gets loaded.
		String lemmatizerPath = requireModel(lemmatizerFile, "lemmatizer");
		String taggerPath = requireModel(taggerFile, "tagger");
		String parserPath = requireModel(parserFile, "parser");

		// Load lemmatizer
		// LOGGER.info("Loading lemmatizer");
		Lemmatizer lemmatizer = new Lemmatizer(lemmatizerPath);

		// Load tagger
		// LOGGER.info("Loading tagger");
		Tagger tagger = new Tagger(taggerPath);

		// Load parser
		// LOGGER.info("Loading parser");
		Parser parser = new Parser(parserPath);

		// Sentences to parse (already tokenized: tokens are separated by spaces)
		String[] sentences = { "Airfields have been constructed on a number of the islands .",
				"Private investment has even made an increasingly modern ferry fleet possible .",
				"Politically , the 1990s have been relatively quite times for the islands ." };

		CONLLReader09 reader = new CONLLReader09(CONLLReader09.NO_NORMALIZE);

		for (String sentence : sentences) {
			// Prepare the sentence
			InstancesTagger instanceTagger = new InstancesTagger();
			instanceTagger.init(1, new MFO());

			// Put the ROOT marker at index 0, shifting the real tokens to 1..n
			String[] split = sentence.split("\\s+");
			String[] splitRoot = new String[split.length + 1];
			System.arraycopy(split, 0, splitRoot, 1, split.length);
			splitRoot[0] = IOGenerals.ROOT;

			SentenceData09 instance = new SentenceData09();
			instance.init(splitRoot);
			reader.insert(instanceTagger, instance);

			// Lemmatize, tag, then parse
			SentenceData09 result = lemmatizer.apply(instance);
			// NOTE(review): the value returned by tagger.apply is discarded;
			// presumably the tagger annotates `result` in place - confirm.
			tagger.apply(result);
			result = parser.parse(result, parser.params, false, parser.options);

			// Output
			System.out.println(Arrays.toString(result.forms));
			System.out.println(Arrays.toString(result.plemmas));
			System.out.println(Arrays.toString(result.ppos));
			System.out.println(Arrays.toString(result.pheads));
			System.out.println(Arrays.toString(result.plabels));
			System.out.println();
		}

		return 0;
	}

	/**
	 * Returns the absolute path of a model file, rejecting a field that was
	 * never assigned.
	 *
	 * @param modelFile the field value to check
	 * @param argumentName name used in the error message
	 * @return {@code modelFile.getAbsolutePath()}
	 * @throws IllegalStateException if {@code modelFile} is {@code null}
	 */
	private static String requireModel(File modelFile, String argumentName) {
		if (modelFile == null) {
			throw new IllegalStateException("Missing required model file: " + argumentName);
		}
		return modelFile.getAbsolutePath();
	}
}