// dependencyParser/experimental/mate-tools/src/examples/Pipeline.java (2.44 KB)
// Blame view: Jan Lupa authored
package examples;

import java.io.IOException;
import java.util.ArrayList;
import java.util.StringTokenizer;

import is2.data.SentenceData09;
import is2.lemmatizer.Lemmatizer;
import is2.parser.Options;
import is2.parser.Parser;
import is2.tag.Tagger;

/**
 * @author Bernd Bohnet, 13.09.2010
Jan Lupa authored
15
16
17
 *
 *         Illustrates the application of some components: lemmatizer, tagger,
 *         and parser
Jan Lupa authored
18
19
20
 */
public class Pipeline {
Jan Lupa authored
21
	// how to parse a sentences and call the tools
Jan Lupa authored
22
23
24
25
26
	public static void main(String[] args) throws IOException {

		// Create a data container for a sentence
		SentenceData09 i = new SentenceData09();
Jan Lupa authored
27
28
		if (args.length == 1) { // input might be a sentence: "This is another
								// test ."
Jan Lupa authored
29
30
			StringTokenizer st = new StringTokenizer(args[0]);
			ArrayList<String> forms = new ArrayList<String>();
Jan Lupa authored
31
Jan Lupa authored
32
			forms.add("<root>");
Jan Lupa authored
33
34
35
			while (st.hasMoreTokens())
				forms.add(st.nextToken());
Jan Lupa authored
36
			i.init(forms.toArray(new String[0]));
Jan Lupa authored
37
Jan Lupa authored
38
		} else {
Jan Lupa authored
39
40
			// provide a default sentence
			i.init(new String[] { "<root>", "This", "is", "a", "test", "." });
Jan Lupa authored
41
42
		}
Jan Lupa authored
43
44
45
		// print the forms
		for (String l : i.forms)
			System.out.println("form : " + l);
Jan Lupa authored
46
47

		// tell the lemmatizer the location of the model
Jan Lupa authored
48
49
		is2.lemmatizer.Options optsLemmatizer = new is2.lemmatizer.Options(
				new String[] { "-model", "models/lemma-eng.model" });
Jan Lupa authored
50
51
52
53

		// create a lemmatizer
		Lemmatizer lemmatizer = new Lemmatizer(optsLemmatizer.modelName);
Jan Lupa authored
54
		// lemmatize a sentence; the result is stored in the stenenceData09 i
Jan Lupa authored
55
56
57
		i = lemmatizer.apply(i);

		// output the lemmata
Jan Lupa authored
58
59
		for (String l : i.plemmas)
			System.out.println("lemma : " + l);
Jan Lupa authored
60
61

		// tell the tagger the location of the model
Jan Lupa authored
62
		is2.tag.Options optsTagger = new is2.tag.Options(new String[] { "-model", "models/tag-eng.model" });
Jan Lupa authored
63
64
		Tagger tagger = new Tagger(optsTagger);
Jan Lupa authored
65
66
		// String pos[] =tagger.tag(i.forms, i.lemmas);
		// i.setPPos(pos);
Jan Lupa authored
67
68

		SentenceData09 tagged = tagger.tag(i);
Jan Lupa authored
69
70
		for (String p : tagged.ppos)
			System.out.println("pos " + p);
Jan Lupa authored
71
Jan Lupa authored
72
73
		// initialize the options
		Options optsParser = new Options(new String[] { "-model", "models/prs-eng-x.model" });
Jan Lupa authored
74
75
76

		// create a parser
		Parser parser = new Parser(optsParser);
Jan Lupa authored
77
Jan Lupa authored
78
79
80
81
82
83
84
		// parse the sentence (you get a copy of the input i)
		SentenceData09 parse = parser.apply(tagged);

		System.out.println(parse.toString());

		// create some trash on the hard drive :-)
		is2.io.CONLLWriter09 writer = new is2.io.CONLLWriter09("example-out.txt");
Jan Lupa authored
85
Jan Lupa authored
86
87
88
89
90
		writer.write(i);
		writer.finishWriting();
	}

}