package examples;

import java.io.IOException;
import java.util.ArrayList;
import java.util.StringTokenizer;
import is2.data.SentenceData09;
import is2.lemmatizer.Lemmatizer;
import is2.parser.Options;
import is2.parser.Parser;
import is2.tag.Tagger;
/**
* @author Bernd Bohnet, 13.09.2010
|
|
15
16
17
|
*
* Illustrates the application of some components: lemmatizer, tagger,
* and parser
|
|
18
19
20
|
*/
public class Pipeline {
|
|
21
|
// how to parse a sentences and call the tools
|
|
22
23
24
25
26
|
public static void main(String[] args) throws IOException {
// Create a data container for a sentence
SentenceData09 i = new SentenceData09();
|
|
27
28
|
if (args.length == 1) { // input might be a sentence: "This is another
// test ."
|
|
29
30
|
StringTokenizer st = new StringTokenizer(args[0]);
ArrayList<String> forms = new ArrayList<String>();
|
|
31
|
|
|
32
|
forms.add("<root>");
|
|
33
34
35
|
while (st.hasMoreTokens())
forms.add(st.nextToken());
|
|
36
|
i.init(forms.toArray(new String[0]));
|
|
37
|
|
|
38
|
} else {
|
|
39
40
|
// provide a default sentence
i.init(new String[] { "<root>", "This", "is", "a", "test", "." });
|
|
41
42
|
}
|
|
43
44
45
|
// print the forms
for (String l : i.forms)
System.out.println("form : " + l);
|
|
46
47
|
// tell the lemmatizer the location of the model
|
|
48
49
|
is2.lemmatizer.Options optsLemmatizer = new is2.lemmatizer.Options(
new String[] { "-model", "models/lemma-eng.model" });
|
|
50
51
52
53
|
// create a lemmatizer
Lemmatizer lemmatizer = new Lemmatizer(optsLemmatizer.modelName);
|
|
54
|
// lemmatize a sentence; the result is stored in the stenenceData09 i
|
|
55
56
57
|
i = lemmatizer.apply(i);
// output the lemmata
|
|
58
59
|
for (String l : i.plemmas)
System.out.println("lemma : " + l);
|
|
60
61
|
// tell the tagger the location of the model
|
|
62
|
is2.tag.Options optsTagger = new is2.tag.Options(new String[] { "-model", "models/tag-eng.model" });
|
|
63
64
|
Tagger tagger = new Tagger(optsTagger);
|
|
65
66
|
// String pos[] =tagger.tag(i.forms, i.lemmas);
// i.setPPos(pos);
|
|
67
68
|
SentenceData09 tagged = tagger.tag(i);
|
|
69
70
|
for (String p : tagged.ppos)
System.out.println("pos " + p);
|
|
71
|
|
|
72
73
|
// initialize the options
Options optsParser = new Options(new String[] { "-model", "models/prs-eng-x.model" });
|
|
74
75
76
|
// create a parser
Parser parser = new Parser(optsParser);
|
|
77
|
|
|
78
79
80
81
82
83
84
|
// parse the sentence (you get a copy of the input i)
SentenceData09 parse = parser.apply(tagged);
System.out.println(parse.toString());
// create some trash on the hard drive :-)
is2.io.CONLLWriter09 writer = new is2.io.CONLLWriter09("example-out.txt");
|
|
85
|
|
|
86
87
88
89
90
|
writer.write(i);
writer.finishWriting();
}
}
|