// Pipeline.java 2.43 KB
package examples;
 
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.StringTokenizer;

import is2.data.SentenceData09;
import is2.lemmatizer.Lemmatizer;
import is2.parser.Options;
import is2.parser.Parser;
import is2.tag.Tagger;

/**
 * @author Bernd Bohnet, 13.09.2010
 * 
 * Illustrates the application of some components: lemmatizer, tagger, and parser
 */
public class Pipeline {

	/**
	 * Runs one sentence through the lemmatizer, the tagger and the parser, prints every
	 * intermediate result to standard output and writes the parsed sentence to the file
	 * "example-out.txt" in the working directory.
	 *
	 * The model files are looked up under the relative paths "models/lemma-eng.model",
	 * "models/tag-eng.model" and "models/prs-eng-x.model".
	 *
	 * @param args optional sentence to process. Either one quoted argument
	 *             ("This is another test .") or several unquoted arguments; all of them are
	 *             split on whitespace into word forms. If no word form is found, a default
	 *             sentence is used.
	 * @throws IOException propagated from the components called here (presumably model
	 *             loading or writing the output file -- not visible in this file)
	 */
	public static void main(String[] args) throws IOException {

		// Create a data container for the sentence and fill in its word forms
		SentenceData09 sentence = new SentenceData09();
		sentence.init(readForms(args));

		// print the forms
		for (String form : sentence.forms) {
			System.out.println("form : "+form);
		}

		// tell the lemmatizer the location of the model
		is2.lemmatizer.Options optsLemmatizer =
				new is2.lemmatizer.Options(new String[] {"-model","models/lemma-eng.model"});

		// create a lemmatizer
		Lemmatizer lemmatizer = new Lemmatizer(optsLemmatizer.modelName);

		// lemmatize the sentence; continue with the container returned by the lemmatizer
		sentence = lemmatizer.apply(sentence);

		// output the lemmata
		for (String lemma : sentence.plemmas) {
			System.out.println("lemma : "+lemma);
		}

		// tell the tagger the location of the model and create the tagger
		is2.tag.Options optsTagger = new is2.tag.Options(new String[] {"-model","models/tag-eng.model"});
		Tagger tagger = new Tagger(optsTagger);

		// tag the sentence and output the predicted part-of-speech tags
		SentenceData09 tagged = tagger.tag(sentence);
		for (String pos : tagged.ppos) {
			System.out.println("pos "+pos);
		}

		// initialize the parser options
		Options optsParser = new Options(new String[] {"-model","models/prs-eng-x.model"});

		// create a parser
		Parser parser = new Parser(optsParser);

		// parse the sentence (you get a copy of the input)
		SentenceData09 parse = parser.apply(tagged);

		System.out.println(parse.toString());

		// Store the result on disk. The parse is written rather than the input container:
		// the parser returns a copy, so only 'parse' holds the parser's output.
		is2.io.CONLLWriter09 writer = new is2.io.CONLLWriter09("example-out.txt");
		try {
			writer.write(parse);
		} finally {
			// always finish the writer, even if writing the sentence failed
			writer.finishWriting();
		}
	}

	/**
	 * Builds the word forms of the sentence to process, starting with the artificial
	 * "<root>" entry.
	 *
	 * Every command line argument is split on whitespace, so a quoted sentence and an
	 * unquoted one yield the same forms. If that produces no word form at all (no
	 * arguments, or only blank ones) the default sentence is returned instead.
	 *
	 * @param args the command line arguments of {@link #main(String[])}
	 * @return the forms of the sentence, the first one being "<root>"
	 */
	private static String[] readForms(String[] args) {
		ArrayList<String> forms = new ArrayList<String>();
		forms.add("<root>");

		for (String arg : args) {
			StringTokenizer st = new StringTokenizer(arg);
			while (st.hasMoreTokens()) {
				forms.add(st.nextToken());
			}
		}

		if (forms.size() == 1) {
			// only the root entry is present: nothing usable was passed in,
			// so provide a default sentence
			return new String[] {"<root>","This","is","a","test","."};
		}
		return forms.toArray(new String[0]);
	}
}