// NKJPWypluwkaReader.java (1.88 KB)
package ipipan.clarin.tei.api.io;

import ipipan.clarin.tei.api.entities.AnnotationLayer;
import ipipan.clarin.tei.api.entities.TEICorpusText;
import ipipan.clarin.tei.api.entities.TEIHeader;
import ipipan.clarin.tei.api.entities.TEIParagraph;
import ipipan.clarin.tei.api.exceptions.TEIException;
import ipipan.clarin.tei.impl.io.read.NKJPWypluwkaReaderImpl;

import java.io.File;
import java.util.EnumSet;

/**
 * Abstract reader of paragraphs stored in an NKJP "wypluwka" directory.
 * <p>
 * Instances are obtained through the {@code getInstance} factory methods,
 * which create a {@link NKJPWypluwkaReaderImpl}.
 *
 * @author mlenart
 */
public abstract class NKJPWypluwkaReader {

	/**
	 * Creates a reader for the given directory without restricting the
	 * annotation layers; paragraphs read through it will contain anything that
	 * can be found in the directory.
	 *
	 * @param directory
	 *            - directory to read from
	 * @return NKJPWypluwkaReader instance
	 * @throws TEIException
	 *             if the read fails
	 */
	public static NKJPWypluwkaReader getInstance(File directory)
			throws TEIException {
		NKJPWypluwkaReader unrestrictedReader = new NKJPWypluwkaReaderImpl(
				directory);
		return unrestrictedReader;
	}

	/**
	 * Creates a reader for the given directory that is limited to the given
	 * annotation layers; paragraphs read through it will contain only those
	 * layers.
	 *
	 * @param directory
	 *            - directory to read from
	 * @param layers
	 *            - layers to read
	 * @return NKJPWypluwkaReader instance
	 * @throws TEIException
	 *             if the read fails
	 */
	public static NKJPWypluwkaReader getInstance(File directory,
			EnumSet<AnnotationLayer> layers) throws TEIException {
		NKJPWypluwkaReader layeredReader = new NKJPWypluwkaReaderImpl(
				directory, layers);
		return layeredReader;
	}

	// ---- headers and body ------------------------------------------------

	/**
	 * Reads the corpus header.
	 *
	 * @return the header as a {@link TEIHeader}
	 * @throws TEIException
	 *             declared by the contract; the exact failure conditions are
	 *             defined by the implementation
	 */
	public abstract TEIHeader readCorpusHeader() throws TEIException;

	/**
	 * Reads the text header.
	 *
	 * @return the header as a {@link TEIHeader}
	 * @throws TEIException
	 *             declared by the contract; the exact failure conditions are
	 *             defined by the implementation
	 */
	public abstract TEIHeader readTextHeader() throws TEIException;

	/**
	 * Reads the body for the given corpus text.
	 * <p>
	 * NOTE(review): the method returns nothing, so the result is presumably
	 * stored in {@code tei} - confirm against the implementation.
	 *
	 * @param tei
	 *            - corpus text the body is read for
	 * @throws TEIException
	 *             declared by the contract; the exact failure conditions are
	 *             defined by the implementation
	 */
	public abstract void readBody(TEICorpusText tei) throws TEIException;

	// ---- paragraph-by-paragraph access -----------------------------------

	/**
	 * Tells whether another paragraph can be read.
	 * <p>
	 * NOTE(review): presumably meant to guard calls to
	 * {@link #readNextParagraph()} - confirm against the implementation.
	 *
	 * @return {@code true} if there is a next paragraph
	 * @throws TEIException
	 *             declared by the contract; the exact failure conditions are
	 *             defined by the implementation
	 */
	public abstract boolean hasNextParagraph() throws TEIException;

	/**
	 * Reads the next paragraph.
	 * <p>
	 * NOTE(review): behavior when no paragraph is left is not specified here
	 * - verify in the implementation.
	 *
	 * @return the paragraph that was read
	 * @throws TEIException
	 *             declared by the contract; the exact failure conditions are
	 *             defined by the implementation
	 */
	public abstract TEIParagraph readNextParagraph() throws TEIException;

	// ---- metadata and lifecycle ------------------------------------------

	/**
	 * Returns the annotation layers available to this reader.
	 * <p>
	 * NOTE(review): whether "available" means layers present in the directory
	 * or layers requested at construction is decided by the implementation -
	 * confirm.
	 *
	 * @return set of available annotation layers
	 * @throws TEIException
	 *             declared by the contract; the exact failure conditions are
	 *             defined by the implementation
	 */
	public abstract EnumSet<AnnotationLayer> getAvailableLayers()
			throws TEIException;

	/**
	 * Closes this reader.
	 * <p>
	 * This class does not implement {@code AutoCloseable}, so callers have to
	 * invoke this method explicitly.
	 *
	 * @throws TEIException
	 *             declared by the contract; the exact failure conditions are
	 *             defined by the implementation
	 */
	public abstract void close() throws TEIException;
}