// ReadWriteMentionsTest.java (4.44 KB)
package ipipan.clarin.tei.impl.io;

import static org.junit.Assert.assertEquals;
import ipipan.clarin.tei.api.entities.AnnotationLayer;
import ipipan.clarin.tei.api.entities.TEICorpusText;
import ipipan.clarin.tei.api.entities.TEIMention;
import ipipan.clarin.tei.api.entities.TEIMorph;
import ipipan.clarin.tei.api.entities.TEISentence;
import ipipan.clarin.tei.api.exceptions.TEIException;
import ipipan.clarin.tei.api.io.TEI_IO;
import ipipan.clarin.tei.api.io.TEI_IO.CompressionMethod;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;

import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

/**
 * Read/write round-trip tests for the mentions annotation layer.
 * <p>
 * The sample text stored under
 * {@code src/test/resources/example_text_with_mentions} is read directly, and
 * is also written out (once as a plain directory, once as a package) and read
 * back. Every variant is verified against the same set of expectations.
 * 
 * @author mkopec
 */
public class ReadWriteMentionsTest {

	private static final Logger logger = Logger
			.getLogger(ReadWriteMentionsTest.class);

	/** Directory holding the sample text used as input by all tests. */
	private static final File TEST_DIR = new File(
			"src/test/resources/example_text_with_mentions");

	/**
	 * Index of the sentence the tests refer to as the "last" one.
	 * NOTE(review): presumably the sample text has exactly 14 sentences -
	 * confirm against the test resources.
	 */
	private static final int LAST_SENTENCE_IDX = 13;

	/** Scratch location for written output; must be public for JUnit rules. */
	@Rule
	public TemporaryFolder TEST_TMP_DIR = new TemporaryFolder();

	private final TEI_IO teiIO = TEI_IO.getInstance();

	/** Logs the working directory, against which TEST_DIR is resolved. */
	public ReadWriteMentionsTest() {
		logger.info(System.getProperty("user.dir"));
	}

	/** Configures log4j with the basic configuration and enables all levels. */
	@BeforeClass
	public static void setUpClass() throws Exception {
		BasicConfigurator.configure();
		Logger.getRootLogger().setLevel(Level.ALL);
	}

	/** Nothing to release at class level. */
	@AfterClass
	public static void tearDownClass() throws Exception {
	}

	/** Logs the working directory before each test. */
	@Before
	public void setUp() throws IOException {
		logger.info(System.getProperty("user.dir"));
	}

	/** Nothing to release per test. */
	@After
	public void tearDown() {
	}

	/**
	 * Reads the sample directory and verifies its content.
	 * 
	 * @throws TEIException
	 *             if the sample text cannot be read
	 */
	@Test
	public void testRead() throws TEIException {
		logger.info("testRead");
		TEICorpusText tei = teiIO.readFromNKJPDirectory(TEST_DIR);
		logger.info("done read");

		doTest(tei);
	}

	/**
	 * Writes the sample text to an uncompressed directory, reads it back and
	 * verifies the re-read content.
	 * 
	 * @throws TEIException
	 *             if reading or writing fails
	 */
	@Test
	public void testWriteDir() throws TEIException {
		logger.info("testWriteDir");
		TEICorpusText tei = teiIO.readFromNKJPDirectory(TEST_DIR);
		logger.info("done read");

		File outputDir = this.TEST_TMP_DIR.getRoot();
		teiIO.writeToNKJPDirectory(tei, outputDir, CompressionMethod.NONE);
		tei = teiIO.readFromNKJPDirectory(outputDir);
		doTest(tei);
	}

	/**
	 * Writes the sample text to a package file, reads it back and verifies the
	 * re-read content.
	 * 
	 * @throws IOException
	 *             if the temporary file cannot be created or reading/writing
	 *             fails
	 */
	@Test
	public void testWritePackage() throws IOException {
		logger.info("testWritePackage");
		TEICorpusText tei = teiIO.readFromNKJPDirectory(TEST_DIR);
		logger.info("done read");

		File packageFile = TEST_TMP_DIR.newFile();
		teiIO.writeToPackage(packageFile, tei);
		tei = teiIO.readFromPackage(packageFile);
		doTest(tei);
	}

	/**
	 * Checks that the text reports exactly the expected annotation layers and
	 * then verifies the mentions layer in detail.
	 * 
	 * @param tei
	 *            text to verify
	 */
	private void doTest(TEICorpusText tei) {
		assertEquals(EnumSet.of(AnnotationLayer.TEXT,
				AnnotationLayer.SEGMENTATION, AnnotationLayer.MORPHOSYNTAX,
				AnnotationLayer.NAMES, AnnotationLayer.WORDS,
				AnnotationLayer.GROUPS, AnnotationLayer.MENTIONS),
				tei.getAnnotationLayers());

		doTestMentionsLayer(tei);
	}

	/**
	 * Verifies mention counts, zero-subject flags and selected mentions of the
	 * first sentence and of the sentence at {@link #LAST_SENTENCE_IDX}.
	 * 
	 * @param tei
	 *            text to verify
	 */
	private void doTestMentionsLayer(TEICorpusText tei) {

		TEISentence firstSentence = tei.getAllSentences().get(0);
		TEISentence lastSentence = tei.getAllSentences().get(
				LAST_SENTENCE_IDX);

		// number of mentions in sentences
		assertEquals(11, firstSentence.getAllMentions().size());
		assertEquals(11, lastSentence.getAllMentions().size());

		// zero subject mentions (expected value goes first, so that a failure
		// message reports expected/actual the right way round)
		assertEquals(true, firstSentence.getAllMentions().get(0)
				.isZeroSubject());
		assertEquals(false, firstSentence.getAllMentions().get(1)
				.isZeroSubject());

		// mentions
		doTestMention(firstSentence, 0, "mention_0",
				Arrays.asList("morph_1.1.2-seg"),
				Arrays.asList("morph_1.1.2-seg"));
		doTestMention(firstSentence, 7, "mention_7", Arrays.asList(
				"morph_1.1.22-seg", "morph_1.1.23-seg", "morph_1.1.24-seg",
				"morph_1.1.25-seg"), Arrays.asList("morph_1.1.22-seg"));

		doTestMention(lastSentence, 8, "mention_90", Arrays.asList(
				"morph_4.14.24-seg", "morph_4.14.25-seg", "morph_4.14.26-seg"),
				Arrays.asList("morph_4.14.24-seg"));
	}

	/**
	 * Verifies the id, the morph ids and the head morph ids of one mention.
	 * 
	 * @param sentence
	 *            sentence containing the mention
	 * @param mentionIdx
	 *            position of the mention in the sentence's mention list
	 * @param mentionId
	 *            expected id of the mention
	 * @param morphsIds
	 *            expected ids of the mention's morphs, in order
	 * @param headsIds
	 *            expected ids of the mention's head morphs, in order
	 */
	private void doTestMention(TEISentence sentence, int mentionIdx,
			String mentionId, List<String> morphsIds, List<String> headsIds) {
		TEIMention teiMention = sentence.getAllMentions().get(mentionIdx);
		assertEquals(mentionId, teiMention.getId());

		List<String> actualMorphIds = new ArrayList<String>();
		for (TEIMorph m : teiMention.getMorphs()) {
			actualMorphIds.add(m.getId());
		}
		assertEquals(morphsIds, actualMorphIds);

		List<String> actualHeadIds = new ArrayList<String>();
		for (TEIMorph m : teiMention.getHeadMorphs()) {
			actualHeadIds.add(m.getId());
		}
		assertEquals(headsIds, actualHeadIds);
	}

}