// MentionsReader.java 2.67 KB
package ipipan.clarin.tei.impl.io.read;

import ipipan.clarin.tei.api.entities.AnnotationLayer;
import ipipan.clarin.tei.api.entities.TEIMention;
import ipipan.clarin.tei.api.entities.TEIMorph;
import ipipan.clarin.tei.api.entities.TEIParagraph;
import ipipan.clarin.tei.api.entities.TEISentence;
import ipipan.clarin.tei.api.exceptions.TEIException;
import ipipan.clarin.tei.impl.entities.TEIMentionImpl;
import ipipan.clarin.tei.impl.io.IdValuePair;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import javax.xml.stream.XMLStreamException;

public class MentionsReader extends BodyReader {

	protected MentionsReader(InWrapper in) {
		super(in);
	}

	@Override
	protected void readNextParagraph(TEIParagraph par) throws TEIException {
		try {
			while (!in.isStartParagraph()) {
				in.next();
			}
			String parId = in.getXmlId();
			for (TEISentence sent : par.getSentences()) {
				in.nextTag();
				in.requireStart("s");
				readNextSent(sent);
			}
			in.nextTag();
			in.requireEnd(); // p
			par.setId(AnnotationLayer.MENTIONS, parId);
		} catch (Exception ex) {
			throw new TEIException("Error in mentions: " + ex.getMessage(), ex);
		}
	}

	private TEISentence readNextSent(TEISentence sent)
			throws XMLStreamException, TEIException {
		Map<String, TEIMorph> ptr2Morph = new LinkedHashMap<String, TEIMorph>();
		for (TEIMorph morph : sent.getMorphs()) {
			ptr2Morph.put(morph.getId(), morph);
		}

		List<TEIMention> mentions = new LinkedList<TEIMention>();
		String sentId = in.getXmlId();
		in.nextTag();
		while (!in.isEnd()) {
			in.requireStart("seg");
			mentions.add(readMention(sent, ptr2Morph));
			in.nextTag();
		}
		in.requireEnd(); // s
		sent.setMentions(mentions);
		sent.setId(AnnotationLayer.MENTIONS, sentId);
		return sent;
	}

	private TEIMention readMention(TEISentence sent,
			Map<String, TEIMorph> ptr2Morph) throws XMLStreamException {
		String id;
		List<TEIMorph> heads = new ArrayList<TEIMorph>();
		List<TEIMorph> morphs = new ArrayList<TEIMorph>();

		in.requireStart("seg");
		id = in.getXmlId();

		in.nextTag();
		in.requireStartFS("mention");

		in.nextTag();
		while (in.isStartF("semh")) {
			heads.add(ptr2Morph.get(in.readFValue()));
			in.nextTag();
		}

		boolean isZeroSubject = false;
		if (in.isStartF("zero")) {
			if (in.readFValue().equalsIgnoreCase("true"))
				isZeroSubject = true;
			in.nextTag();
		}

		in.requireEnd(); // fs mention

		in.nextTag();
		for (IdValuePair ptr : PtrHelper.readPtrsWithTypes(in)) {
			String target = ptr.getId();
			morphs.add(ptr2Morph.get(target));
		}
		in.requireEnd(); // seg

		return new TEIMentionImpl(id, morphs, heads, isZeroSubject);
	}
}