// NamedReader.java 4.09 KB
package ipipan.clarin.tei.impl.io.read;

import ipipan.clarin.tei.api.entities.AnnotationLayer;
import ipipan.clarin.tei.api.entities.TEIMorph;
import ipipan.clarin.tei.api.entities.TEINamedEntity;
import ipipan.clarin.tei.api.entities.TEINamedEntityChild;
import ipipan.clarin.tei.api.entities.TEINamedEntityDerivation;
import ipipan.clarin.tei.api.entities.TEIParagraph;
import ipipan.clarin.tei.api.entities.TEISentence;
import ipipan.clarin.tei.api.exceptions.TEIException;

import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.xml.stream.XMLStreamException;

/**
 * Body reader for the named-entity annotation layer. For every paragraph it
 * walks the sentences already present on the paragraph, reads the
 * {@code seg} elements of the matching {@code s} element, and stores the
 * resulting named entities and layer ids on the given objects.
 *
 * @author mlenart
 */
class NamedReader extends BodyReader {

	NamedReader(InWrapper in) {
		super(in);
	}

	/**
	 * Advances to the next paragraph start, reads one {@code s} element per
	 * sentence of {@code par}, and records the paragraph's id for the
	 * {@link AnnotationLayer#NAMES} layer.
	 *
	 * @param par paragraph whose sentences receive the named entities
	 * @throws TEIException wrapping any exception raised while reading
	 */
	@Override
	protected void readNextParagraph(TEIParagraph par) throws TEIException {
		try {
			// Skip everything up to the next paragraph start.
			for (; !in.isStartParagraph(); in.next()) {
				// the loop header does all the work
			}
			final String paragraphId = in.getXmlId();
			for (TEISentence sentence : par.getSentences()) {
				in.nextTag();
				in.requireStart("s");
				readNextSent(sentence);
			}
			in.nextTag();
			in.requireEnd(); // closing p
			par.setId(AnnotationLayer.NAMES, paragraphId);
		} catch (Exception cause) {
			final String message = "Error in named entities: "
					+ cause.getMessage();
			throw new TEIException(message, cause);
		}
	}

	/**
	 * Reads all {@code seg} children of the current {@code s} element and
	 * stores the named entities built from them on {@code sent}, together
	 * with the sentence id for the {@link AnnotationLayer#NAMES} layer.
	 *
	 * @param sent sentence to fill in
	 * @return the same {@code sent} instance
	 */
	private TEISentence readNextSent(TEISentence sent)
			throws XMLStreamException, TEIException {
		final String sentenceId = in.getXmlId();
		final List<NEBuilder> pending = new LinkedList<NEBuilder>();
		for (in.nextTag(); !in.isEnd(); in.nextTag()) {
			in.requireStart("seg");
			pending.add(readNEBuilder());
		}
		in.requireEnd(); // closing s
		final List<TEINamedEntity> entities = getNEsFromBuilders(pending, sent);
		sent.setNERResult(entities);
		sent.setId(AnnotationLayer.NAMES, sentenceId);
		return sent;
	}

	/**
	 * Turns the collected builders into named entities. Builders are
	 * processed from last to first (the list is reversed in place), and each
	 * finished entity is registered under its id so that builders handled
	 * afterwards can look it up. The result keeps the original reading order
	 * and contains only entities that are not children of another entity.
	 *
	 * @param builders builders in reading order; reversed by this call
	 * @param sent sentence whose morphs can be referenced by id
	 * @return the remaining top-level named entities
	 */
	private List<TEINamedEntity> getNEsFromBuilders(List<NEBuilder> builders,
			TEISentence sent) throws TEIException {
		Map<String, TEINamedEntityChild> childById = new LinkedHashMap<String, TEINamedEntityChild>();
		for (TEIMorph m : sent.getMorphs()) {
			childById.put(m.getId(), m);
		}

		LinkedList<TEINamedEntity> entities = new LinkedList<TEINamedEntity>();
		Collections.reverse(builders);
		for (NEBuilder builder : builders) {
			TEINamedEntity entity = builder.getNe(childById);
			childById.put(entity.getId(), entity);
			// Prepending undoes the reversal above.
			entities.addFirst(entity);
		}

		filterNEs(entities);
		return entities;
	}

	/**
	 * Removes from {@code nes} every element that appears among the children
	 * of some entity in the list.
	 *
	 * @param nes list to prune in place
	 */
	private void filterNEs(LinkedList<TEINamedEntity> nes) {
		Set<TEINamedEntityChild> nested = new LinkedHashSet<TEINamedEntityChild>();
		for (TEINamedEntity candidate : nes) {
			nested.addAll(candidate.getChildren());
		}
		nes.removeAll(nested);
	}

	/**
	 * Reads one {@code seg} element: its id, the {@code named} feature
	 * structure, and the pointers that follow it.
	 *
	 * @return a builder holding everything read from the element
	 */
	private NEBuilder readNEBuilder() throws XMLStreamException {
		final NEBuilder builder = new NEBuilder(in.getXmlId());
		in.nextTag();
		readNamedFeatures(builder);

		in.nextTag();
		builder.setPtrs(PtrHelper.readPtrs(in));
		in.requireEnd(); // closing seg

		return builder;
	}

	/**
	 * Reads the {@code named} feature structure into {@code builder}. The
	 * features are expected in a fixed order: optional {@code derived},
	 * {@code type}, optional {@code subtype}, {@code orth}, optional
	 * {@code base}, optional {@code when}, optional {@code certainty}
	 * (optionally followed by {@code comment}).
	 *
	 * @param builder receiver of the feature values
	 */
	private void readNamedFeatures(NEBuilder builder)
			throws XMLStreamException {
		in.requireStartFS("named");
		in.nextTag();

		if (in.isStartF("derived")) {
			builder.setDeriv(readDeriv());
			in.nextTag();
		}

		in.requireStartF("type");
		builder.setType(in.readSymbolF().getValue());
		in.nextTag();

		if (in.isStartF("subtype")) {
			builder.setSubtype(in.readSymbolF().getValue());
			in.nextTag();
		}

		in.requireStartF("orth");
		builder.setOrth(in.readStringF().getValue());
		in.nextTag();

		if (in.isStartF("base")) {
			builder.setBase(in.readStringF().getValue());
			in.nextTag();
		}
		// NOTE(review): the "when" value is stored through setBase, so it
		// replaces anything read from "base" just above - confirm this is
		// intended.
		if (in.isStartF("when")) {
			builder.setBase(in.readStringF().getValue());
			in.nextTag();
		}

		// "comment" is only looked for directly after "certainty".
		if (in.isStartF("certainty")) {
			builder.setCertainty(in.readSymbolF().getValue());
			in.nextTag();
			if (in.isStartF("comment")) {
				builder.setComment(in.readStringF().getValue());
				in.nextTag();
			}
		}

		in.requireEnd(); // closing fs named
	}

	/**
	 * Reads the {@code derived} feature: a {@code derivation} feature
	 * structure holding {@code derivType} and an optional
	 * {@code derivedFrom}.
	 *
	 * @return the derivation created from the values read; its second
	 *         argument is {@code null} when {@code derivedFrom} is absent
	 */
	private TEINamedEntityDerivation readDeriv() throws XMLStreamException {
		in.requireStartF("derived");
		in.nextTag();
		in.requireStartFS("derivation");
		in.nextTag();

		in.requireStartF("derivType");
		final String kind = in.readSymbolF().getValue();
		in.nextTag();

		final String source;
		if (in.isStartF("derivedFrom")) {
			source = in.readStringF().getValue();
			in.nextTag();
		} else {
			source = null;
		}

		in.requireEnd(); // closing fs
		in.nextTag();
		in.requireEnd(); // closing f derived
		return ef.createNamedEntityDerivation(kind, source);
	}
}