// TEISentenceImpl.java 7.24 KB
package ipipan.clarin.tei.impl.entities;

import ipipan.clarin.tei.api.entities.AnnotationLayer;
import ipipan.clarin.tei.api.entities.TEIDeepParseTree;
import ipipan.clarin.tei.api.entities.TEIGroup;
import ipipan.clarin.tei.api.entities.TEIMention;
import ipipan.clarin.tei.api.entities.TEIMorph;
import ipipan.clarin.tei.api.entities.TEINamedEntity;
import ipipan.clarin.tei.api.entities.TEINamedEntityChild;
import ipipan.clarin.tei.api.entities.TEISegment;
import ipipan.clarin.tei.api.entities.TEISentence;
import ipipan.clarin.tei.api.entities.TEISyntacticEntity;
import ipipan.clarin.tei.api.entities.TEIWord;
import ipipan.clarin.tei.api.entities.TEIWordChild;
import ipipan.clarin.tei.impl.entities.comparators.NEChildrenComparator;
import ipipan.clarin.tei.impl.entities.comparators.NamesComparator;
import ipipan.clarin.tei.impl.entities.comparators.SEComparator;
import ipipan.clarin.tei.impl.entities.comparators.WordChildrenComparator;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;

/**
 * 
 * @author mlenart
 */
class TEISentenceImpl extends TEIAbstractEntity implements TEISentence {

	private final Map<AnnotationLayer, String> layer2IdMap = new EnumMap<AnnotationLayer, String>(
			AnnotationLayer.class);
	private final List<TEISegment> chosenSegments;
	private final List<TEISegment> allSegments;
	private LinkedHashMap<String, TEIMorph> id2Morph;
	private List<TEINamedEntity> nes = new ArrayList<TEINamedEntity>();
	private List<TEIWord> words = new ArrayList<TEIWord>();
	private List<TEIGroup> groups = new ArrayList<TEIGroup>();
	private List<TEIDeepParseTree> deepParsing = new ArrayList<TEIDeepParseTree>();
	private List<TEIMention> mentions = new ArrayList<TEIMention>();

	TEISentenceImpl(String id, List<TEISegment> segments) {
		super(id);
		this.layer2IdMap.put(AnnotationLayer.SEGMENTATION, id);
		this.allSegments = segments;
		this.chosenSegments = new ArrayList<TEISegment>(segments);
		Iterator<TEISegment> it = chosenSegments.iterator();
		while (it.hasNext()) {
			TEISegment segm = it.next();
			if (segm.isRejected()) {
				it.remove();
			}
		}
	}

	@Override
	public String getId(AnnotationLayer layer) {
		if (layer2IdMap.containsKey(layer)) {
			return layer2IdMap.get(layer);
		} else {
			return layer.toString().toLowerCase() + "_"
					+ getId().replaceFirst("[a-z]+_", "");
		}
	}

	@Override
	public void setId(AnnotationLayer layer, String id) {
		layer2IdMap.put(layer, id);
	}

	@Override
	public String getOrth() {
		StringBuilder sb = new StringBuilder();
		boolean start = true;
		for (TEISegment seg : getChosenSegments()) {
			if (!start && !seg.hasNps()) {
				sb.append(" ");
			}
			sb.append(seg.getOrth());
			start = false;
		}
		return sb.toString();
	}

	@Override
	public List<TEISegment> getChosenSegments() {
		return chosenSegments;
	}

	@Override
	public List<TEISegment> getAllSegments() {
		return allSegments;
	}

	@Override
	public List<TEIMorph> getMorphs() {
		return new ArrayList<TEIMorph>(id2Morph.values());
	}

	@Override
	public TEIMorph getMorphById(String morphId) {
		return id2Morph.get(morphId);
	}

	@Override
	public List<TEINamedEntity> getTopLevelNamedEntities() {
		return nes;
	}

	@Override
	public List<TEINamedEntity> getAllNamedEntities() {
		List<TEINamedEntity> res = new LinkedList<TEINamedEntity>();
		for (TEINamedEntity ne : nes) {
			res.addAll(getWithDescendants(ne));
		}
		return res;
	}

	private List<TEINamedEntity> getWithDescendants(TEINamedEntity ne) {
		List<TEINamedEntity> res = new LinkedList<TEINamedEntity>();
		res.add(ne);
		for (TEINamedEntityChild neChild : ne.getChildren()) {
			if (neChild.isNamedEntity()) {
				res.addAll(getWithDescendants((TEINamedEntity) neChild));
			}
		}
		return res;
	}

	@Override
	public List<TEIWord> getTopLevelWords() {
		return words;
	}

	@Override
	public List<TEIWord> getAllWords() {
		List<TEIWord> res = new LinkedList<TEIWord>();

		for (TEIWord word : words) {
			res.add(word);
			res.addAll(getDescendantWords(word));
		}

		return res;
	}

	private List<TEIWord> getDescendantWords(TEIWord word) {
		List<TEIWord> res = new LinkedList<TEIWord>();
		for (TEIWordChild child : word.getChildren()) {
			if (child.isWord()) {
				TEIWord childWord = (TEIWord) child;
				res.add(childWord);
				res.addAll(getDescendantWords((TEIWord) child));
			}
		}
		return res;
	}

	@Override
	public List<TEIGroup> getTopLevelGroups() {
		if (groups == null)
			return new LinkedList<TEIGroup>();
		return groups;
	}

	@Override
	public List<TEIGroup> getAllGroups() {
		List<TEIGroup> res = new LinkedList<TEIGroup>();

		for (TEIGroup group : getTopLevelGroups()) {
			res.add(group);
			res.addAll(getDescendantGroups(group));
		}

		return res;
	}

	private List<TEIGroup> getDescendantGroups(TEIGroup group) {
		List<TEIGroup> res = new LinkedList<TEIGroup>();
		for (TEISyntacticEntity child : group.getChildren()) {
			if (child.isGroup()) {
				TEIGroup childGroup = (TEIGroup) child;
				res.add(childGroup);
				res.addAll(getDescendantGroups(childGroup));
			}
		}
		return res;
	}

	@Override
	public void setSyntacticWords(List<TEIWord> words) {
		this.words = words;

		Collections.sort(this.words, new SEComparator(this));
		for (TEIWord word : this.words)
			sortDescendants(word);
	}

	@Override
	public void setSyntacticGroups(List<TEIGroup> groups) {
		this.groups = groups;

		Collections.sort(this.groups, new SEComparator(this));
		for (TEIGroup group : this.groups)
			sortDescendants(group);
	}

	@Override
	public void setNERResult(List<TEINamedEntity> entities) {
		this.nes = entities;

		Collections.sort(this.nes, new NamesComparator(this));
		for (TEINamedEntity ne : nes) {
			sortDescendants(ne);
		}
	}

	private void sortDescendants(TEINamedEntity ne) {
		Comparator<TEINamedEntityChild> cmp = new NEChildrenComparator(this);
		Collections.sort(ne.getChildren(), cmp);
		for (TEINamedEntityChild child : ne.getChildren()) {
			if (child.isNamedEntity()) {
				sortDescendants(child.asNamedEntity());
			}
		}
	}

	private void sortDescendants(TEIGroup group) {
		Comparator<TEISyntacticEntity> cmp = new SEComparator(this);
		Collections.sort(group.getChildren(), cmp);
		for (TEISyntacticEntity child : group.getChildren()) {
			if (child.isGroup()) {
				sortDescendants(child.asGroup());
			} else
				sortDescendants(child.asWord());
		}
	}

	private void sortDescendants(TEIWord word) {
		Comparator<TEIWordChild> cmp = new WordChildrenComparator(this);
		Collections.sort(word.getChildren(), cmp);
		for (TEIWordChild child : word.getChildren()) {
			if (child.isWord()) {
				sortDescendants(child.asWord());
			}
		}
	}

	@Override
	public void setTaggingResult(List<TEIMorph> morphs) {
		id2Morph = new LinkedHashMap<String, TEIMorph>();
		for (TEIMorph morph : morphs) {
			id2Morph.put(morph.getId(), morph);
		}
	}

	@Override
	public List<TEIDeepParseTree> getDeepParsingRoots() {
		return deepParsing;
	}

	@Override
	public void setDeepParsingResult(List<TEIDeepParseTree> deepParsing) {
		this.deepParsing = deepParsing;
	}

	@Override
	public void setMentions(List<TEIMention> mentions) {
		this.mentions = mentions;
	}

	@Override
	public List<TEIMention> getAllMentions() {
		return mentions;
	}
}