MentionsReader.java
2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
package ipipan.clarin.tei.impl.io.read;
import ipipan.clarin.tei.api.entities.AnnotationLayer;
import ipipan.clarin.tei.api.entities.TEIMention;
import ipipan.clarin.tei.api.entities.TEIMorph;
import ipipan.clarin.tei.api.entities.TEIParagraph;
import ipipan.clarin.tei.api.entities.TEISentence;
import ipipan.clarin.tei.api.exceptions.TEIException;
import ipipan.clarin.tei.impl.entities.TEIMentionImpl;
import ipipan.clarin.tei.impl.io.IdValuePair;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import javax.xml.stream.XMLStreamException;
/**
 * Body reader for the mentions annotation layer.
 *
 * <p>For every paragraph it consumes one paragraph element, then one
 * {@code s} element per sentence already present in the paragraph, and
 * inside each sentence a sequence of {@code seg} elements, each describing
 * a single mention. The ids read from the paragraph and sentence elements
 * are stored on the entities under {@link AnnotationLayer#MENTIONS}.
 */
public class MentionsReader extends BodyReader {

    /**
     * Creates a reader working on the given input wrapper.
     *
     * @param in wrapper around the input being read; passed on to the
     *           superclass
     */
    protected MentionsReader(InWrapper in) {
        super(in);
    }

    /**
     * Reads the mentions of one paragraph and attaches them to the
     * sentences of {@code par}.
     *
     * <p>The input is advanced to the next paragraph start, then exactly one
     * {@code s} element is expected for each sentence returned by
     * {@code par.getSentences()}, in the same order. The paragraph id for
     * the mentions layer is set only after the whole paragraph was read
     * successfully.
     *
     * @param par paragraph whose sentences receive the mentions
     * @throws TEIException if anything goes wrong while reading; the
     *                      original exception is kept as the cause
     */
    @Override
    protected void readNextParagraph(TEIParagraph par) throws TEIException {
        try {
            // Skip everything that precedes the next paragraph element.
            while (!in.isStartParagraph()) {
                in.next();
            }
            String parId = in.getXmlId();

            for (TEISentence sent : par.getSentences()) {
                in.nextTag();
                in.requireStart("s");
                readNextSent(sent);
            }

            in.nextTag();
            in.requireEnd(); // p
            par.setId(AnnotationLayer.MENTIONS, parId);
        } catch (Exception ex) {
            // Single boundary: every failure (stream errors as well as
            // runtime exceptions) is reported uniformly as a TEIException.
            throw new TEIException("Error in mentions: " + ex.getMessage(), ex);
        }
    }

    /**
     * Reads all {@code seg} children of the current {@code s} element and
     * stores the resulting mentions on {@code sent}.
     *
     * <p>On entry the input must be positioned on the start of the
     * {@code s} element; on exit it is positioned on its end.
     *
     * @param sent sentence to fill; its morphs are used to resolve the
     *             pointers found in the mentions
     * @return the same {@code sent} instance
     * @throws XMLStreamException if the input does not have the expected
     *                            structure or a mention refers to a morph
     *                            that is not part of {@code sent}
     * @throws TEIException       declared for the benefit of the reader
     *                            calls made here
     */
    private TEISentence readNextSent(TEISentence sent)
            throws XMLStreamException, TEIException {
        // Index the sentence's morphs by id so that pointers can be resolved.
        Map<String, TEIMorph> ptr2Morph = new LinkedHashMap<String, TEIMorph>();
        for (TEIMorph morph : sent.getMorphs()) {
            ptr2Morph.put(morph.getId(), morph);
        }

        List<TEIMention> mentions = new ArrayList<TEIMention>();
        String sentId = in.getXmlId();

        in.nextTag();
        while (!in.isEnd()) {
            // readMention itself verifies that we are on a seg start tag.
            mentions.add(readMention(sent, ptr2Morph));
            in.nextTag();
        }
        in.requireEnd(); // s

        sent.setMentions(mentions);
        sent.setId(AnnotationLayer.MENTIONS, sentId);
        return sent;
    }

    /**
     * Reads a single mention from the current {@code seg} element.
     *
     * <p>Expected content, in order: a {@code mention} feature structure
     * holding zero or more {@code semh} features (mention heads) followed by
     * an optional {@code zero} feature, and then the pointers returned by
     * {@code PtrHelper.readPtrsWithTypes}, which identify the morphs
     * belonging to the mention. The {@code zero} feature marks the mention
     * as a zero subject when its value equals {@code "true"} ignoring case.
     *
     * @param sent      sentence the mention belongs to (not used at present)
     * @param ptr2Morph morphs of the sentence, keyed by morph id
     * @return the mention that was read
     * @throws XMLStreamException if the input does not have the expected
     *                            structure, or a head or pointer target is
     *                            not a key of {@code ptr2Morph}
     */
    private TEIMention readMention(TEISentence sent,
            Map<String, TEIMorph> ptr2Morph) throws XMLStreamException {
        List<TEIMorph> heads = new ArrayList<TEIMorph>();
        List<TEIMorph> morphs = new ArrayList<TEIMorph>();

        in.requireStart("seg");
        String id = in.getXmlId();

        in.nextTag();
        in.requireStartFS("mention");

        in.nextTag();
        while (in.isStartF("semh")) {
            heads.add(resolveMorph(ptr2Morph, in.readFValue(), id, "head"));
            in.nextTag();
        }

        boolean isZeroSubject = false;
        if (in.isStartF("zero")) {
            isZeroSubject = in.readFValue().equalsIgnoreCase("true");
            in.nextTag();
        }
        in.requireEnd(); // fs mention

        in.nextTag();
        for (IdValuePair ptr : PtrHelper.readPtrsWithTypes(in)) {
            morphs.add(resolveMorph(ptr2Morph, ptr.getId(), id, "pointer"));
        }
        in.requireEnd(); // seg

        return new TEIMentionImpl(id, morphs, heads, isZeroSubject);
    }

    /**
     * Looks up the morph a mention refers to, failing loudly instead of
     * letting a {@code null} entry slip into the mention.
     *
     * @param ptr2Morph morphs of the current sentence, keyed by morph id
     * @param target    morph id referenced by the mention
     * @param mentionId id of the mention being read, used in the message
     * @param role      short description of the reference, used in the
     *                  message
     * @return the morph registered under {@code target}; never {@code null}
     * @throws XMLStreamException if {@code target} is not a key of
     *                            {@code ptr2Morph}
     */
    private static TEIMorph resolveMorph(Map<String, TEIMorph> ptr2Morph,
            String target, String mentionId, String role)
            throws XMLStreamException {
        TEIMorph morph = ptr2Morph.get(target);
        if (morph == null) {
            throw new XMLStreamException("Mention '" + mentionId + "': "
                    + role + " target '" + target
                    + "' does not match any morph of the sentence");
        }
        return morph;
    }
}