TEIParagraphImpl.java
4.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
package ipipan.clarin.tei.impl.entities;
import ipipan.clarin.tei.api.entities.AnnotationLayer;
import ipipan.clarin.tei.api.entities.TEIMorph;
import ipipan.clarin.tei.api.entities.TEIParagraph;
import ipipan.clarin.tei.api.entities.TEISegment;
import ipipan.clarin.tei.api.entities.TEISentence;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
/**
 * {@link TEIParagraph} implementation that holds the paragraph's text and
 * metadata, an optional list of subparagraphs, and the sentences assigned to
 * it.
 *
 * <p>A paragraph either owns its sentences directly (after
 * {@link #setSentences(List)}) or, when none were assigned, exposes the
 * concatenation of its subparagraphs' sentences.
 *
 * @author mlenart
 */
public class TEIParagraphImpl extends TEIAbstractEntity implements TEIParagraph {

    /** Ids explicitly registered for individual annotation layers. */
    private final Map<AnnotationLayer, String> layer2IdMap = new EnumMap<AnnotationLayer, String>(
            AnnotationLayer.class);
    private final ParagraphType type;
    private final String n;
    private final String text;
    /** Direct subparagraphs; {@code null} when the paragraph has none. */
    private final List<TEIParagraph> subparagraphs;
    /** Index of the direct subparagraphs by their id. */
    private final Map<String, TEIParagraph> subparagraphsMap = new HashMap<String, TEIParagraph>();
    /** Sentences assigned through {@link #setSentences(List)}; {@code null} until then. */
    private List<TEISentence> sents;

    /**
     * Creates a paragraph and indexes its subparagraphs by id.
     *
     * @param id            paragraph id; also registered as the id for
     *                      {@link AnnotationLayer#TEXT}
     * @param type          paragraph type
     * @param n             value returned by {@link #getN()}
     * @param text          paragraph text
     * @param subparagraphs direct subparagraphs, or {@code null} if there are
     *                      none; the list is stored as given (not copied)
     */
    public TEIParagraphImpl(String id, ParagraphType type, String n,
            String text, List<TEIParagraph> subparagraphs) {
        super(id);
        this.layer2IdMap.put(AnnotationLayer.TEXT, id);
        this.type = type;
        this.n = n;
        this.text = text;
        this.subparagraphs = subparagraphs;
        if (subparagraphs != null) {
            for (TEIParagraph subpar : subparagraphs) {
                subparagraphsMap.put(subpar.getId(), subpar);
            }
        }
    }

    /**
     * Returns this paragraph's id in the given annotation layer.
     *
     * <p>If an id was registered for the layer it is returned as is.
     * Otherwise the id is derived: the lower-cased layer name, an underscore,
     * and this paragraph's own id with the first match of {@code [a-z]+_}
     * removed.
     *
     * @param layer annotation layer to look up
     * @return the registered or derived id
     */
    @Override
    public String getId(AnnotationLayer layer) {
        if (layer2IdMap.containsKey(layer)) {
            return layer2IdMap.get(layer);
        }
        // Locale.ROOT keeps the derived id stable regardless of the JVM's
        // default locale (e.g. Turkish would lower-case 'I' to a dotless i).
        String layerPrefix = layer.toString().toLowerCase(Locale.ROOT);
        return layerPrefix + "_" + getId().replaceFirst("[a-z]+_", "");
    }

    /**
     * Registers an explicit id for the given annotation layer, replacing any
     * previously registered one.
     */
    @Override
    public void setId(AnnotationLayer layer, String id) {
        layer2IdMap.put(layer, id);
    }

    @Override
    public ParagraphType getType() {
        return type;
    }

    @Override
    public String getN() {
        return n;
    }

    @Override
    public String getText() {
        return text;
    }

    /**
     * @return the subparagraph list passed to the constructor (possibly
     *         {@code null})
     */
    @Override
    public List<TEIParagraph> getSubparagraphs() {
        return subparagraphs;
    }

    /**
     * Returns the sentences of this paragraph.
     *
     * @return the list given to {@link #setSentences(List)} if one was set;
     *         otherwise a new list concatenating the sentences of all
     *         subparagraphs in order (empty when there are no subparagraphs)
     */
    @Override
    public List<TEISentence> getSentences() {
        if (sents != null) {
            return sents;
        }
        List<TEISentence> collected = new LinkedList<TEISentence>();
        // A leaf paragraph without assigned sentences simply has none.
        if (subparagraphs != null) {
            for (TEIParagraph subpar : subparagraphs) {
                collected.addAll(subpar.getSentences());
            }
        }
        return collected;
    }

    /**
     * @return a new list with the morphs of every sentence returned by
     *         {@link #getSentences()}, in sentence order
     */
    @Override
    public List<TEIMorph> getMorphs() {
        List<TEIMorph> morphs = new ArrayList<TEIMorph>();
        // Go through getSentences() so that paragraphs whose sentences live
        // only on subparagraphs work too (the raw field may be null).
        for (TEISentence sent : getSentences()) {
            morphs.addAll(sent.getMorphs());
        }
        return morphs;
    }

    /**
     * Assigns sentences to this paragraph and, when it has subparagraphs,
     * distributes them among the subparagraphs.
     *
     * <p>Each sentence goes to the subparagraph reported by the paragraph of
     * its first segment. Every subparagraph receives a (possibly empty) list.
     * Passing {@code null} only clears this paragraph's own list; the
     * subparagraphs are left untouched.
     *
     * @param sents sentences to assign, or {@code null}
     * @throws IllegalArgumentException if a sentence has no segments, or its
     *         first segment's paragraph is not a direct subparagraph
     */
    @Override
    public void setSentences(List<TEISentence> sents) {
        this.sents = sents;
        if (subparagraphs == null || sents == null) {
            return;
        }

        // LinkedHashMap keeps the subparagraphs in their original order.
        Map<TEIParagraph, List<TEISentence>> subpar2sents = new LinkedHashMap<TEIParagraph, List<TEISentence>>();
        for (TEIParagraph subpar : subparagraphs) {
            subpar2sents.put(subpar, new LinkedList<TEISentence>());
        }

        for (TEISentence sent : sents) {
            List<? extends TEISegment> segments = sent.getAllSegments();
            if (segments == null || segments.isEmpty()) {
                throw new IllegalArgumentException("Sentence " + sent
                        + " has no segments; cannot determine its subparagraph in "
                        + this);
            }
            TEISegment firstSegment = segments.get(0);
            List<TEISentence> bucket = subpar2sents.get(firstSegment.getParagraph());
            if (bucket == null) {
                throw new IllegalArgumentException("Sentence " + sent
                        + " belongs to paragraph " + firstSegment.getParagraph()
                        + ", which is not a subparagraph of " + this);
            }
            bucket.add(sent);
        }

        for (Entry<TEIParagraph, List<TEISentence>> entry : subpar2sents
                .entrySet()) {
            entry.getKey().setSentences(entry.getValue());
        }
    }

    /**
     * @return the direct subparagraph with the given id, or {@code null} if
     *         there is none
     */
    @Override
    public TEIParagraph getSubparagraph(String id) {
        return subparagraphsMap.get(id);
    }

    @Override
    public String toString() {
        return "TEIParagraph{" + "id=" + getId() + '}';
    }

    /**
     * Delegates offset correction to every subparagraph.
     *
     * <p>For a paragraph without subparagraphs this is currently a no-op: the
     * leaf-level correction (which walked the paragraph's chosen segments and
     * reassigned consecutive offsets) has been disabled.
     * TODO: when it is restored, also correct segments marked as rejected.
     */
    @Override
    public void correctSegmentOffsets() {
        if (subparagraphs == null) {
            return;
        }
        for (TEIParagraph subpar : subparagraphs) {
            subpar.correctSegmentOffsets();
        }
    }
}