Commit 9bd8588210b09b1b9c30d085300d32c6b0b91091
1 parent 184af486
AddMentionsByGroups comment update.
Showing 1 changed file with 3 additions and 211 deletions
src/main/java/pl/waw/ipipan/zil/core/md/detection/Detector.java
... | ... | @@ -98,25 +98,15 @@ public class Detector { |
98 | 98 | } |
99 | 99 | |
100 | 100 | /** |
101 | - * Wyszukuję i oznaczam wszystkie NG* | |
101 | + * Wyszukuję i oznaczam wszystkie NG lub ciągi grup NG i PrepNG | |
102 | + * pasujące do schematów rzeczownikowych z Walentego | |
102 | 103 | * |
103 | 104 | * @param sentence |
104 | 105 | */ |
105 | 106 | private static void addMentionsByGroups(Sentence sentence, |
106 | 107 | Map<ValenceDicts,Map<String,ArrayList<String>>> valence) { |
107 | 108 | |
108 | - for (SyntacticGroup group : sentence.getGroups()) { | |
109 | - | |
110 | - /*SyntacticGroup nextGroup = group.getFollowingGroup(); | |
111 | - SyntacticGroup nextnextGroup = null; | |
112 | - SyntacticGroup nextnextnextGroup = null; | |
113 | - if (nextGroup != null) { | |
114 | - nextnextGroup = nextGroup.getFollowingGroup(); | |
115 | - if (nextnextGroup != null) { | |
116 | - nextnextnextGroup = nextnextGroup.getFollowingGroup(); | |
117 | - } | |
118 | - }*/ | |
119 | - | |
109 | + for (SyntacticGroup group : sentence.getGroups()) { | |
120 | 110 | if (group.getType().startsWith("NG")) { |
121 | 111 | ArrayList<SyntacticGroup> nestedGroups = new ArrayList<SyntacticGroup>(); |
122 | 112 | nestedGroups.add(group); |
... | ... | @@ -130,194 +120,9 @@ public class Detector { |
130 | 120 | Mention mention = createMention(nestedGroups, valence.get(ValenceDicts.NounsValence)); |
131 | 121 | sentence.addMention(mention); |
132 | 122 | } |
133 | - | |
134 | - /*if (group.getType().startsWith("NG") && nextGroup != null && | |
135 | - nextnextGroup != null && nextnextnextGroup != null && | |
136 | - quatroCompatibility(group, nextGroup, nextnextGroup, | |
137 | - nextnextnextGroup, valence.get(ValenceDicts.NounsValence))) { | |
138 | - List<Token> heads = group.getSemanticHeadTokens(); | |
139 | - List<Token> segments = new ArrayList<Token>(); | |
140 | - segments.addAll(group.getTokens()); | |
141 | - segments.addAll(nextGroup.getTokens()); | |
142 | - segments.addAll(nextnextGroup.getTokens()); | |
143 | - segments.addAll(nextnextnextGroup.getTokens()); | |
144 | - | |
145 | - sentence.addMention(new Mention(segments, heads)); | |
146 | - } else if (group.getType().startsWith("NG") && nextGroup != null && | |
147 | - nextnextGroup != null && tripleCompatibility(group, nextGroup, nextnextGroup, valence.get(ValenceDicts.NounsValence))) { | |
148 | - List<Token> heads = group.getSemanticHeadTokens(); | |
149 | - List<Token> segments = new ArrayList<Token>(); | |
150 | - segments.addAll(group.getTokens()); | |
151 | - segments.addAll(nextGroup.getTokens()); | |
152 | - segments.addAll(nextnextGroup.getTokens()); | |
153 | - | |
154 | - sentence.addMention(new Mention(segments, heads)); | |
155 | - } else if (group.getType().startsWith("NG") && nextGroup != null && | |
156 | - groupsValenceCompatibility(group, nextGroup, sentence, valence.get(ValenceDicts.NounsValence)) | |
157 | - ) { | |
158 | - List<Token> heads = group.getSemanticHeadTokens(); | |
159 | - List<Token> segments = new ArrayList<Token>(); | |
160 | - segments.addAll(group.getTokens()); | |
161 | - segments.addAll(nextGroup.getTokens()); | |
162 | - | |
163 | - sentence.addMention(new Mention(segments, heads)); | |
164 | - } else if (group.getType().startsWith("NG")) { | |
165 | - List<Token> segments = group.getTokens(); | |
166 | - List<Token> heads = group.getSemanticHeadTokens(); | |
167 | - | |
168 | - sentence.addMention(new Mention(segments, heads)); | |
169 | - }*/ | |
170 | - } | |
171 | - } | |
172 | - | |
173 | - /*private static boolean isProperSchema(String schema, ArrayList<String> group1Types, | |
174 | - ArrayList<String> group2Types) { | |
175 | - for (String group1Type : group1Types) { | |
176 | - if (schemaContains(schema, group1Type)) { | |
177 | - for (String group2Type : group2Types) { | |
178 | - if (schemaContains(schema, group2Type)) { | |
179 | - return true; | |
180 | - } | |
181 | - } | |
182 | - } | |
183 | - } | |
184 | - return false; | |
185 | - }*/ | |
186 | - | |
187 | - /*private static boolean isProperSchema(String schema, ArrayList<String> group1Types, | |
188 | - ArrayList<String> group2Types) { | |
189 | - ArrayList<String> group1MPositions = getMatchingPositions(schema, group1Types); | |
190 | - ArrayList<String> group2MPositions = getMatchingPositions(schema, group2Types); | |
191 | - | |
192 | - ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>(); | |
193 | - matchingPositions.add(group1MPositions); | |
194 | - matchingPositions.add(group2MPositions); | |
195 | - | |
196 | - if (matchingPositionsExists(matchingPositions)) { | |
197 | - ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions); | |
198 | - for (ArrayList<String> combination : product) { | |
199 | - Set<String> combinationSet = new HashSet<String>(combination); | |
200 | - if (combinationSet.size() == matchingPositions.size()) { | |
201 | - return true; | |
202 | - } | |
203 | - } | |
204 | 123 | } |
205 | - return false; | |
206 | - }*/ | |
207 | - | |
208 | - /*private static boolean groupsValenceCompatibility(SyntacticGroup NG1, | |
209 | - SyntacticGroup NG2, Sentence sentence, | |
210 | - Map<String,ArrayList<String>> walentyMapping) { | |
211 | - Token NG1Head = NG1.getSemanticHeadTokens().get(0); | |
212 | - | |
213 | - String NGHeadBase = NG1Head.getBase(); | |
214 | - | |
215 | - if (!walentyMapping.containsKey(NGHeadBase)) { | |
216 | - return false; | |
217 | - } else { | |
218 | - ArrayList<String> NG2realizations = NG2.getWalentyRealizations(); | |
219 | - | |
220 | - ArrayList<String> schemata = walentyMapping.get(NGHeadBase); | |
221 | - for (String real : NG2realizations) { | |
222 | - for (String schema : schemata) { | |
223 | - if (schemaContains(schema, real)) { | |
224 | - return true; | |
225 | - } | |
226 | - } | |
227 | - } | |
228 | - } | |
229 | - return false; | |
230 | 124 | } |
231 | 125 | |
232 | - private static boolean tripleCompatibility(SyntacticGroup group1, | |
233 | - SyntacticGroup group2, SyntacticGroup group3, | |
234 | - Map<String,ArrayList<String>> walentyMapping) { | |
235 | - Token group1Head = group1.getSemanticHeadTokens().get(0); | |
236 | - | |
237 | - String group1HeadBase = group1Head.getBase(); | |
238 | - | |
239 | - if (!walentyMapping.containsKey(group1HeadBase)) { | |
240 | - return false; | |
241 | - } else { | |
242 | - ArrayList<String> group2realizations = group2.getWalentyRealizations(); | |
243 | - ArrayList<String> group3realizations = group3.getWalentyRealizations(); | |
244 | - | |
245 | - ArrayList<String> schemata = walentyMapping.get(group1HeadBase); | |
246 | - for (String schema : schemata) { | |
247 | - if (isProperSchema(schema, group2realizations, group3realizations)) { | |
248 | - return true; | |
249 | - } | |
250 | - } | |
251 | - } | |
252 | - return false; | |
253 | - } | |
254 | - | |
255 | - private static boolean quatroCompatibility(SyntacticGroup group1, | |
256 | - SyntacticGroup group2, SyntacticGroup group3, SyntacticGroup group4, | |
257 | - Map<String,ArrayList<String>> walentyMapping) { | |
258 | - Token group1Head = group1.getSemanticHeadTokens().get(0); | |
259 | - | |
260 | - String group1HeadBase = group1Head.getBase(); | |
261 | - | |
262 | - if (!walentyMapping.containsKey(group1HeadBase)) { | |
263 | - return false; | |
264 | - } else { | |
265 | - ArrayList<String> group2realizations = group2.getWalentyRealizations(); | |
266 | - ArrayList<String> group3realizations = group3.getWalentyRealizations(); | |
267 | - ArrayList<String> group4realizations = group4.getWalentyRealizations(); | |
268 | - | |
269 | - ArrayList<String> schemata = walentyMapping.get(group1HeadBase); | |
270 | - for (String schema : schemata) { | |
271 | - if (isTripleProperSchema(schema, group2realizations, group3realizations, | |
272 | - group4realizations)) { | |
273 | - return true; | |
274 | - } | |
275 | - } | |
276 | - } | |
277 | - return false; | |
278 | - } | |
279 | - | |
280 | - private static boolean isTripleProperSchema(String schema, ArrayList<String> group1Types, | |
281 | - ArrayList<String> group2Types, ArrayList<String> group3Types) { | |
282 | - for (String group1Type : group1Types) { | |
283 | - if (schemaContains(schema, group1Type)) { | |
284 | - for (String group2Type : group2Types) { | |
285 | - if (schemaContains(schema, group2Type)) { | |
286 | - for (String group3Type : group3Types) { | |
287 | - if (schemaContains(schema, group3Type)) { | |
288 | - return true; | |
289 | - } | |
290 | - } | |
291 | - } | |
292 | - } | |
293 | - } | |
294 | - } | |
295 | - return false; | |
296 | - } | |
297 | - | |
298 | - private static boolean isTripleProperSchema(String schema, ArrayList<String> group1Types, | |
299 | - ArrayList<String> group2Types, ArrayList<String> group3Types) { | |
300 | - | |
301 | - ArrayList<String> group1MPositions = getMatchingPositions(schema, group1Types); | |
302 | - ArrayList<String> group2MPositions = getMatchingPositions(schema, group2Types); | |
303 | - ArrayList<String> group3MPositions = getMatchingPositions(schema, group3Types); | |
304 | - | |
305 | - ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>(); | |
306 | - matchingPositions.add(group1MPositions); | |
307 | - matchingPositions.add(group2MPositions); | |
308 | - matchingPositions.add(group3MPositions); | |
309 | - | |
310 | - if (matchingPositionsExists(matchingPositions)) { | |
311 | - ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions); | |
312 | - for (ArrayList<String> combination : product) { | |
313 | - Set<String> combinationSet = new HashSet<String>(combination); | |
314 | - if (combinationSet.size() == matchingPositions.size()) { | |
315 | - return true; | |
316 | - } | |
317 | - } | |
318 | - } | |
319 | - return false; | |
320 | - }*/ | |
321 | 126 | |
322 | 127 | private static Mention createMention(ArrayList<SyntacticGroup> nestedGroups, |
323 | 128 | Map<String,ArrayList<String>> walentyNouns) { |
... | ... | @@ -430,19 +235,6 @@ public class Detector { |
430 | 235 | } |
431 | 236 | return positions; |
432 | 237 | } |
433 | - | |
434 | - private static boolean schemaContains(String schema, String phraseType) { | |
435 | - for (String position : schema.split("\\s\\+\\s")) { | |
436 | - position = position.trim(); | |
437 | - position = position.substring(1, position.length()-1); | |
438 | - for (String phrT : position.split(";")) { | |
439 | - if (phrT.equals(phraseType)) { | |
440 | - return true; | |
441 | - } | |
442 | - } | |
443 | - } | |
444 | - return false; | |
445 | - } | |
446 | 238 | |
447 | 239 | /** |
448 | 240 | * Wyszukuję i oznaczam wszystkie NER |
... | ... |