Commit 184af486d7998a3cfb75b19b519be9557ba2a856
1 parent
792c556f
Reimplemented the rule for matching noun schemata from Walenty.
Showing 1 changed file with 63 additions and 15 deletions.
src/main/java/pl/waw/ipipan/zil/core/md/detection/Detector.java
@@ -107,7 +107,7 @@ public class Detector { | @@ -107,7 +107,7 @@ public class Detector { | ||
107 | 107 | ||
108 | for (SyntacticGroup group : sentence.getGroups()) { | 108 | for (SyntacticGroup group : sentence.getGroups()) { |
109 | 109 | ||
110 | - SyntacticGroup nextGroup = group.getFollowingGroup(); | 110 | + /*SyntacticGroup nextGroup = group.getFollowingGroup(); |
111 | SyntacticGroup nextnextGroup = null; | 111 | SyntacticGroup nextnextGroup = null; |
112 | SyntacticGroup nextnextnextGroup = null; | 112 | SyntacticGroup nextnextnextGroup = null; |
113 | if (nextGroup != null) { | 113 | if (nextGroup != null) { |
@@ -115,9 +115,9 @@ public class Detector { | @@ -115,9 +115,9 @@ public class Detector { | ||
115 | if (nextnextGroup != null) { | 115 | if (nextnextGroup != null) { |
116 | nextnextnextGroup = nextnextGroup.getFollowingGroup(); | 116 | nextnextnextGroup = nextnextGroup.getFollowingGroup(); |
117 | } | 117 | } |
118 | - } | 118 | + }*/ |
119 | 119 | ||
120 | - /*if (group.getType().startsWith("NG")) { | 120 | + if (group.getType().startsWith("NG")) { |
121 | ArrayList<SyntacticGroup> nestedGroups = new ArrayList<SyntacticGroup>(); | 121 | ArrayList<SyntacticGroup> nestedGroups = new ArrayList<SyntacticGroup>(); |
122 | nestedGroups.add(group); | 122 | nestedGroups.add(group); |
123 | 123 | ||
@@ -127,11 +127,11 @@ public class Detector { | @@ -127,11 +127,11 @@ public class Detector { | ||
127 | nextGroup = nextGroup.getFollowingGroup(); | 127 | nextGroup = nextGroup.getFollowingGroup(); |
128 | } | 128 | } |
129 | 129 | ||
130 | - Mention mention = create_mention(nestedGroups, valence.get(ValenceDicts.NounsValence)); | 130 | + Mention mention = createMention(nestedGroups, valence.get(ValenceDicts.NounsValence)); |
131 | sentence.addMention(mention); | 131 | sentence.addMention(mention); |
132 | - }*/ | 132 | + } |
133 | 133 | ||
134 | - if (group.getType().startsWith("NG") && nextGroup != null && | 134 | + /*if (group.getType().startsWith("NG") && nextGroup != null && |
135 | nextnextGroup != null && nextnextnextGroup != null && | 135 | nextnextGroup != null && nextnextnextGroup != null && |
136 | quatroCompatibility(group, nextGroup, nextnextGroup, | 136 | quatroCompatibility(group, nextGroup, nextnextGroup, |
137 | nextnextnextGroup, valence.get(ValenceDicts.NounsValence))) { | 137 | nextnextnextGroup, valence.get(ValenceDicts.NounsValence))) { |
@@ -166,11 +166,11 @@ public class Detector { | @@ -166,11 +166,11 @@ public class Detector { | ||
166 | List<Token> heads = group.getSemanticHeadTokens(); | 166 | List<Token> heads = group.getSemanticHeadTokens(); |
167 | 167 | ||
168 | sentence.addMention(new Mention(segments, heads)); | 168 | sentence.addMention(new Mention(segments, heads)); |
169 | - } | 169 | + }*/ |
170 | } | 170 | } |
171 | } | 171 | } |
172 | 172 | ||
173 | - private static boolean isProperSchema(String schema, ArrayList<String> group1Types, | 173 | + /*private static boolean isProperSchema(String schema, ArrayList<String> group1Types, |
174 | ArrayList<String> group2Types) { | 174 | ArrayList<String> group2Types) { |
175 | for (String group1Type : group1Types) { | 175 | for (String group1Type : group1Types) { |
176 | if (schemaContains(schema, group1Type)) { | 176 | if (schemaContains(schema, group1Type)) { |
@@ -182,7 +182,7 @@ public class Detector { | @@ -182,7 +182,7 @@ public class Detector { | ||
182 | } | 182 | } |
183 | } | 183 | } |
184 | return false; | 184 | return false; |
185 | - } | 185 | + }*/ |
186 | 186 | ||
187 | /*private static boolean isProperSchema(String schema, ArrayList<String> group1Types, | 187 | /*private static boolean isProperSchema(String schema, ArrayList<String> group1Types, |
188 | ArrayList<String> group2Types) { | 188 | ArrayList<String> group2Types) { |
@@ -205,7 +205,7 @@ public class Detector { | @@ -205,7 +205,7 @@ public class Detector { | ||
205 | return false; | 205 | return false; |
206 | }*/ | 206 | }*/ |
207 | 207 | ||
208 | - private static boolean groupsValenceCompatibility(SyntacticGroup NG1, | 208 | + /*private static boolean groupsValenceCompatibility(SyntacticGroup NG1, |
209 | SyntacticGroup NG2, Sentence sentence, | 209 | SyntacticGroup NG2, Sentence sentence, |
210 | Map<String,ArrayList<String>> walentyMapping) { | 210 | Map<String,ArrayList<String>> walentyMapping) { |
211 | Token NG1Head = NG1.getSemanticHeadTokens().get(0); | 211 | Token NG1Head = NG1.getSemanticHeadTokens().get(0); |
@@ -295,7 +295,7 @@ public class Detector { | @@ -295,7 +295,7 @@ public class Detector { | ||
295 | return false; | 295 | return false; |
296 | } | 296 | } |
297 | 297 | ||
298 | - /*private static boolean isTripleProperSchema(String schema, ArrayList<String> group1Types, | 298 | + private static boolean isTripleProperSchema(String schema, ArrayList<String> group1Types, |
299 | ArrayList<String> group2Types, ArrayList<String> group3Types) { | 299 | ArrayList<String> group2Types, ArrayList<String> group3Types) { |
300 | 300 | ||
301 | ArrayList<String> group1MPositions = getMatchingPositions(schema, group1Types); | 301 | ArrayList<String> group1MPositions = getMatchingPositions(schema, group1Types); |
@@ -319,7 +319,54 @@ public class Detector { | @@ -319,7 +319,54 @@ public class Detector { | ||
319 | return false; | 319 | return false; |
320 | }*/ | 320 | }*/ |
321 | 321 | ||
322 | - /*private static boolean isProperSchema(String schema, | 322 | + private static Mention createMention(ArrayList<SyntacticGroup> nestedGroups, |
323 | + Map<String,ArrayList<String>> walentyNouns) { | ||
324 | + | ||
325 | + SyntacticGroup initialGroup = nestedGroups.get(0); | ||
326 | + String initialGroupHead = initialGroup.getSemanticHeadTokens().get(0).getBase(); | ||
327 | + | ||
328 | + List<Token> heads = initialGroup.getSemanticHeadTokens(); | ||
329 | + List<Token> segments = new ArrayList<Token>(); | ||
330 | + | ||
331 | + if (!walentyNouns.containsKey(initialGroupHead)) { | ||
332 | + segments.addAll(initialGroup.getTokens()); | ||
333 | + } else { | ||
334 | + | ||
335 | + ArrayList<String> schemata = walentyNouns.get(initialGroupHead); | ||
336 | + ArrayList<ArrayList<String>> groupsRealizations = new ArrayList<ArrayList<String>>(); | ||
337 | + ArrayList<SyntacticGroup> largestMatch = new ArrayList<SyntacticGroup>(); | ||
338 | + largestMatch.add(initialGroup); | ||
339 | + | ||
340 | + for (int i=1; i < nestedGroups.size(); i++) { | ||
341 | + SyntacticGroup group = nestedGroups.get(i); | ||
342 | + ArrayList<String> realizations = group.getWalentyRealizations(); | ||
343 | + groupsRealizations.add(realizations); | ||
344 | + if (realizationsMatch(schemata, groupsRealizations)) { | ||
345 | + largestMatch.add(group); | ||
346 | + } else { | ||
347 | + break; | ||
348 | + } | ||
349 | + } | ||
350 | + | ||
351 | + for (SyntacticGroup group : largestMatch) { | ||
352 | + segments.addAll(group.getTokens()); | ||
353 | + } | ||
354 | + | ||
355 | + } | ||
356 | + return new Mention(segments, heads); | ||
357 | + } | ||
358 | + | ||
359 | + private static boolean realizationsMatch(ArrayList<String> schemata, | ||
360 | + ArrayList<ArrayList<String>> groupsRealizations) { | ||
361 | + for (String schema : schemata) { | ||
362 | + if (isProperSchema(schema, groupsRealizations)) { | ||
363 | + return true; | ||
364 | + } | ||
365 | + } | ||
366 | + return false; | ||
367 | + } | ||
368 | + | ||
369 | + private static boolean isProperSchema(String schema, | ||
323 | ArrayList<ArrayList<String>> groupsRealizations) { | 370 | ArrayList<ArrayList<String>> groupsRealizations) { |
324 | 371 | ||
325 | ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>(); | 372 | ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>(); |
@@ -328,13 +375,14 @@ public class Detector { | @@ -328,13 +375,14 @@ public class Detector { | ||
328 | } | 375 | } |
329 | 376 | ||
330 | if (matchingPositionsExists(matchingPositions)) { | 377 | if (matchingPositionsExists(matchingPositions)) { |
331 | - ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions); | 378 | + return true; |
379 | + /*ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions); | ||
332 | for (ArrayList<String> combination : product) { | 380 | for (ArrayList<String> combination : product) { |
333 | Set<String> combinationSet = new HashSet<String>(combination); | 381 | Set<String> combinationSet = new HashSet<String>(combination); |
334 | if (combinationSet.size() == matchingPositions.size()) { | 382 | if (combinationSet.size() == matchingPositions.size()) { |
335 | return true; | 383 | return true; |
336 | } | 384 | } |
337 | - } | 385 | + }*/ |
338 | } | 386 | } |
339 | return false; | 387 | return false; |
340 | } | 388 | } |
@@ -381,7 +429,7 @@ public class Detector { | @@ -381,7 +429,7 @@ public class Detector { | ||
381 | } | 429 | } |
382 | } | 430 | } |
383 | return positions; | 431 | return positions; |
384 | - }*/ | 432 | + } |
385 | 433 | ||
386 | private static boolean schemaContains(String schema, String phraseType) { | 434 | private static boolean schemaContains(String schema, String phraseType) { |
387 | for (String position : schema.split("\\s\\+\\s")) { | 435 | for (String position : schema.split("\\s\\+\\s")) { |