Commit 184af486d7998a3cfb75b19b519be9557ba2a856
1 parent
792c556f
Reimplemented the rule for matching noun schemata from Walenty.
Showing 1 changed file with 63 additions and 15 deletions
src/main/java/pl/waw/ipipan/zil/core/md/detection/Detector.java
... | ... | @@ -107,7 +107,7 @@ public class Detector { |
107 | 107 | |
108 | 108 | for (SyntacticGroup group : sentence.getGroups()) { |
109 | 109 | |
110 | - SyntacticGroup nextGroup = group.getFollowingGroup(); | |
110 | + /*SyntacticGroup nextGroup = group.getFollowingGroup(); | |
111 | 111 | SyntacticGroup nextnextGroup = null; |
112 | 112 | SyntacticGroup nextnextnextGroup = null; |
113 | 113 | if (nextGroup != null) { |
... | ... | @@ -115,9 +115,9 @@ public class Detector { |
115 | 115 | if (nextnextGroup != null) { |
116 | 116 | nextnextnextGroup = nextnextGroup.getFollowingGroup(); |
117 | 117 | } |
118 | - } | |
118 | + }*/ | |
119 | 119 | |
120 | - /*if (group.getType().startsWith("NG")) { | |
120 | + if (group.getType().startsWith("NG")) { | |
121 | 121 | ArrayList<SyntacticGroup> nestedGroups = new ArrayList<SyntacticGroup>(); |
122 | 122 | nestedGroups.add(group); |
123 | 123 | |
... | ... | @@ -127,11 +127,11 @@ public class Detector { |
127 | 127 | nextGroup = nextGroup.getFollowingGroup(); |
128 | 128 | } |
129 | 129 | |
130 | - Mention mention = create_mention(nestedGroups, valence.get(ValenceDicts.NounsValence)); | |
130 | + Mention mention = createMention(nestedGroups, valence.get(ValenceDicts.NounsValence)); | |
131 | 131 | sentence.addMention(mention); |
132 | - }*/ | |
132 | + } | |
133 | 133 | |
134 | - if (group.getType().startsWith("NG") && nextGroup != null && | |
134 | + /*if (group.getType().startsWith("NG") && nextGroup != null && | |
135 | 135 | nextnextGroup != null && nextnextnextGroup != null && |
136 | 136 | quatroCompatibility(group, nextGroup, nextnextGroup, |
137 | 137 | nextnextnextGroup, valence.get(ValenceDicts.NounsValence))) { |
... | ... | @@ -166,11 +166,11 @@ public class Detector { |
166 | 166 | List<Token> heads = group.getSemanticHeadTokens(); |
167 | 167 | |
168 | 168 | sentence.addMention(new Mention(segments, heads)); |
169 | - } | |
169 | + }*/ | |
170 | 170 | } |
171 | 171 | } |
172 | 172 | |
173 | - private static boolean isProperSchema(String schema, ArrayList<String> group1Types, | |
173 | + /*private static boolean isProperSchema(String schema, ArrayList<String> group1Types, | |
174 | 174 | ArrayList<String> group2Types) { |
175 | 175 | for (String group1Type : group1Types) { |
176 | 176 | if (schemaContains(schema, group1Type)) { |
... | ... | @@ -182,7 +182,7 @@ public class Detector { |
182 | 182 | } |
183 | 183 | } |
184 | 184 | return false; |
185 | - } | |
185 | + }*/ | |
186 | 186 | |
187 | 187 | /*private static boolean isProperSchema(String schema, ArrayList<String> group1Types, |
188 | 188 | ArrayList<String> group2Types) { |
... | ... | @@ -205,7 +205,7 @@ public class Detector { |
205 | 205 | return false; |
206 | 206 | }*/ |
207 | 207 | |
208 | - private static boolean groupsValenceCompatibility(SyntacticGroup NG1, | |
208 | + /*private static boolean groupsValenceCompatibility(SyntacticGroup NG1, | |
209 | 209 | SyntacticGroup NG2, Sentence sentence, |
210 | 210 | Map<String,ArrayList<String>> walentyMapping) { |
211 | 211 | Token NG1Head = NG1.getSemanticHeadTokens().get(0); |
... | ... | @@ -295,7 +295,7 @@ public class Detector { |
295 | 295 | return false; |
296 | 296 | } |
297 | 297 | |
298 | - /*private static boolean isTripleProperSchema(String schema, ArrayList<String> group1Types, | |
298 | + private static boolean isTripleProperSchema(String schema, ArrayList<String> group1Types, | |
299 | 299 | ArrayList<String> group2Types, ArrayList<String> group3Types) { |
300 | 300 | |
301 | 301 | ArrayList<String> group1MPositions = getMatchingPositions(schema, group1Types); |
... | ... | @@ -319,7 +319,54 @@ public class Detector { |
319 | 319 | return false; |
320 | 320 | }*/ |
321 | 321 | |
322 | - /*private static boolean isProperSchema(String schema, | |
322 | + private static Mention createMention(ArrayList<SyntacticGroup> nestedGroups, | |
323 | + Map<String,ArrayList<String>> walentyNouns) { | |
324 | + | |
325 | + SyntacticGroup initialGroup = nestedGroups.get(0); | |
326 | + String initialGroupHead = initialGroup.getSemanticHeadTokens().get(0).getBase(); | |
327 | + | |
328 | + List<Token> heads = initialGroup.getSemanticHeadTokens(); | |
329 | + List<Token> segments = new ArrayList<Token>(); | |
330 | + | |
331 | + if (!walentyNouns.containsKey(initialGroupHead)) { | |
332 | + segments.addAll(initialGroup.getTokens()); | |
333 | + } else { | |
334 | + | |
335 | + ArrayList<String> schemata = walentyNouns.get(initialGroupHead); | |
336 | + ArrayList<ArrayList<String>> groupsRealizations = new ArrayList<ArrayList<String>>(); | |
337 | + ArrayList<SyntacticGroup> largestMatch = new ArrayList<SyntacticGroup>(); | |
338 | + largestMatch.add(initialGroup); | |
339 | + | |
340 | + for (int i=1; i < nestedGroups.size(); i++) { | |
341 | + SyntacticGroup group = nestedGroups.get(i); | |
342 | + ArrayList<String> realizations = group.getWalentyRealizations(); | |
343 | + groupsRealizations.add(realizations); | |
344 | + if (realizationsMatch(schemata, groupsRealizations)) { | |
345 | + largestMatch.add(group); | |
346 | + } else { | |
347 | + break; | |
348 | + } | |
349 | + } | |
350 | + | |
351 | + for (SyntacticGroup group : largestMatch) { | |
352 | + segments.addAll(group.getTokens()); | |
353 | + } | |
354 | + | |
355 | + } | |
356 | + return new Mention(segments, heads); | |
357 | + } | |
358 | + | |
359 | + private static boolean realizationsMatch(ArrayList<String> schemata, | |
360 | + ArrayList<ArrayList<String>> groupsRealizations) { | |
361 | + for (String schema : schemata) { | |
362 | + if (isProperSchema(schema, groupsRealizations)) { | |
363 | + return true; | |
364 | + } | |
365 | + } | |
366 | + return false; | |
367 | + } | |
368 | + | |
369 | + private static boolean isProperSchema(String schema, | |
323 | 370 | ArrayList<ArrayList<String>> groupsRealizations) { |
324 | 371 | |
325 | 372 | ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>(); |
... | ... | @@ -328,13 +375,14 @@ public class Detector { |
328 | 375 | } |
329 | 376 | |
330 | 377 | if (matchingPositionsExists(matchingPositions)) { |
331 | - ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions); | |
378 | + return true; | |
379 | + /*ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions); | |
332 | 380 | for (ArrayList<String> combination : product) { |
333 | 381 | Set<String> combinationSet = new HashSet<String>(combination); |
334 | 382 | if (combinationSet.size() == matchingPositions.size()) { |
335 | 383 | return true; |
336 | 384 | } |
337 | - } | |
385 | + }*/ | |
338 | 386 | } |
339 | 387 | return false; |
340 | 388 | } |
... | ... | @@ -381,7 +429,7 @@ public class Detector { |
381 | 429 | } |
382 | 430 | } |
383 | 431 | return positions; |
384 | - }*/ | |
432 | + } | |
385 | 433 | |
386 | 434 | private static boolean schemaContains(String schema, String phraseType) { |
387 | 435 | for (String position : schema.split("\\s\\+\\s")) { |
... | ... |