Commit 184af486d7998a3cfb75b19b519be9557ba2a856

Authored by Bartłomiej Nitoń
1 parent 792c556f

Reimplemented the rule for matching noun schemata from Walenty.

src/main/java/pl/waw/ipipan/zil/core/md/detection/Detector.java
@@ -107,7 +107,7 @@ public class Detector { @@ -107,7 +107,7 @@ public class Detector {
107 107
108 for (SyntacticGroup group : sentence.getGroups()) { 108 for (SyntacticGroup group : sentence.getGroups()) {
109 109
110 - SyntacticGroup nextGroup = group.getFollowingGroup(); 110 + /*SyntacticGroup nextGroup = group.getFollowingGroup();
111 SyntacticGroup nextnextGroup = null; 111 SyntacticGroup nextnextGroup = null;
112 SyntacticGroup nextnextnextGroup = null; 112 SyntacticGroup nextnextnextGroup = null;
113 if (nextGroup != null) { 113 if (nextGroup != null) {
@@ -115,9 +115,9 @@ public class Detector { @@ -115,9 +115,9 @@ public class Detector {
115 if (nextnextGroup != null) { 115 if (nextnextGroup != null) {
116 nextnextnextGroup = nextnextGroup.getFollowingGroup(); 116 nextnextnextGroup = nextnextGroup.getFollowingGroup();
117 } 117 }
118 - } 118 + }*/
119 119
120 - /*if (group.getType().startsWith("NG")) { 120 + if (group.getType().startsWith("NG")) {
121 ArrayList<SyntacticGroup> nestedGroups = new ArrayList<SyntacticGroup>(); 121 ArrayList<SyntacticGroup> nestedGroups = new ArrayList<SyntacticGroup>();
122 nestedGroups.add(group); 122 nestedGroups.add(group);
123 123
@@ -127,11 +127,11 @@ public class Detector { @@ -127,11 +127,11 @@ public class Detector {
127 nextGroup = nextGroup.getFollowingGroup(); 127 nextGroup = nextGroup.getFollowingGroup();
128 } 128 }
129 129
130 - Mention mention = create_mention(nestedGroups, valence.get(ValenceDicts.NounsValence)); 130 + Mention mention = createMention(nestedGroups, valence.get(ValenceDicts.NounsValence));
131 sentence.addMention(mention); 131 sentence.addMention(mention);
132 - }*/ 132 + }
133 133
134 - if (group.getType().startsWith("NG") && nextGroup != null && 134 + /*if (group.getType().startsWith("NG") && nextGroup != null &&
135 nextnextGroup != null && nextnextnextGroup != null && 135 nextnextGroup != null && nextnextnextGroup != null &&
136 quatroCompatibility(group, nextGroup, nextnextGroup, 136 quatroCompatibility(group, nextGroup, nextnextGroup,
137 nextnextnextGroup, valence.get(ValenceDicts.NounsValence))) { 137 nextnextnextGroup, valence.get(ValenceDicts.NounsValence))) {
@@ -166,11 +166,11 @@ public class Detector { @@ -166,11 +166,11 @@ public class Detector {
166 List<Token> heads = group.getSemanticHeadTokens(); 166 List<Token> heads = group.getSemanticHeadTokens();
167 167
168 sentence.addMention(new Mention(segments, heads)); 168 sentence.addMention(new Mention(segments, heads));
169 - } 169 + }*/
170 } 170 }
171 } 171 }
172 172
173 - private static boolean isProperSchema(String schema, ArrayList<String> group1Types, 173 + /*private static boolean isProperSchema(String schema, ArrayList<String> group1Types,
174 ArrayList<String> group2Types) { 174 ArrayList<String> group2Types) {
175 for (String group1Type : group1Types) { 175 for (String group1Type : group1Types) {
176 if (schemaContains(schema, group1Type)) { 176 if (schemaContains(schema, group1Type)) {
@@ -182,7 +182,7 @@ public class Detector { @@ -182,7 +182,7 @@ public class Detector {
182 } 182 }
183 } 183 }
184 return false; 184 return false;
185 - } 185 + }*/
186 186
187 /*private static boolean isProperSchema(String schema, ArrayList<String> group1Types, 187 /*private static boolean isProperSchema(String schema, ArrayList<String> group1Types,
188 ArrayList<String> group2Types) { 188 ArrayList<String> group2Types) {
@@ -205,7 +205,7 @@ public class Detector { @@ -205,7 +205,7 @@ public class Detector {
205 return false; 205 return false;
206 }*/ 206 }*/
207 207
208 - private static boolean groupsValenceCompatibility(SyntacticGroup NG1, 208 + /*private static boolean groupsValenceCompatibility(SyntacticGroup NG1,
209 SyntacticGroup NG2, Sentence sentence, 209 SyntacticGroup NG2, Sentence sentence,
210 Map<String,ArrayList<String>> walentyMapping) { 210 Map<String,ArrayList<String>> walentyMapping) {
211 Token NG1Head = NG1.getSemanticHeadTokens().get(0); 211 Token NG1Head = NG1.getSemanticHeadTokens().get(0);
@@ -295,7 +295,7 @@ public class Detector { @@ -295,7 +295,7 @@ public class Detector {
295 return false; 295 return false;
296 } 296 }
297 297
298 - /*private static boolean isTripleProperSchema(String schema, ArrayList<String> group1Types, 298 + private static boolean isTripleProperSchema(String schema, ArrayList<String> group1Types,
299 ArrayList<String> group2Types, ArrayList<String> group3Types) { 299 ArrayList<String> group2Types, ArrayList<String> group3Types) {
300 300
301 ArrayList<String> group1MPositions = getMatchingPositions(schema, group1Types); 301 ArrayList<String> group1MPositions = getMatchingPositions(schema, group1Types);
@@ -319,7 +319,54 @@ public class Detector { @@ -319,7 +319,54 @@ public class Detector {
319 return false; 319 return false;
320 }*/ 320 }*/
321 321
322 - /*private static boolean isProperSchema(String schema, 322 + private static Mention createMention(ArrayList<SyntacticGroup> nestedGroups,
  323 + Map<String,ArrayList<String>> walentyNouns) {
  324 +
  325 + SyntacticGroup initialGroup = nestedGroups.get(0);
  326 + String initialGroupHead = initialGroup.getSemanticHeadTokens().get(0).getBase();
  327 +
  328 + List<Token> heads = initialGroup.getSemanticHeadTokens();
  329 + List<Token> segments = new ArrayList<Token>();
  330 +
  331 + if (!walentyNouns.containsKey(initialGroupHead)) {
  332 + segments.addAll(initialGroup.getTokens());
  333 + } else {
  334 +
  335 + ArrayList<String> schemata = walentyNouns.get(initialGroupHead);
  336 + ArrayList<ArrayList<String>> groupsRealizations = new ArrayList<ArrayList<String>>();
  337 + ArrayList<SyntacticGroup> largestMatch = new ArrayList<SyntacticGroup>();
  338 + largestMatch.add(initialGroup);
  339 +
  340 + for (int i=1; i < nestedGroups.size(); i++) {
  341 + SyntacticGroup group = nestedGroups.get(i);
  342 + ArrayList<String> realizations = group.getWalentyRealizations();
  343 + groupsRealizations.add(realizations);
  344 + if (realizationsMatch(schemata, groupsRealizations)) {
  345 + largestMatch.add(group);
  346 + } else {
  347 + break;
  348 + }
  349 + }
  350 +
  351 + for (SyntacticGroup group : largestMatch) {
  352 + segments.addAll(group.getTokens());
  353 + }
  354 +
  355 + }
  356 + return new Mention(segments, heads);
  357 + }
  358 +
  359 + private static boolean realizationsMatch(ArrayList<String> schemata,
  360 + ArrayList<ArrayList<String>> groupsRealizations) {
  361 + for (String schema : schemata) {
  362 + if (isProperSchema(schema, groupsRealizations)) {
  363 + return true;
  364 + }
  365 + }
  366 + return false;
  367 + }
  368 +
  369 + private static boolean isProperSchema(String schema,
323 ArrayList<ArrayList<String>> groupsRealizations) { 370 ArrayList<ArrayList<String>> groupsRealizations) {
324 371
325 ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>(); 372 ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>();
@@ -328,13 +375,14 @@ public class Detector { @@ -328,13 +375,14 @@ public class Detector {
328 } 375 }
329 376
330 if (matchingPositionsExists(matchingPositions)) { 377 if (matchingPositionsExists(matchingPositions)) {
331 - ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions); 378 + return true;
  379 + /*ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions);
332 for (ArrayList<String> combination : product) { 380 for (ArrayList<String> combination : product) {
333 Set<String> combinationSet = new HashSet<String>(combination); 381 Set<String> combinationSet = new HashSet<String>(combination);
334 if (combinationSet.size() == matchingPositions.size()) { 382 if (combinationSet.size() == matchingPositions.size()) {
335 return true; 383 return true;
336 } 384 }
337 - } 385 + }*/
338 } 386 }
339 return false; 387 return false;
340 } 388 }
@@ -381,7 +429,7 @@ public class Detector { @@ -381,7 +429,7 @@ public class Detector {
381 } 429 }
382 } 430 }
383 return positions; 431 return positions;
384 - }*/ 432 + }
385 433
386 private static boolean schemaContains(String schema, String phraseType) { 434 private static boolean schemaContains(String schema, String phraseType) {
387 for (String position : schema.split("\\s\\+\\s")) { 435 for (String position : schema.split("\\s\\+\\s")) {