Commit 792c556f1a01862dc83354bc5e1d5bf5dac38898

Authored by Bartłomiej Nitoń
1 parent 46a46821

Added default Walenty dictionaries.

Too many changes to show.

To preserve performance, only 2 of 4 files are displayed.

src/main/java/pl/waw/ipipan/zil/core/md/Main.java
@@ -33,8 +33,8 @@ public class Main { @@ -33,8 +33,8 @@ public class Main {
33 33
34 private static final boolean GZIP_OUTPUT = true; 34 private static final boolean GZIP_OUTPUT = true;
35 private static final String DEFAULT_ZERO_SUBJECT_MODEL = "/zero_subject_model.bin"; 35 private static final String DEFAULT_ZERO_SUBJECT_MODEL = "/zero_subject_model.bin";
36 - private static final String DEFAULT_VERBS_VALENCE = "/walenty_20170117_verbs_all_with_realizations.txt";  
37 - private static final String DEFAULT_NOUNS_VALENCE = "/walenty_20170117_nouns_all_with_realizations.txt"; 36 + private static final String DEFAULT_VERBS_VALENCE = "/walenty_verbs.txt";
  37 + private static final String DEFAULT_NOUNS_VALENCE = "/walenty_nouns.txt";
38 38
39 private static ZeroSubjectDetector zeroSubjectModel; 39 private static ZeroSubjectDetector zeroSubjectModel;
40 40
src/main/java/pl/waw/ipipan/zil/core/md/detection/Detector.java
@@ -106,6 +106,7 @@ public class Detector { @@ -106,6 +106,7 @@ public class Detector {
106 Map<ValenceDicts,Map<String,ArrayList<String>>> valence) { 106 Map<ValenceDicts,Map<String,ArrayList<String>>> valence) {
107 107
108 for (SyntacticGroup group : sentence.getGroups()) { 108 for (SyntacticGroup group : sentence.getGroups()) {
  109 +
109 SyntacticGroup nextGroup = group.getFollowingGroup(); 110 SyntacticGroup nextGroup = group.getFollowingGroup();
110 SyntacticGroup nextnextGroup = null; 111 SyntacticGroup nextnextGroup = null;
111 SyntacticGroup nextnextnextGroup = null; 112 SyntacticGroup nextnextnextGroup = null;
@@ -116,6 +117,20 @@ public class Detector { @@ -116,6 +117,20 @@ public class Detector {
116 } 117 }
117 } 118 }
118 119
  120 + /*if (group.getType().startsWith("NG")) {
  121 + ArrayList<SyntacticGroup> nestedGroups = new ArrayList<SyntacticGroup>();
  122 + nestedGroups.add(group);
  123 +
  124 + SyntacticGroup nextGroup = group.getFollowingGroup();
  125 + while (nextGroup != null) {
  126 + nestedGroups.add(nextGroup);
  127 + nextGroup = nextGroup.getFollowingGroup();
  128 + }
  129 +
  130 + Mention mention = create_mention(nestedGroups, valence.get(ValenceDicts.NounsValence));
  131 + sentence.addMention(mention);
  132 + }*/
  133 +
119 if (group.getType().startsWith("NG") && nextGroup != null && 134 if (group.getType().startsWith("NG") && nextGroup != null &&
120 nextnextGroup != null && nextnextnextGroup != null && 135 nextnextGroup != null && nextnextnextGroup != null &&
121 quatroCompatibility(group, nextGroup, nextnextGroup, 136 quatroCompatibility(group, nextGroup, nextnextGroup,
@@ -169,6 +184,27 @@ public class Detector { @@ -169,6 +184,27 @@ public class Detector {
169 return false; 184 return false;
170 } 185 }
171 186
  187 + /*private static boolean isProperSchema(String schema, ArrayList<String> group1Types,
  188 + ArrayList<String> group2Types) {
  189 + ArrayList<String> group1MPositions = getMatchingPositions(schema, group1Types);
  190 + ArrayList<String> group2MPositions = getMatchingPositions(schema, group2Types);
  191 +
  192 + ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>();
  193 + matchingPositions.add(group1MPositions);
  194 + matchingPositions.add(group2MPositions);
  195 +
  196 + if (matchingPositionsExists(matchingPositions)) {
  197 + ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions);
  198 + for (ArrayList<String> combination : product) {
  199 + Set<String> combinationSet = new HashSet<String>(combination);
  200 + if (combinationSet.size() == matchingPositions.size()) {
  201 + return true;
  202 + }
  203 + }
  204 + }
  205 + return false;
  206 + }*/
  207 +
172 private static boolean groupsValenceCompatibility(SyntacticGroup NG1, 208 private static boolean groupsValenceCompatibility(SyntacticGroup NG1,
173 SyntacticGroup NG2, Sentence sentence, 209 SyntacticGroup NG2, Sentence sentence,
174 Map<String,ArrayList<String>> walentyMapping) { 210 Map<String,ArrayList<String>> walentyMapping) {
@@ -266,65 +302,70 @@ public class Detector { @@ -266,65 +302,70 @@ public class Detector {
266 ArrayList<String> group2MPositions = getMatchingPositions(schema, group2Types); 302 ArrayList<String> group2MPositions = getMatchingPositions(schema, group2Types);
267 ArrayList<String> group3MPositions = getMatchingPositions(schema, group3Types); 303 ArrayList<String> group3MPositions = getMatchingPositions(schema, group3Types);
268 304
  305 + ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>();
  306 + matchingPositions.add(group1MPositions);
  307 + matchingPositions.add(group2MPositions);
  308 + matchingPositions.add(group3MPositions);
269 309
270 -  
271 - ArrayList<String> group1MPositionsCopy = new ArrayList<String>();  
272 - ArrayList<String> group2MPositionsCopy = getMatchingPositions(schema, group2Types);  
273 - ArrayList<String> group3MPositionsCopy = getMatchingPositions(schema, group3Types);  
274 -  
275 -  
276 - if (group1MPositions.isEmpty() || group2MPositions.isEmpty() || group3MPositions.isEmpty()) {  
277 - return false; 310 + if (matchingPositionsExists(matchingPositions)) {
  311 + ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions);
  312 + for (ArrayList<String> combination : product) {
  313 + Set<String> combinationSet = new HashSet<String>(combination);
  314 + if (combinationSet.size() == matchingPositions.size()) {
  315 + return true;
  316 + }
  317 + }
278 } 318 }
  319 + return false;
  320 + }*/
  321 +
  322 + /*private static boolean isProperSchema(String schema,
  323 + ArrayList<ArrayList<String>> groupsRealizations) {
279 324
280 - boolean group1ok = false;  
281 - boolean group2ok = false;  
282 - boolean group3ok = false;  
283 -  
284 - for (String pos : group1MPositions) {  
285 - 325 + ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>();
  326 + for (ArrayList<String> realizations : groupsRealizations) {
  327 + matchingPositions.add(getMatchingPositions(schema, realizations));
286 } 328 }
287 329
288 - ArrayList<String>  
289 -  
290 - if (union(group1MPositions, group2MPositions).size() > group1MPositions.size() &&  
291 - )  
292 -  
293 -  
294 - for (String group1Type : group1Types) {  
295 - if (schemaContains(schema, group1Type)) {  
296 - for (String group2Type : group2Types) {  
297 - if (schemaContains(schema, group2Type)) {  
298 - for (String group3Type : group3Types) {  
299 - if (schemaContains(schema, group3Type)) {  
300 - return true;  
301 - }  
302 - }  
303 - } 330 + if (matchingPositionsExists(matchingPositions)) {
  331 + ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions);
  332 + for (ArrayList<String> combination : product) {
  333 + Set<String> combinationSet = new HashSet<String>(combination);
  334 + if (combinationSet.size() == matchingPositions.size()) {
  335 + return true;
304 } 336 }
305 } 337 }
306 } 338 }
307 return false; 339 return false;
308 - }*/ 340 + }
309 341
310 - public static List<String> union(List<String> list1, List<String> list2) {  
311 - HashSet<String> set = new HashSet<String>();  
312 -  
313 - set.addAll(list1);  
314 - set.addAll(list2);  
315 -  
316 - return new ArrayList<String>(set); 342 + private static boolean matchingPositionsExists(ArrayList<ArrayList<String>> matchingPositions) {
  343 + for (ArrayList<String> positions : matchingPositions) {
  344 + if (positions.isEmpty()) {
  345 + return false;
  346 + }
  347 + }
  348 + return true;
317 } 349 }
318 350
319 - public static List<String> tripleUnion(List<String> list1, List<String> list2,  
320 - List<String> list3) {  
321 - HashSet<String> set = new HashSet<String>();  
322 -  
323 - set.addAll(list1);  
324 - set.addAll(list2);  
325 - set.addAll(list3);  
326 -  
327 - return new ArrayList<String>(set); 351 + private static ArrayList<ArrayList<String>> cartesianProduct(ArrayList<ArrayList<String>> lists) {
  352 + ArrayList<ArrayList<String>> product = new ArrayList<ArrayList<String>>();
  353 + if (lists.size() == 0) {
  354 + product.add(new ArrayList<String>());
  355 + return product;
  356 + } else {
  357 + ArrayList<String> firstList = lists.get(0);
  358 + ArrayList<ArrayList<String>> remainingLists = cartesianProduct(new ArrayList(lists.subList(1, lists.size())));
  359 + for (String condition : firstList) {
  360 + for (ArrayList<String> remainingList : remainingLists) {
  361 + ArrayList<String> resultList = new ArrayList<String>();
  362 + resultList.add(condition);
  363 + resultList.addAll(remainingList);
  364 + product.add(resultList);
  365 + }
  366 + }
  367 + }
  368 + return product;
328 } 369 }
329 370
330 private static ArrayList<String> getMatchingPositions(String schema, ArrayList<String> phraseRealizations) { 371 private static ArrayList<String> getMatchingPositions(String schema, ArrayList<String> phraseRealizations) {
@@ -340,7 +381,7 @@ public class Detector { @@ -340,7 +381,7 @@ public class Detector {
340 } 381 }
341 } 382 }
342 return positions; 383 return positions;
343 - } 384 + }*/
344 385
345 private static boolean schemaContains(String schema, String phraseType) { 386 private static boolean schemaContains(String schema, String phraseType) {
346 for (String position : schema.split("\\s\\+\\s")) { 387 for (String position : schema.split("\\s\\+\\s")) {