Commit 792c556f1a01862dc83354bc5e1d5bf5dac38898

Authored by Bartłomiej Nitoń
1 parent 46a46821

Added default Walenty dictionaries.

Too many changes to show.

To preserve performance, only 2 of 4 files are displayed.

src/main/java/pl/waw/ipipan/zil/core/md/Main.java
... ... @@ -33,8 +33,8 @@ public class Main {
33 33  
34 34 private static final boolean GZIP_OUTPUT = true;
35 35 private static final String DEFAULT_ZERO_SUBJECT_MODEL = "/zero_subject_model.bin";
36   - private static final String DEFAULT_VERBS_VALENCE = "/walenty_20170117_verbs_all_with_realizations.txt";
37   - private static final String DEFAULT_NOUNS_VALENCE = "/walenty_20170117_nouns_all_with_realizations.txt";
  36 + private static final String DEFAULT_VERBS_VALENCE = "/walenty_verbs.txt";
  37 + private static final String DEFAULT_NOUNS_VALENCE = "/walenty_nouns.txt";
38 38  
39 39 private static ZeroSubjectDetector zeroSubjectModel;
40 40  
... ...
src/main/java/pl/waw/ipipan/zil/core/md/detection/Detector.java
... ... @@ -106,6 +106,7 @@ public class Detector {
106 106 Map<ValenceDicts,Map<String,ArrayList<String>>> valence) {
107 107  
108 108 for (SyntacticGroup group : sentence.getGroups()) {
  109 +
109 110 SyntacticGroup nextGroup = group.getFollowingGroup();
110 111 SyntacticGroup nextnextGroup = null;
111 112 SyntacticGroup nextnextnextGroup = null;
... ... @@ -116,6 +117,20 @@ public class Detector {
116 117 }
117 118 }
118 119  
  120 + /*if (group.getType().startsWith("NG")) {
  121 + ArrayList<SyntacticGroup> nestedGroups = new ArrayList<SyntacticGroup>();
  122 + nestedGroups.add(group);
  123 +
  124 + SyntacticGroup nextGroup = group.getFollowingGroup();
  125 + while (nextGroup != null) {
  126 + nestedGroups.add(nextGroup);
  127 + nextGroup = nextGroup.getFollowingGroup();
  128 + }
  129 +
  130 + Mention mention = create_mention(nestedGroups, valence.get(ValenceDicts.NounsValence));
  131 + sentence.addMention(mention);
  132 + }*/
  133 +
119 134 if (group.getType().startsWith("NG") && nextGroup != null &&
120 135 nextnextGroup != null && nextnextnextGroup != null &&
121 136 quatroCompatibility(group, nextGroup, nextnextGroup,
... ... @@ -169,6 +184,27 @@ public class Detector {
169 184 return false;
170 185 }
171 186  
  187 + /*private static boolean isProperSchema(String schema, ArrayList<String> group1Types,
  188 + ArrayList<String> group2Types) {
  189 + ArrayList<String> group1MPositions = getMatchingPositions(schema, group1Types);
  190 + ArrayList<String> group2MPositions = getMatchingPositions(schema, group2Types);
  191 +
  192 + ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>();
  193 + matchingPositions.add(group1MPositions);
  194 + matchingPositions.add(group2MPositions);
  195 +
  196 + if (matchingPositionsExists(matchingPositions)) {
  197 + ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions);
  198 + for (ArrayList<String> combination : product) {
  199 + Set<String> combinationSet = new HashSet<String>(combination);
  200 + if (combinationSet.size() == matchingPositions.size()) {
  201 + return true;
  202 + }
  203 + }
  204 + }
  205 + return false;
  206 + }*/
  207 +
172 208 private static boolean groupsValenceCompatibility(SyntacticGroup NG1,
173 209 SyntacticGroup NG2, Sentence sentence,
174 210 Map<String,ArrayList<String>> walentyMapping) {
... ... @@ -266,65 +302,70 @@ public class Detector {
266 302 ArrayList<String> group2MPositions = getMatchingPositions(schema, group2Types);
267 303 ArrayList<String> group3MPositions = getMatchingPositions(schema, group3Types);
268 304  
  305 + ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>();
  306 + matchingPositions.add(group1MPositions);
  307 + matchingPositions.add(group2MPositions);
  308 + matchingPositions.add(group3MPositions);
269 309  
270   -
271   - ArrayList<String> group1MPositionsCopy = new ArrayList<String>();
272   - ArrayList<String> group2MPositionsCopy = getMatchingPositions(schema, group2Types);
273   - ArrayList<String> group3MPositionsCopy = getMatchingPositions(schema, group3Types);
274   -
275   -
276   - if (group1MPositions.isEmpty() || group2MPositions.isEmpty() || group3MPositions.isEmpty()) {
277   - return false;
  310 + if (matchingPositionsExists(matchingPositions)) {
  311 + ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions);
  312 + for (ArrayList<String> combination : product) {
  313 + Set<String> combinationSet = new HashSet<String>(combination);
  314 + if (combinationSet.size() == matchingPositions.size()) {
  315 + return true;
  316 + }
  317 + }
278 318 }
  319 + return false;
  320 + }*/
  321 +
  322 + /*private static boolean isProperSchema(String schema,
  323 + ArrayList<ArrayList<String>> groupsRealizations) {
279 324  
280   - boolean group1ok = false;
281   - boolean group2ok = false;
282   - boolean group3ok = false;
283   -
284   - for (String pos : group1MPositions) {
285   -
  325 + ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>();
  326 + for (ArrayList<String> realizations : groupsRealizations) {
  327 + matchingPositions.add(getMatchingPositions(schema, realizations));
286 328 }
287 329  
288   - ArrayList<String>
289   -
290   - if (union(group1MPositions, group2MPositions).size() > group1MPositions.size() &&
291   - )
292   -
293   -
294   - for (String group1Type : group1Types) {
295   - if (schemaContains(schema, group1Type)) {
296   - for (String group2Type : group2Types) {
297   - if (schemaContains(schema, group2Type)) {
298   - for (String group3Type : group3Types) {
299   - if (schemaContains(schema, group3Type)) {
300   - return true;
301   - }
302   - }
303   - }
  330 + if (matchingPositionsExists(matchingPositions)) {
  331 + ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions);
  332 + for (ArrayList<String> combination : product) {
  333 + Set<String> combinationSet = new HashSet<String>(combination);
  334 + if (combinationSet.size() == matchingPositions.size()) {
  335 + return true;
304 336 }
305 337 }
306 338 }
307 339 return false;
308   - }*/
  340 + }
309 341  
310   - public static List<String> union(List<String> list1, List<String> list2) {
311   - HashSet<String> set = new HashSet<String>();
312   -
313   - set.addAll(list1);
314   - set.addAll(list2);
315   -
316   - return new ArrayList<String>(set);
  342 + private static boolean matchingPositionsExists(ArrayList<ArrayList<String>> matchingPositions) {
  343 + for (ArrayList<String> positions : matchingPositions) {
  344 + if (positions.isEmpty()) {
  345 + return false;
  346 + }
  347 + }
  348 + return true;
317 349 }
318 350  
319   - public static List<String> tripleUnion(List<String> list1, List<String> list2,
320   - List<String> list3) {
321   - HashSet<String> set = new HashSet<String>();
322   -
323   - set.addAll(list1);
324   - set.addAll(list2);
325   - set.addAll(list3);
326   -
327   - return new ArrayList<String>(set);
  351 + private static ArrayList<ArrayList<String>> cartesianProduct(ArrayList<ArrayList<String>> lists) {
  352 + ArrayList<ArrayList<String>> product = new ArrayList<ArrayList<String>>();
  353 + if (lists.size() == 0) {
  354 + product.add(new ArrayList<String>());
  355 + return product;
  356 + } else {
  357 + ArrayList<String> firstList = lists.get(0);
  358 + ArrayList<ArrayList<String>> remainingLists = cartesianProduct(new ArrayList(lists.subList(1, lists.size())));
  359 + for (String condition : firstList) {
  360 + for (ArrayList<String> remainingList : remainingLists) {
  361 + ArrayList<String> resultList = new ArrayList<String>();
  362 + resultList.add(condition);
  363 + resultList.addAll(remainingList);
  364 + product.add(resultList);
  365 + }
  366 + }
  367 + }
  368 + return product;
328 369 }
329 370  
330 371 private static ArrayList<String> getMatchingPositions(String schema, ArrayList<String> phraseRealizations) {
... ... @@ -340,7 +381,7 @@ public class Detector {
340 381 }
341 382 }
342 383 return positions;
343   - }
  384 + }*/
344 385  
345 386 private static boolean schemaContains(String schema, String phraseType) {
346 387 for (String position : schema.split("\\s\\+\\s")) {
... ...