Commit 792c556f1a01862dc83354bc5e1d5bf5dac38898
1 parent
46a46821
Added default Walenty dictionaries.
Showing
4 changed files
with
82755 additions
and
50 deletions
Too many changes to show.
To preserve performance only 2 of 4 files are displayed.
src/main/java/pl/waw/ipipan/zil/core/md/Main.java
@@ -33,8 +33,8 @@ public class Main { | @@ -33,8 +33,8 @@ public class Main { | ||
33 | 33 | ||
34 | private static final boolean GZIP_OUTPUT = true; | 34 | private static final boolean GZIP_OUTPUT = true; |
35 | private static final String DEFAULT_ZERO_SUBJECT_MODEL = "/zero_subject_model.bin"; | 35 | private static final String DEFAULT_ZERO_SUBJECT_MODEL = "/zero_subject_model.bin"; |
36 | - private static final String DEFAULT_VERBS_VALENCE = "/walenty_20170117_verbs_all_with_realizations.txt"; | ||
37 | - private static final String DEFAULT_NOUNS_VALENCE = "/walenty_20170117_nouns_all_with_realizations.txt"; | 36 | + private static final String DEFAULT_VERBS_VALENCE = "/walenty_verbs.txt"; |
37 | + private static final String DEFAULT_NOUNS_VALENCE = "/walenty_nouns.txt"; | ||
38 | 38 | ||
39 | private static ZeroSubjectDetector zeroSubjectModel; | 39 | private static ZeroSubjectDetector zeroSubjectModel; |
40 | 40 |
src/main/java/pl/waw/ipipan/zil/core/md/detection/Detector.java
@@ -106,6 +106,7 @@ public class Detector { | @@ -106,6 +106,7 @@ public class Detector { | ||
106 | Map<ValenceDicts,Map<String,ArrayList<String>>> valence) { | 106 | Map<ValenceDicts,Map<String,ArrayList<String>>> valence) { |
107 | 107 | ||
108 | for (SyntacticGroup group : sentence.getGroups()) { | 108 | for (SyntacticGroup group : sentence.getGroups()) { |
109 | + | ||
109 | SyntacticGroup nextGroup = group.getFollowingGroup(); | 110 | SyntacticGroup nextGroup = group.getFollowingGroup(); |
110 | SyntacticGroup nextnextGroup = null; | 111 | SyntacticGroup nextnextGroup = null; |
111 | SyntacticGroup nextnextnextGroup = null; | 112 | SyntacticGroup nextnextnextGroup = null; |
@@ -116,6 +117,20 @@ public class Detector { | @@ -116,6 +117,20 @@ public class Detector { | ||
116 | } | 117 | } |
117 | } | 118 | } |
118 | 119 | ||
120 | + /*if (group.getType().startsWith("NG")) { | ||
121 | + ArrayList<SyntacticGroup> nestedGroups = new ArrayList<SyntacticGroup>(); | ||
122 | + nestedGroups.add(group); | ||
123 | + | ||
124 | + SyntacticGroup nextGroup = group.getFollowingGroup(); | ||
125 | + while (nextGroup != null) { | ||
126 | + nestedGroups.add(nextGroup); | ||
127 | + nextGroup = nextGroup.getFollowingGroup(); | ||
128 | + } | ||
129 | + | ||
130 | + Mention mention = create_mention(nestedGroups, valence.get(ValenceDicts.NounsValence)); | ||
131 | + sentence.addMention(mention); | ||
132 | + }*/ | ||
133 | + | ||
119 | if (group.getType().startsWith("NG") && nextGroup != null && | 134 | if (group.getType().startsWith("NG") && nextGroup != null && |
120 | nextnextGroup != null && nextnextnextGroup != null && | 135 | nextnextGroup != null && nextnextnextGroup != null && |
121 | quatroCompatibility(group, nextGroup, nextnextGroup, | 136 | quatroCompatibility(group, nextGroup, nextnextGroup, |
@@ -169,6 +184,27 @@ public class Detector { | @@ -169,6 +184,27 @@ public class Detector { | ||
169 | return false; | 184 | return false; |
170 | } | 185 | } |
171 | 186 | ||
187 | + /*private static boolean isProperSchema(String schema, ArrayList<String> group1Types, | ||
188 | + ArrayList<String> group2Types) { | ||
189 | + ArrayList<String> group1MPositions = getMatchingPositions(schema, group1Types); | ||
190 | + ArrayList<String> group2MPositions = getMatchingPositions(schema, group2Types); | ||
191 | + | ||
192 | + ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>(); | ||
193 | + matchingPositions.add(group1MPositions); | ||
194 | + matchingPositions.add(group2MPositions); | ||
195 | + | ||
196 | + if (matchingPositionsExists(matchingPositions)) { | ||
197 | + ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions); | ||
198 | + for (ArrayList<String> combination : product) { | ||
199 | + Set<String> combinationSet = new HashSet<String>(combination); | ||
200 | + if (combinationSet.size() == matchingPositions.size()) { | ||
201 | + return true; | ||
202 | + } | ||
203 | + } | ||
204 | + } | ||
205 | + return false; | ||
206 | + }*/ | ||
207 | + | ||
172 | private static boolean groupsValenceCompatibility(SyntacticGroup NG1, | 208 | private static boolean groupsValenceCompatibility(SyntacticGroup NG1, |
173 | SyntacticGroup NG2, Sentence sentence, | 209 | SyntacticGroup NG2, Sentence sentence, |
174 | Map<String,ArrayList<String>> walentyMapping) { | 210 | Map<String,ArrayList<String>> walentyMapping) { |
@@ -266,65 +302,70 @@ public class Detector { | @@ -266,65 +302,70 @@ public class Detector { | ||
266 | ArrayList<String> group2MPositions = getMatchingPositions(schema, group2Types); | 302 | ArrayList<String> group2MPositions = getMatchingPositions(schema, group2Types); |
267 | ArrayList<String> group3MPositions = getMatchingPositions(schema, group3Types); | 303 | ArrayList<String> group3MPositions = getMatchingPositions(schema, group3Types); |
268 | 304 | ||
305 | + ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>(); | ||
306 | + matchingPositions.add(group1MPositions); | ||
307 | + matchingPositions.add(group2MPositions); | ||
308 | + matchingPositions.add(group3MPositions); | ||
269 | 309 | ||
270 | - | ||
271 | - ArrayList<String> group1MPositionsCopy = new ArrayList<String>(); | ||
272 | - ArrayList<String> group2MPositionsCopy = getMatchingPositions(schema, group2Types); | ||
273 | - ArrayList<String> group3MPositionsCopy = getMatchingPositions(schema, group3Types); | ||
274 | - | ||
275 | - | ||
276 | - if (group1MPositions.isEmpty() || group2MPositions.isEmpty() || group3MPositions.isEmpty()) { | ||
277 | - return false; | 310 | + if (matchingPositionsExists(matchingPositions)) { |
311 | + ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions); | ||
312 | + for (ArrayList<String> combination : product) { | ||
313 | + Set<String> combinationSet = new HashSet<String>(combination); | ||
314 | + if (combinationSet.size() == matchingPositions.size()) { | ||
315 | + return true; | ||
316 | + } | ||
317 | + } | ||
278 | } | 318 | } |
319 | + return false; | ||
320 | + }*/ | ||
321 | + | ||
322 | + /*private static boolean isProperSchema(String schema, | ||
323 | + ArrayList<ArrayList<String>> groupsRealizations) { | ||
279 | 324 | ||
280 | - boolean group1ok = false; | ||
281 | - boolean group2ok = false; | ||
282 | - boolean group3ok = false; | ||
283 | - | ||
284 | - for (String pos : group1MPositions) { | ||
285 | - | 325 | + ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>(); |
326 | + for (ArrayList<String> realizations : groupsRealizations) { | ||
327 | + matchingPositions.add(getMatchingPositions(schema, realizations)); | ||
286 | } | 328 | } |
287 | 329 | ||
288 | - ArrayList<String> | ||
289 | - | ||
290 | - if (union(group1MPositions, group2MPositions).size() > group1MPositions.size() && | ||
291 | - ) | ||
292 | - | ||
293 | - | ||
294 | - for (String group1Type : group1Types) { | ||
295 | - if (schemaContains(schema, group1Type)) { | ||
296 | - for (String group2Type : group2Types) { | ||
297 | - if (schemaContains(schema, group2Type)) { | ||
298 | - for (String group3Type : group3Types) { | ||
299 | - if (schemaContains(schema, group3Type)) { | ||
300 | - return true; | ||
301 | - } | ||
302 | - } | ||
303 | - } | 330 | + if (matchingPositionsExists(matchingPositions)) { |
331 | + ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions); | ||
332 | + for (ArrayList<String> combination : product) { | ||
333 | + Set<String> combinationSet = new HashSet<String>(combination); | ||
334 | + if (combinationSet.size() == matchingPositions.size()) { | ||
335 | + return true; | ||
304 | } | 336 | } |
305 | } | 337 | } |
306 | } | 338 | } |
307 | return false; | 339 | return false; |
308 | - }*/ | 340 | + } |
309 | 341 | ||
310 | - public static List<String> union(List<String> list1, List<String> list2) { | ||
311 | - HashSet<String> set = new HashSet<String>(); | ||
312 | - | ||
313 | - set.addAll(list1); | ||
314 | - set.addAll(list2); | ||
315 | - | ||
316 | - return new ArrayList<String>(set); | 342 | + private static boolean matchingPositionsExists(ArrayList<ArrayList<String>> matchingPositions) { |
343 | + for (ArrayList<String> positions : matchingPositions) { | ||
344 | + if (positions.isEmpty()) { | ||
345 | + return false; | ||
346 | + } | ||
347 | + } | ||
348 | + return true; | ||
317 | } | 349 | } |
318 | 350 | ||
319 | - public static List<String> tripleUnion(List<String> list1, List<String> list2, | ||
320 | - List<String> list3) { | ||
321 | - HashSet<String> set = new HashSet<String>(); | ||
322 | - | ||
323 | - set.addAll(list1); | ||
324 | - set.addAll(list2); | ||
325 | - set.addAll(list3); | ||
326 | - | ||
327 | - return new ArrayList<String>(set); | 351 | + private static ArrayList<ArrayList<String>> cartesianProduct(ArrayList<ArrayList<String>> lists) { |
352 | + ArrayList<ArrayList<String>> product = new ArrayList<ArrayList<String>>(); | ||
353 | + if (lists.size() == 0) { | ||
354 | + product.add(new ArrayList<String>()); | ||
355 | + return product; | ||
356 | + } else { | ||
357 | + ArrayList<String> firstList = lists.get(0); | ||
358 | + ArrayList<ArrayList<String>> remainingLists = cartesianProduct(new ArrayList(lists.subList(1, lists.size()))); | ||
359 | + for (String condition : firstList) { | ||
360 | + for (ArrayList<String> remainingList : remainingLists) { | ||
361 | + ArrayList<String> resultList = new ArrayList<String>(); | ||
362 | + resultList.add(condition); | ||
363 | + resultList.addAll(remainingList); | ||
364 | + product.add(resultList); | ||
365 | + } | ||
366 | + } | ||
367 | + } | ||
368 | + return product; | ||
328 | } | 369 | } |
329 | 370 | ||
330 | private static ArrayList<String> getMatchingPositions(String schema, ArrayList<String> phraseRealizations) { | 371 | private static ArrayList<String> getMatchingPositions(String schema, ArrayList<String> phraseRealizations) { |
@@ -340,7 +381,7 @@ public class Detector { | @@ -340,7 +381,7 @@ public class Detector { | ||
340 | } | 381 | } |
341 | } | 382 | } |
342 | return positions; | 383 | return positions; |
343 | - } | 384 | + }*/ |
344 | 385 | ||
345 | private static boolean schemaContains(String schema, String phraseType) { | 386 | private static boolean schemaContains(String schema, String phraseType) { |
346 | for (String position : schema.split("\\s\\+\\s")) { | 387 | for (String position : schema.split("\\s\\+\\s")) { |