Commit 792c556f1a01862dc83354bc5e1d5bf5dac38898
1 parent
46a46821
Added default Walenty dictionaries.
Showing
4 changed files
with
82755 additions
and
50 deletions
Too many changes to show.
To preserve performance only 2 of 4 files are displayed.
src/main/java/pl/waw/ipipan/zil/core/md/Main.java
... | ... | @@ -33,8 +33,8 @@ public class Main { |
33 | 33 | |
34 | 34 | private static final boolean GZIP_OUTPUT = true; |
35 | 35 | private static final String DEFAULT_ZERO_SUBJECT_MODEL = "/zero_subject_model.bin"; |
36 | - private static final String DEFAULT_VERBS_VALENCE = "/walenty_20170117_verbs_all_with_realizations.txt"; | |
37 | - private static final String DEFAULT_NOUNS_VALENCE = "/walenty_20170117_nouns_all_with_realizations.txt"; | |
36 | + private static final String DEFAULT_VERBS_VALENCE = "/walenty_verbs.txt"; | |
37 | + private static final String DEFAULT_NOUNS_VALENCE = "/walenty_nouns.txt"; | |
38 | 38 | |
39 | 39 | private static ZeroSubjectDetector zeroSubjectModel; |
40 | 40 | |
... | ... |
src/main/java/pl/waw/ipipan/zil/core/md/detection/Detector.java
... | ... | @@ -106,6 +106,7 @@ public class Detector { |
106 | 106 | Map<ValenceDicts,Map<String,ArrayList<String>>> valence) { |
107 | 107 | |
108 | 108 | for (SyntacticGroup group : sentence.getGroups()) { |
109 | + | |
109 | 110 | SyntacticGroup nextGroup = group.getFollowingGroup(); |
110 | 111 | SyntacticGroup nextnextGroup = null; |
111 | 112 | SyntacticGroup nextnextnextGroup = null; |
... | ... | @@ -116,6 +117,20 @@ public class Detector { |
116 | 117 | } |
117 | 118 | } |
118 | 119 | |
120 | + /*if (group.getType().startsWith("NG")) { | |
121 | + ArrayList<SyntacticGroup> nestedGroups = new ArrayList<SyntacticGroup>(); | |
122 | + nestedGroups.add(group); | |
123 | + | |
124 | + SyntacticGroup nextGroup = group.getFollowingGroup(); | |
125 | + while (nextGroup != null) { | |
126 | + nestedGroups.add(nextGroup); | |
127 | + nextGroup = nextGroup.getFollowingGroup(); | |
128 | + } | |
129 | + | |
130 | + Mention mention = create_mention(nestedGroups, valence.get(ValenceDicts.NounsValence)); | |
131 | + sentence.addMention(mention); | |
132 | + }*/ | |
133 | + | |
119 | 134 | if (group.getType().startsWith("NG") && nextGroup != null && |
120 | 135 | nextnextGroup != null && nextnextnextGroup != null && |
121 | 136 | quatroCompatibility(group, nextGroup, nextnextGroup, |
... | ... | @@ -169,6 +184,27 @@ public class Detector { |
169 | 184 | return false; |
170 | 185 | } |
171 | 186 | |
187 | + /*private static boolean isProperSchema(String schema, ArrayList<String> group1Types, | |
188 | + ArrayList<String> group2Types) { | |
189 | + ArrayList<String> group1MPositions = getMatchingPositions(schema, group1Types); | |
190 | + ArrayList<String> group2MPositions = getMatchingPositions(schema, group2Types); | |
191 | + | |
192 | + ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>(); | |
193 | + matchingPositions.add(group1MPositions); | |
194 | + matchingPositions.add(group2MPositions); | |
195 | + | |
196 | + if (matchingPositionsExists(matchingPositions)) { | |
197 | + ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions); | |
198 | + for (ArrayList<String> combination : product) { | |
199 | + Set<String> combinationSet = new HashSet<String>(combination); | |
200 | + if (combinationSet.size() == matchingPositions.size()) { | |
201 | + return true; | |
202 | + } | |
203 | + } | |
204 | + } | |
205 | + return false; | |
206 | + }*/ | |
207 | + | |
172 | 208 | private static boolean groupsValenceCompatibility(SyntacticGroup NG1, |
173 | 209 | SyntacticGroup NG2, Sentence sentence, |
174 | 210 | Map<String,ArrayList<String>> walentyMapping) { |
... | ... | @@ -266,65 +302,70 @@ public class Detector { |
266 | 302 | ArrayList<String> group2MPositions = getMatchingPositions(schema, group2Types); |
267 | 303 | ArrayList<String> group3MPositions = getMatchingPositions(schema, group3Types); |
268 | 304 | |
305 | + ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>(); | |
306 | + matchingPositions.add(group1MPositions); | |
307 | + matchingPositions.add(group2MPositions); | |
308 | + matchingPositions.add(group3MPositions); | |
269 | 309 | |
270 | - | |
271 | - ArrayList<String> group1MPositionsCopy = new ArrayList<String>(); | |
272 | - ArrayList<String> group2MPositionsCopy = getMatchingPositions(schema, group2Types); | |
273 | - ArrayList<String> group3MPositionsCopy = getMatchingPositions(schema, group3Types); | |
274 | - | |
275 | - | |
276 | - if (group1MPositions.isEmpty() || group2MPositions.isEmpty() || group3MPositions.isEmpty()) { | |
277 | - return false; | |
310 | + if (matchingPositionsExists(matchingPositions)) { | |
311 | + ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions); | |
312 | + for (ArrayList<String> combination : product) { | |
313 | + Set<String> combinationSet = new HashSet<String>(combination); | |
314 | + if (combinationSet.size() == matchingPositions.size()) { | |
315 | + return true; | |
316 | + } | |
317 | + } | |
278 | 318 | } |
319 | + return false; | |
320 | + }*/ | |
321 | + | |
322 | + /*private static boolean isProperSchema(String schema, | |
323 | + ArrayList<ArrayList<String>> groupsRealizations) { | |
279 | 324 | |
280 | - boolean group1ok = false; | |
281 | - boolean group2ok = false; | |
282 | - boolean group3ok = false; | |
283 | - | |
284 | - for (String pos : group1MPositions) { | |
285 | - | |
325 | + ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>(); | |
326 | + for (ArrayList<String> realizations : groupsRealizations) { | |
327 | + matchingPositions.add(getMatchingPositions(schema, realizations)); | |
286 | 328 | } |
287 | 329 | |
288 | - ArrayList<String> | |
289 | - | |
290 | - if (union(group1MPositions, group2MPositions).size() > group1MPositions.size() && | |
291 | - ) | |
292 | - | |
293 | - | |
294 | - for (String group1Type : group1Types) { | |
295 | - if (schemaContains(schema, group1Type)) { | |
296 | - for (String group2Type : group2Types) { | |
297 | - if (schemaContains(schema, group2Type)) { | |
298 | - for (String group3Type : group3Types) { | |
299 | - if (schemaContains(schema, group3Type)) { | |
300 | - return true; | |
301 | - } | |
302 | - } | |
303 | - } | |
330 | + if (matchingPositionsExists(matchingPositions)) { | |
331 | + ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions); | |
332 | + for (ArrayList<String> combination : product) { | |
333 | + Set<String> combinationSet = new HashSet<String>(combination); | |
334 | + if (combinationSet.size() == matchingPositions.size()) { | |
335 | + return true; | |
304 | 336 | } |
305 | 337 | } |
306 | 338 | } |
307 | 339 | return false; |
308 | - }*/ | |
340 | + } | |
309 | 341 | |
310 | - public static List<String> union(List<String> list1, List<String> list2) { | |
311 | - HashSet<String> set = new HashSet<String>(); | |
312 | - | |
313 | - set.addAll(list1); | |
314 | - set.addAll(list2); | |
315 | - | |
316 | - return new ArrayList<String>(set); | |
342 | + private static boolean matchingPositionsExists(ArrayList<ArrayList<String>> matchingPositions) { | |
343 | + for (ArrayList<String> positions : matchingPositions) { | |
344 | + if (positions.isEmpty()) { | |
345 | + return false; | |
346 | + } | |
347 | + } | |
348 | + return true; | |
317 | 349 | } |
318 | 350 | |
319 | - public static List<String> tripleUnion(List<String> list1, List<String> list2, | |
320 | - List<String> list3) { | |
321 | - HashSet<String> set = new HashSet<String>(); | |
322 | - | |
323 | - set.addAll(list1); | |
324 | - set.addAll(list2); | |
325 | - set.addAll(list3); | |
326 | - | |
327 | - return new ArrayList<String>(set); | |
351 | + private static ArrayList<ArrayList<String>> cartesianProduct(ArrayList<ArrayList<String>> lists) { | |
352 | + ArrayList<ArrayList<String>> product = new ArrayList<ArrayList<String>>(); | |
353 | + if (lists.size() == 0) { | |
354 | + product.add(new ArrayList<String>()); | |
355 | + return product; | |
356 | + } else { | |
357 | + ArrayList<String> firstList = lists.get(0); | |
358 | + ArrayList<ArrayList<String>> remainingLists = cartesianProduct(new ArrayList(lists.subList(1, lists.size()))); | |
359 | + for (String condition : firstList) { | |
360 | + for (ArrayList<String> remainingList : remainingLists) { | |
361 | + ArrayList<String> resultList = new ArrayList<String>(); | |
362 | + resultList.add(condition); | |
363 | + resultList.addAll(remainingList); | |
364 | + product.add(resultList); | |
365 | + } | |
366 | + } | |
367 | + } | |
368 | + return product; | |
328 | 369 | } |
329 | 370 | |
330 | 371 | private static ArrayList<String> getMatchingPositions(String schema, ArrayList<String> phraseRealizations) { |
... | ... | @@ -340,7 +381,7 @@ public class Detector { |
340 | 381 | } |
341 | 382 | } |
342 | 383 | return positions; |
343 | - } | |
384 | + }*/ | |
344 | 385 | |
345 | 386 | private static boolean schemaContains(String schema, String phraseType) { |
346 | 387 | for (String position : schema.split("\\s\\+\\s")) { |
... | ... |