Commit 9c1af6531483d512d8ad817ee8dfbb4a1a59b985
1 parent
2cc66960
Coordinated schemata validation is now limited by a minimum of 20 positions with a coordinated pair of phrase types.
Showing 2 changed files with 36 additions and 19 deletions
dictionary/ajax_lemma_view.py
... | ... | @@ -2586,7 +2586,6 @@ def validate_new_frames(request, data, id, examples, lemma_examples, |
2586 | 2586 | aspect_rel_lemmas = get_aspect_rel_lemmas(old_object) |
2587 | 2587 | missing_frames = get_all_test_missing_frames(frames, aspect_rel_lemmas) |
2588 | 2588 | missmatched_B_frames = validate_B_frames(old_object) |
2589 | - frames_to_merge = find_similar_frames(old_object.frames.all()) | |
2590 | 2589 | wrong_aspect_frames = get_wrong_aspect_frames(old_object, old_object.frames.order_by('text_rep')) |
2591 | 2590 | deriv_miss_frames_msg = get_deriv_miss_frames_message(old_object) |
2592 | 2591 | if old_object.entry_obj.pos.tag == 'verb': |
... | ... | @@ -2612,6 +2611,7 @@ def validate_new_frames(request, data, id, examples, lemma_examples, |
2612 | 2611 | message_content += u'\t- %s\n' % (miss_frame.text_rep) |
2613 | 2612 | message_content += '\n' |
2614 | 2613 | message_content += deriv_miss_frames_msg |
2614 | + frames_to_merge = find_similar_frames(old_object.frames.all()) | |
2615 | 2615 | if len(frames_to_merge) > 0: |
2616 | 2616 | message_content += u'Sugerowane jest połączenie poniższych schematów, zawierają one często koordynujące się typy fraz:\n' |
2617 | 2617 | for comb in frames_to_merge: |
... | ... |
dictionary/validation.py
... | ... | @@ -171,6 +171,41 @@ def validate_B_frames(lemma_obj): |
171 | 171 | |
172 | 172 | ### KOORDYNACJA ##### |
173 | 173 | |
174 | +def find_similar_frames(frames): | |
175 | + frames_to_merge = [] | |
176 | + if len(frames) > 1: | |
177 | + combinations = itertools.combinations(frames, 2) | |
178 | + for comb in combinations: | |
179 | + if (comb[0].characteristics.get(type=u'ZWROTNOŚĆ') == comb[1].characteristics.get(type=u'ZWROTNOŚĆ') and | |
180 | + comb[0].characteristics.get(type=u'ASPEKT') == comb[1].characteristics.get(type=u'ASPEKT') and | |
181 | + comb[0].characteristics.get(type=u'NEGATYWNOŚĆ') == comb[1].characteristics.get(type=u'NEGATYWNOŚĆ') and | |
182 | + comb[0].characteristics.get(type=u'PREDYKATYWNOŚĆ') == comb[1].characteristics.get(type=u'PREDYKATYWNOŚĆ')): | |
183 | + occurrences = check_max_args_coor(comb[0], comb[1]) | |
184 | + if occurrences >= 20: | |
185 | + frames_to_merge.append({'frames': comb, | |
186 | + 'occurrences': occurrences}) | |
187 | + frames_to_merge = sorted(frames_to_merge, | |
188 | + key=operator.itemgetter('occurrences'), | |
189 | + reverse=True) | |
190 | + return frames_to_merge | |
191 | + | |
192 | +def check_max_args_coor(frame1, frame2): | |
193 | + max_occurr = 0 | |
194 | + pos_diff1 = frame1.positions.exclude(pk__in=frame2.positions.all()) | |
195 | + pos_diff2 = frame2.positions.exclude(pk__in=frame1.positions.all()) | |
196 | + if(frame1.positions.count() == frame2.positions.count() and | |
197 | + pos_diff1.count() == 1 and pos_diff2.count() == 1 and | |
198 | + pos_diff1.all()[0].categories.count() == pos_diff2.all()[0].categories.count() and | |
199 | + (pos_diff1.all()[0].categories.all() | | |
200 | + pos_diff2.all()[0].categories.all()).count() == pos_diff1.all()[0].categories.count()): | |
201 | + for phrase_type1 in pos_diff1.all()[0].arguments.all(): | |
202 | + for phrase_type2 in pos_diff2.all()[0].arguments.all(): | |
203 | + matching_positions = Position.objects.filter(arguments=phrase_type1).filter(arguments=phrase_type2) | |
204 | + occurr = matching_positions.aggregate(Sum('occurrences'))['occurrences__sum'] | |
205 | + if occurr and occurr > max_occurr: | |
206 | + max_occurr = occurr | |
207 | + return max_occurr | |
208 | + | |
174 | 209 | def check_frames_diff(frame1, frame2): |
175 | 210 | occurr = 0 |
176 | 211 | pos_diff1 = frame1.positions.exclude(pk__in=frame2.positions.all()) |
... | ... | @@ -190,24 +225,6 @@ def check_frames_diff(frame1, frame2): |
190 | 225 | return occurr |
191 | 226 | return occurr |
192 | 227 | |
193 | -def find_similar_frames(frames): | |
194 | - frames_to_merge = [] | |
195 | - if len(frames) > 1: | |
196 | - combinations = itertools.combinations(frames, 2) | |
197 | - for comb in combinations: | |
198 | - if (comb[0].characteristics.get(type=u'ZWROTNOŚĆ') == comb[1].characteristics.get(type=u'ZWROTNOŚĆ') and | |
199 | - comb[0].characteristics.get(type=u'ASPEKT') == comb[1].characteristics.get(type=u'ASPEKT') and | |
200 | - comb[0].characteristics.get(type=u'NEGATYWNOŚĆ') == comb[1].characteristics.get(type=u'NEGATYWNOŚĆ') and | |
201 | - comb[0].characteristics.get(type=u'PREDYKATYWNOŚĆ') == comb[1].characteristics.get(type=u'PREDYKATYWNOŚĆ')): | |
202 | - occurrences = check_frames_diff(comb[0], comb[1]) | |
203 | - if occurrences > 0: | |
204 | - frames_to_merge.append({'frames': comb, | |
205 | - 'occurrences': occurrences}) | |
206 | - frames_to_merge = sorted(frames_to_merge, | |
207 | - key=operator.itemgetter('occurrences'), | |
208 | - reverse=True) | |
209 | - return frames_to_merge | |
210 | - | |
211 | 228 | ###################### walidacja powiazanych hasel (nieczasownikowe) ####################################### |
212 | 229 | |
213 | 230 | def get_deriv_miss_frames_message(lemma): |
... | ... |